Index: head/cddl/lib/libzfs/Makefile
===================================================================
--- head/cddl/lib/libzfs/Makefile	(revision 318735)
+++ head/cddl/lib/libzfs/Makefile	(revision 318736)
@@ -1,57 +1,58 @@
 # $FreeBSD$
 
 .PATH: ${SRCTOP}/cddl/compat/opensolaris/misc
 .PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/common/zfs
 .PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
 .PATH: ${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common
 
 LIB=	zfs
 LIBADD=	md pthread umem util uutil m avl bsdxml geom nvpair z zfs_core
 SRCS=	deviceid.c \
 	fsshare.c \
 	mkdirp.c \
 	mnttab.c \
 	thread_pool.c \
 	zmount.c \
 	zone.c
 
 SRCS+=	libzfs_changelist.c \
 	libzfs_compat.c \
 	libzfs_config.c \
 	libzfs_dataset.c \
 	libzfs_diff.c \
 	libzfs_import.c \
 	libzfs_iter.c \
 	libzfs_mount.c \
 	libzfs_pool.c \
 	libzfs_sendrecv.c \
 	libzfs_status.c \
 	libzfs_util.c \
 	zfeature_common.c \
 	zfs_comutil.c \
 	zfs_deleg.c \
 	zfs_fletcher.c \
 	zfs_namecheck.c \
 	zfs_prop.c \
 	zpool_prop.c \
 	zprop_common.c \
 
 WARNS?=	0
+SHLIB_MAJOR= 3
 CSTD=	c99
 CFLAGS+= -DZFS_NO_ACL
 CFLAGS+= -I${SRCTOP}/sbin/mount
 CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris
 CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include
 CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/lib/libumem
 CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzpool/common
 CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/common/zfs
 CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
 CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/sys
 CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/head
 CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common
 CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libnvpair
 CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libuutil/common
 CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common
 CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs_core/common
 
 .include <bsd.lib.mk>
Index: head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc
===================================================================
--- head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc	(revision 318735)
+++ head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc	(revision 318736)
@@ -1,1405 +1,1409 @@
 //===-- sanitizer_linux.cc ------------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file is shared between AddressSanitizer and ThreadSanitizer
 // run-time libraries and implements linux-specific functions from
 // sanitizer_libc.h.
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_platform.h"
 
 #if SANITIZER_FREEBSD || SANITIZER_LINUX
 
 #include "sanitizer_common.h"
 #include "sanitizer_flags.h"
 #include "sanitizer_internal_defs.h"
 #include "sanitizer_libc.h"
 #include "sanitizer_linux.h"
 #include "sanitizer_mutex.h"
 #include "sanitizer_placement_new.h"
 #include "sanitizer_procmaps.h"
 #include "sanitizer_stacktrace.h"
 #include "sanitizer_symbolizer.h"
 
 #if !SANITIZER_FREEBSD
 #include <asm/param.h>
 #endif
 
 // For mips64, syscall(__NR_stat) fills the buffer in the 'struct kernel_stat'
 // format. Struct kernel_stat is defined as 'struct stat' in asm/stat.h. To
 // access stat from asm/stat.h, without conflicting with definition in
 // sys/stat.h, we use this trick.
 #if defined(__mips64)
 #include <asm/unistd.h>
 #include <sys/types.h>
 #define stat kernel_stat
 #include <asm/stat.h>
 #undef stat
 #endif
 
 #include <dlfcn.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <link.h>
 #include <pthread.h>
 #include <sched.h>
 #include <sys/mman.h>
 #include <sys/ptrace.h>
 #include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <ucontext.h>
 #include <unistd.h>
 
 #if SANITIZER_FREEBSD
 #include <sys/exec.h>
 #include <sys/sysctl.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <machine/atomic.h>
 extern "C" {
 // <sys/umtx.h> must be included after <errno.h> and <sys/types.h> on
 // FreeBSD 9.2 and 10.0.
 #include <sys/umtx.h>
 }
 extern char **environ;  // provided by crt1
 #endif  // SANITIZER_FREEBSD
 
 #if !SANITIZER_ANDROID
 #include <sys/signal.h>
 #endif
 
 #if SANITIZER_LINUX
 // <linux/time.h>
 // Local two-'long' time structure. On non-FreeBSD builds NanoTime() passes it
 // to the raw gettimeofday syscall as the output buffer.
 struct kernel_timeval {
   long tv_sec;   // Seconds (NanoTime() scales this by 10^9).
   long tv_usec;  // Microseconds (NanoTime() scales this by 1000).
 };
 
 // <linux/futex.h> is broken on some linux distributions.
 const int FUTEX_WAIT = 0;
 const int FUTEX_WAKE = 1;
 #endif  // SANITIZER_LINUX
 
 // Are we using 32-bit or 64-bit Linux syscalls?
 // x32 (which defines __x86_64__) has SANITIZER_WORDSIZE == 32
 // but it still needs to use 64-bit syscalls.
 #if SANITIZER_LINUX && (defined(__x86_64__) || defined(__powerpc64__) || \
     SANITIZER_WORDSIZE == 64)
 # define SANITIZER_LINUX_USES_64BIT_SYSCALLS 1
 #else
 # define SANITIZER_LINUX_USES_64BIT_SYSCALLS 0
 #endif
 
 #if defined(__x86_64__) || SANITIZER_MIPS64
 extern "C" {
 extern void internal_sigreturn();
 }
 #endif
 
 namespace __sanitizer {
 
 #if SANITIZER_LINUX && defined(__x86_64__)
 #include "sanitizer_syscall_linux_x86_64.inc"
 #elif SANITIZER_LINUX && defined(__aarch64__)
 #include "sanitizer_syscall_linux_aarch64.inc"
 #else
 #include "sanitizer_syscall_generic.inc"
 #endif
 
 // --------------- sanitizer_libc.h
 #if !SANITIZER_S390
 // Issues the raw mmap syscall (or mmap2 on targets without 64-bit syscalls)
 // and returns the raw syscall result.
 uptr internal_mmap(void *addr, uptr length, int prot, int flags, int fd,
                    OFF_T offset) {
 #if !(SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS)
   // mmap2 specifies file offset in 4096-byte units.
   CHECK(IsAligned(offset, 4096));
   return internal_syscall(SYSCALL(mmap2), addr, length, prot, flags, fd,
                           offset / 4096);
 #else
   return internal_syscall(SYSCALL(mmap), (uptr)addr, length, prot, flags, fd,
                           offset);
 #endif
 }
 #endif // !SANITIZER_S390
 
 // Raw munmap syscall; the syscall result is returned unmodified.
 uptr internal_munmap(void *addr, uptr length) {
   const uptr start = (uptr)addr;
   return internal_syscall(SYSCALL(munmap), start, length);
 }
 
 int internal_mprotect(void *addr, uptr length, int prot) {
   return internal_syscall(SYSCALL(mprotect), (uptr)addr, length, prot);
 }
 
 // Raw close syscall on |fd|; returns the raw syscall result.
 uptr internal_close(fd_t fd) {
   const uptr rv = internal_syscall(SYSCALL(close), fd);
   return rv;
 }
 
 // Opens |filename| with |flags| through a raw syscall: openat relative to
 // AT_FDCWD on canonical-syscall targets, plain open otherwise.
 uptr internal_open(const char *filename, int flags) {
   const uptr path = (uptr)filename;
 #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(openat), AT_FDCWD, path, flags);
 #else
   return internal_syscall(SYSCALL(open), path, flags);
 #endif
 }
 
 // Same as the two-argument overload, additionally forwarding |mode| to the
 // syscall.
 uptr internal_open(const char *filename, int flags, u32 mode) {
   const uptr path = (uptr)filename;
 #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(openat), AT_FDCWD, path, flags, mode);
 #else
   return internal_syscall(SYSCALL(open), path, flags, mode);
 #endif
 }
 
 // Raw read syscall wrapped in HANDLE_EINTR (macro defined elsewhere); the
 // signed result is returned converted to uptr.
 uptr internal_read(fd_t fd, void *buf, uptr count) {
   sptr nread;
   HANDLE_EINTR(nread,
                (sptr)internal_syscall(SYSCALL(read), fd, (uptr)buf, count));
   return nread;
 }
 
 // Raw write syscall wrapped in HANDLE_EINTR (macro defined elsewhere); the
 // signed result is returned converted to uptr.
 uptr internal_write(fd_t fd, const void *buf, uptr count) {
   sptr nwritten;
   HANDLE_EINTR(nwritten,
                (sptr)internal_syscall(SYSCALL(write), fd, (uptr)buf, count));
   return nwritten;
 }
 
 // Raw ftruncate syscall wrapped in HANDLE_EINTR (macro defined elsewhere);
 // |size| is passed to the kernel as an OFF_T.
 uptr internal_ftruncate(fd_t fd, uptr size) {
   sptr rv;
   HANDLE_EINTR(rv,
                (sptr)internal_syscall(SYSCALL(ftruncate), fd, (OFF_T)size));
   return rv;
 }
 
 #if !SANITIZER_LINUX_USES_64BIT_SYSCALLS && !SANITIZER_FREEBSD
 static void stat64_to_stat(struct stat64 *in, struct stat *out) {
   internal_memset(out, 0, sizeof(*out));
   out->st_dev = in->st_dev;
   out->st_ino = in->st_ino;
   out->st_mode = in->st_mode;
   out->st_nlink = in->st_nlink;
   out->st_uid = in->st_uid;
   out->st_gid = in->st_gid;
   out->st_rdev = in->st_rdev;
   out->st_size = in->st_size;
   out->st_blksize = in->st_blksize;
   out->st_blocks = in->st_blocks;
   out->st_atime = in->st_atime;
   out->st_mtime = in->st_mtime;
   out->st_ctime = in->st_ctime;
   out->st_ino = in->st_ino;
 }
 #endif
 
 #if defined(__mips64)
 // Copies the fields of a mips64 'struct kernel_stat' (the kernel's stat layout,
 // see the include trick near the top of the file) into a zero-initialized
 // 'struct stat'.
 static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) {
   internal_memset(out, 0, sizeof(*out));
   out->st_dev = in->st_dev;
   out->st_ino = in->st_ino;
   out->st_mode = in->st_mode;
   out->st_nlink = in->st_nlink;
   out->st_uid = in->st_uid;
   out->st_gid = in->st_gid;
   out->st_rdev = in->st_rdev;
   out->st_size = in->st_size;
   out->st_blksize = in->st_blksize;
   out->st_blocks = in->st_blocks;
   // NOTE(review): the three timestamps are filled from the *_nsec members of
   // the kernel structure, not from its seconds members. That looks like the
   // wrong source field -- confirm against <asm/stat.h> and the st_*time macros
   // from <sys/stat.h>.
   out->st_atime = in->st_atime_nsec;
   out->st_mtime = in->st_mtime_nsec;
   out->st_ctime = in->st_ctime_nsec;
   // NOTE(review): st_ino was already copied above; this store is redundant.
   out->st_ino = in->st_ino;
 }
 #endif
 
 // Stats |path| through a raw syscall selected at compile time and returns the
 // raw syscall result. Branches that query an intermediate kernel structure
 // convert it and store the result into |buf| as a 'struct stat'.
 uptr internal_stat(const char *path, void *buf) {
 #if SANITIZER_FREEBSD
-  return internal_syscall(SYSCALL(stat), path, buf);
+  return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path,
+                          (uptr)buf, 0);
 #elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path,
                           (uptr)buf, 0);
 #elif SANITIZER_LINUX_USES_64BIT_SYSCALLS
 # if defined(__mips64)
   // For mips64, stat syscall fills buffer in the format of kernel_stat
   struct kernel_stat kbuf;
   int res = internal_syscall(SYSCALL(stat), path, &kbuf);
   // NOTE(review): the conversion runs even when |res| reports a failure.
   kernel_stat_to_stat(&kbuf, (struct stat *)buf);
   return res;
 # else
   return internal_syscall(SYSCALL(stat), (uptr)path, (uptr)buf);
 # endif
 #else
   // Remaining (32-bit syscall) targets: query stat64 and convert.
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(stat64), path, &buf64);
   stat64_to_stat(&buf64, (struct stat *)buf);
   return res;
 #endif
 }
 
 // Like internal_stat(), but uses the lstat family of syscalls (or the fstatat
 // family with AT_SYMLINK_NOFOLLOW). Returns the raw syscall result.
 uptr internal_lstat(const char *path, void *buf) {
 #if SANITIZER_FREEBSD
-  return internal_syscall(SYSCALL(lstat), path, buf);
+  return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path,
+                         (uptr)buf, AT_SYMLINK_NOFOLLOW);
 #elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path,
                          (uptr)buf, AT_SYMLINK_NOFOLLOW);
 #elif SANITIZER_LINUX_USES_64BIT_SYSCALLS
   // NOTE(review): internal_stat() tests defined(__mips64) here while this
   // function tests SANITIZER_MIPS64 -- presumably equivalent; confirm.
 # if SANITIZER_MIPS64
   // For mips64, lstat syscall fills buffer in the format of kernel_stat
   struct kernel_stat kbuf;
   int res = internal_syscall(SYSCALL(lstat), path, &kbuf);
   kernel_stat_to_stat(&kbuf, (struct stat *)buf);
   return res;
 # else
   return internal_syscall(SYSCALL(lstat), (uptr)path, (uptr)buf);
 # endif
 #else
   // Remaining (32-bit syscall) targets: query lstat64 and convert.
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(lstat64), path, &buf64);
   stat64_to_stat(&buf64, (struct stat *)buf);
   return res;
 #endif
 }
 
 // Stats the open descriptor |fd| through a raw syscall and returns the raw
 // syscall result. On mips64 and on 32-bit syscall targets the kernel structure
 // is converted and stored into |buf| as a 'struct stat'.
 uptr internal_fstat(fd_t fd, void *buf) {
 #if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
 # if SANITIZER_MIPS64
   // For mips64, fstat syscall fills buffer in the format of kernel_stat
   struct kernel_stat kbuf;
   int res = internal_syscall(SYSCALL(fstat), fd, &kbuf);
   kernel_stat_to_stat(&kbuf, (struct stat *)buf);
   return res;
 # else
   return internal_syscall(SYSCALL(fstat), fd, (uptr)buf);
 # endif
 #else
   // Remaining (32-bit syscall) targets: query fstat64 and convert.
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(fstat64), fd, &buf64);
   stat64_to_stat(&buf64, (struct stat *)buf);
   return res;
 #endif
 }
 
 // Returns st_size of |fd| as reported by internal_fstat(), or (uptr)-1 when
 // internal_fstat() returns a non-zero value.
 uptr internal_filesize(fd_t fd) {
   struct stat st;
   const uptr fstat_rv = internal_fstat(fd, &st);
   if (fstat_rv != 0)
     return -1;
   return (uptr)st.st_size;
 }
 
 // Duplicates |oldfd| onto |newfd| via dup2, or via dup3 with zero flags on
 // canonical-syscall targets. Returns the raw syscall result.
 uptr internal_dup2(int oldfd, int newfd) {
 #if !SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(dup2), oldfd, newfd);
 #else
   return internal_syscall(SYSCALL(dup3), oldfd, newfd, 0);
 #endif
 }
 
 // Raw readlink (readlinkat relative to AT_FDCWD on canonical-syscall targets)
 // of |path| into |buf|; returns the raw syscall result.
 uptr internal_readlink(const char *path, char *buf, uptr bufsize) {
   const uptr link_path = (uptr)path;
   const uptr out = (uptr)buf;
 #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(readlinkat), AT_FDCWD, link_path, out,
                           bufsize);
 #else
   return internal_syscall(SYSCALL(readlink), link_path, out, bufsize);
 #endif
 }
 
 // Raw unlink (unlinkat relative to AT_FDCWD with zero flags on
 // canonical-syscall targets); returns the raw syscall result.
 uptr internal_unlink(const char *path) {
   const uptr target = (uptr)path;
 #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(unlinkat), AT_FDCWD, target, 0);
 #else
   return internal_syscall(SYSCALL(unlink), target);
 #endif
 }
 
 // Raw rename (renameat with both paths relative to AT_FDCWD on
 // canonical-syscall targets); returns the raw syscall result.
 uptr internal_rename(const char *oldpath, const char *newpath) {
   const uptr from = (uptr)oldpath;
   const uptr to = (uptr)newpath;
 #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(renameat), AT_FDCWD, from, AT_FDCWD, to);
 #else
   return internal_syscall(SYSCALL(rename), from, to);
 #endif
 }
 
 // Raw sched_yield syscall; returns the raw syscall result.
 uptr internal_sched_yield() {
   const uptr rv = internal_syscall(SYSCALL(sched_yield));
   return rv;
 }
 
 // Terminates via a raw syscall with |exitcode|: 'exit' on FreeBSD,
 // 'exit_group' elsewhere. Die() is called afterwards as a fallback in case the
 // syscall returns.
 void internal__exit(int exitcode) {
 #if SANITIZER_FREEBSD
   internal_syscall(SYSCALL(exit), exitcode);
 #else
   internal_syscall(SYSCALL(exit_group), exitcode);
 #endif
   Die();  // Unreachable.
 }
 
 // Sleeps for |seconds| seconds using the raw nanosleep syscall.
 // Returns 0 when the syscall reports success; otherwise returns the tv_sec
 // value left in the timespec, which is passed to the syscall as both the
 // request and the remaining-time buffer.
 unsigned int internal_sleep(unsigned int seconds) {
   struct timespec ts;
   // Honor the requested duration (previously hard-coded to one second, which
   // silently ignored the argument).
   ts.tv_sec = seconds;
   ts.tv_nsec = 0;
   int res = internal_syscall(SYSCALL(nanosleep), &ts, &ts);
   if (res) return ts.tv_sec;
   return 0;
 }
 
 // Raw execve syscall; returns the raw syscall result.
 uptr internal_execve(const char *filename, char *const argv[],
                      char *const envp[]) {
   const uptr image = (uptr)filename;
   const uptr args = (uptr)argv;
   const uptr env = (uptr)envp;
   return internal_syscall(SYSCALL(execve), image, args, env);
 }
 
 // ----------------- sanitizer_common.h
 // Returns true only when stat'ing |filename| succeeds and the result describes
 // a regular file.
 bool FileExists(const char *filename) {
   struct stat st;
 #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   const bool stat_failed =
       internal_syscall(SYSCALL(newfstatat), AT_FDCWD, filename, &st, 0) != 0;
 #else
   const bool stat_failed = internal_stat(filename, &st) != 0;
 #endif
   if (stat_failed)
     return false;
   // Sanity check: filename is a regular file.
   return S_ISREG(st.st_mode);
 }
 
 // Returns an identifier for the calling thread: the pthread_self() value cast
 // to uptr on FreeBSD, the result of the raw gettid syscall elsewhere.
 uptr GetTid() {
 #if SANITIZER_FREEBSD
   return (uptr)pthread_self();
 #else
   return internal_syscall(SYSCALL(gettid));
 #endif
 }
 
 // Returns the time reported by the raw gettimeofday syscall, converted to
 // nanoseconds. The buffer is zeroed first and the syscall result is ignored.
 u64 NanoTime() {
 #if SANITIZER_FREEBSD
   timeval now;
 #else
   kernel_timeval now;
 #endif
   internal_memset(&now, 0, sizeof(now));
   internal_syscall(SYSCALL(gettimeofday), (uptr)&now, 0);
   const u64 seconds_as_ns = (u64)now.tv_sec * 1000*1000*1000;
   return seconds_as_ns + now.tv_usec * 1000;
 }
 
 // Like getenv, but reads env directly from /proc (on Linux) or parses the
 // 'environ' array (on FreeBSD) and does not use libc. This function should be
 // called first inside __asan_init.
 // Returns a pointer to the value of environment variable |name| (the text
 // after "name="), or null when it is not found.
 const char *GetEnv(const char *name) {
 #if SANITIZER_FREEBSD
   if (::environ != 0) {
     uptr NameLen = internal_strlen(name);
     // Linear scan of the NULL-terminated 'environ' array for "name=...".
     for (char **Env = ::environ; *Env != 0; Env++) {
       if (internal_strncmp(*Env, name, NameLen) == 0 && (*Env)[NameLen] == '=')
         return (*Env) + NameLen + 1;
     }
   }
   return 0;  // Not found.
 #elif SANITIZER_LINUX
   // The file contents are read once and kept in function-local statics;
   // returned pointers point into that buffer.
   static char *environ;
   static uptr len;
   static bool inited;
   if (!inited) {
     inited = true;
     uptr environ_size;
     if (!ReadFileToBuffer("/proc/self/environ", &environ, &environ_size, &len))
       environ = nullptr;
   }
   if (!environ || len == 0) return nullptr;
   uptr namelen = internal_strlen(name);
   const char *p = environ;
   while (*p != '\0') {  // will happen at the \0\0 that terminates the buffer
     // proc file has the format NAME=value\0NAME=value\0NAME=value\0...
     const char* endp =
         (char*)internal_memchr(p, '\0', len - (p - environ));
     if (!endp)  // this entry isn't NUL terminated
       return nullptr;
     // NOTE(review): the comparison below reads |namelen| bytes from |p| without
     // checking that the current entry is that long -- confirm the buffer has
     // enough readable slack after the data.
     else if (!internal_memcmp(p, name, namelen) && p[namelen] == '=')  // Match.
       return p + namelen + 1;  // point after =
     p = endp + 1;
   }
   return nullptr;  // Not found.
 #else
 #error "Unsupported platform"
 #endif
 }
 
 #if !SANITIZER_FREEBSD
 extern "C" {
   SANITIZER_WEAK_ATTRIBUTE extern void *__libc_stack_end;
 }
 #endif
 
 #if !SANITIZER_GO && !SANITIZER_FREEBSD
 static void ReadNullSepFileToArray(const char *path, char ***arr,
                                    int arr_size) {
   char *buff;
   uptr buff_size;
   uptr buff_len;
   *arr = (char **)MmapOrDie(arr_size * sizeof(char *), "NullSepFileArray");
   if (!ReadFileToBuffer(path, &buff, &buff_size, &buff_len, 1024 * 1024)) {
     (*arr)[0] = nullptr;
     return;
   }
   (*arr)[0] = buff;
   int count, i;
   for (count = 1, i = 1; ; i++) {
     if (buff[i] == 0) {
       if (buff[i+1] == 0) break;
       (*arr)[count] = &buff[i+1];
       CHECK_LE(count, arr_size - 1);  // FIXME: make this more flexible.
       count++;
     }
   }
   (*arr)[count] = nullptr;
 }
 #endif
 
 // Stores the process's argument vector in |*argv| and its environment vector
 // in |*envp|.
 static void GetArgsAndEnv(char ***argv, char ***envp) {
 #if !SANITIZER_FREEBSD
 #if !SANITIZER_GO
   // __libc_stack_end is declared weak, so its address is null when the symbol
   // is not provided; fall back to /proc in that case.
   if (&__libc_stack_end) {
 #endif
     // Layout read here: argc at the stack end, followed by argv[0..argc-1], a
     // terminating slot, and then the environment pointers.
     uptr* stack_end = (uptr*)__libc_stack_end;
     int argc = *stack_end;
     *argv = (char**)(stack_end + 1);
     *envp = (char**)(stack_end + argc + 2);
 #if !SANITIZER_GO
   } else {
     static const int kMaxArgv = 2000, kMaxEnvp = 2000;
     ReadNullSepFileToArray("/proc/self/cmdline", argv, kMaxArgv);
     ReadNullSepFileToArray("/proc/self/environ", envp, kMaxEnvp);
   }
 #endif
 #else
   // On FreeBSD, retrieving the argument and environment arrays is done via the
   // kern.ps_strings sysctl, which returns a pointer to a structure containing
   // this information. See also <sys/exec.h>.
   ps_strings *pss;
   size_t sz = sizeof(pss);
   if (sysctlbyname("kern.ps_strings", &pss, &sz, NULL, 0) == -1) {
     Printf("sysctl kern.ps_strings failed\n");
     Die();
   }
   *argv = pss->ps_argvstr;
   *envp = pss->ps_envstr;
 #endif
 }
 
 // Returns the argument vector obtained from GetArgsAndEnv(); the environment
 // vector fetched alongside it is discarded.
 char **GetArgv() {
   char **args;
   char **unused_env;
   GetArgsAndEnv(&args, &unused_env);
   return args;
 }
 
 // Re-executes the current binary ("/proc/self/exe") with the original
 // arguments and environment. If execve returns, the error is reported and the
 // process dies; this function never returns normally.
 void ReExec() {
   char **argv, **envp;
   GetArgsAndEnv(&argv, &envp);
   uptr rv = internal_execve("/proc/self/exe", argv, envp);
   int rverrno;
   // Reaching this point means execve came back, which must be an error.
   CHECK_EQ(internal_iserror(rv, &rverrno), true);
   Printf("execve failed, errno %d\n", rverrno);
   Die();
 }
 
 // Values stored in the BlockingMutex state word.
 enum MutexState {
   MtxUnlocked = 0,  // Not held.
   MtxLocked = 1,    // Held; Unlock() issues no wake-up for this state.
   MtxSleeping = 2   // Held, set by a contending Lock(); Unlock() wakes a waiter.
 };
 
 // Zero-fills the whole object, which leaves the state word at MtxUnlocked (0).
 BlockingMutex::BlockingMutex() {
   internal_memset(this, 0, sizeof(*this));
 }
 
 // Acquires the mutex, blocking in the kernel (umtx on FreeBSD, futex
 // elsewhere) while it is held by someone else.
 void BlockingMutex::Lock() {
   CHECK_EQ(owner_, 0);
   atomic_uint32_t *m = reinterpret_cast<atomic_uint32_t *>(&opaque_storage_);
   // Fast path: the word was MtxUnlocked and is now MtxLocked.
   if (atomic_exchange(m, MtxLocked, memory_order_acquire) == MtxUnlocked)
     return;
   // Slow path: mark the word MtxSleeping so Unlock() knows to wake someone,
   // and wait until an exchange observes MtxUnlocked. The wait call only blocks
   // while the word still equals MtxSleeping.
   while (atomic_exchange(m, MtxSleeping, memory_order_acquire) != MtxUnlocked) {
 #if SANITIZER_FREEBSD
     _umtx_op(m, UMTX_OP_WAIT_UINT, MtxSleeping, 0, 0);
 #else
     internal_syscall(SYSCALL(futex), (uptr)m, FUTEX_WAIT, MtxSleeping, 0, 0, 0);
 #endif
   }
 }
 
 // Releases the mutex and, if a contending Lock() marked the word
 // MtxSleeping, wakes one waiter (umtx on FreeBSD, futex elsewhere).
 void BlockingMutex::Unlock() {
   atomic_uint32_t *m = reinterpret_cast<atomic_uint32_t *>(&opaque_storage_);
   // Release ordering pairs with the acquire exchanges in Lock(), so writes
   // made inside the critical section are visible to the next owner. A relaxed
   // exchange here would give no such guarantee.
   u32 v = atomic_exchange(m, MtxUnlocked, memory_order_release);
   CHECK_NE(v, MtxUnlocked);
   if (v == MtxSleeping) {
 #if SANITIZER_FREEBSD
     _umtx_op(m, UMTX_OP_WAKE, 1, 0, 0);
 #else
     internal_syscall(SYSCALL(futex), (uptr)m, FUTEX_WAKE, 1, 0, 0, 0);
 #endif
   }
 }
 
 // Asserts (CHECK) that the state word is not MtxUnlocked. It does not verify
 // which thread holds the mutex.
 void BlockingMutex::CheckLocked() {
   atomic_uint32_t *m = reinterpret_cast<atomic_uint32_t *>(&opaque_storage_);
   CHECK_NE(MtxUnlocked, atomic_load(m, memory_order_relaxed));
 }
 
 // ----------------- sanitizer_linux.h
 // The actual size of this structure is specified by d_reclen.
 // Note that getdents64 uses a different structure format. We only provide the
 // 32-bit syscall here.
 // Directory entry record as read by internal_getdents() and walked by
 // ThreadLister. Field widths and the presence of d_type vary by target.
 struct linux_dirent {
 #if SANITIZER_X32 || defined(__aarch64__)
   u64 d_ino;
   u64 d_off;
 #else
   unsigned long      d_ino;
   unsigned long      d_off;
 #endif
   unsigned short     d_reclen;  // Byte length of this record; used to step to
                                 // the next one.
 #ifdef __aarch64__
   unsigned char      d_type;
 #endif
   char               d_name[256];
 };
 
 // Syscall wrappers.
 // Raw ptrace syscall; returns the raw syscall result.
 uptr internal_ptrace(int request, int pid, void *addr, void *data) {
   const uptr addr_arg = (uptr)addr;
   const uptr data_arg = (uptr)data;
   return internal_syscall(SYSCALL(ptrace), request, pid, addr_arg, data_arg);
 }
 
 // Implemented on top of the raw wait4 syscall with a zero rusage argument;
 // returns the raw syscall result.
 uptr internal_waitpid(int pid, int *status, int options) {
   const uptr status_out = (uptr)status;
   return internal_syscall(SYSCALL(wait4), pid, status_out, options,
                           0 /* rusage */);
 }
 
 // Raw getpid syscall; returns the raw syscall result.
 uptr internal_getpid() {
   const uptr rv = internal_syscall(SYSCALL(getpid));
   return rv;
 }
 
 // Raw getppid syscall; returns the raw syscall result.
 uptr internal_getppid() {
   const uptr rv = internal_syscall(SYSCALL(getppid));
   return rv;
 }
 
 // Reads directory entries from |fd| into |dirp| (at most |count| bytes) via a
 // raw syscall and returns the raw syscall result.
 // NOTE(review): the buffer is typed as linux_dirent on every branch, while the
 // syscall differs (getdirentries / getdents64 / getdents) -- confirm the record
 // layout matches on each target.
 uptr internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count) {
-#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
+#if SANITIZER_FREEBSD
+  return internal_syscall(SYSCALL(getdirentries), fd, (uptr)dirp, count, NULL);
+#elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(getdents64), fd, (uptr)dirp, count);
 #else
   return internal_syscall(SYSCALL(getdents), fd, (uptr)dirp, count);
 #endif
 }
 
 // Raw lseek syscall; returns the raw syscall result.
 uptr internal_lseek(fd_t fd, OFF_T offset, int whence) {
   const uptr rv = internal_syscall(SYSCALL(lseek), fd, offset, whence);
   return rv;
 }
 
 #if SANITIZER_LINUX
 // Raw prctl syscall (Linux only); all five arguments are forwarded unchanged
 // and the raw syscall result is returned.
 uptr internal_prctl(int option, uptr arg2, uptr arg3, uptr arg4, uptr arg5) {
   const uptr rv =
       internal_syscall(SYSCALL(prctl), option, arg2, arg3, arg4, arg5);
   return rv;
 }
 #endif
 
 // Raw sigaltstack syscall; returns the raw syscall result.
 uptr internal_sigaltstack(const struct sigaltstack *ss,
                          struct sigaltstack *oss) {
   const uptr new_stack = (uptr)ss;
   const uptr old_stack = (uptr)oss;
   return internal_syscall(SYSCALL(sigaltstack), new_stack, old_stack);
 }
 
 // Forks via the raw fork syscall, or via clone(SIGCHLD, 0) on
 // canonical-syscall targets. Returns the raw syscall result as an int.
 int internal_fork() {
 #if !SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(fork));
 #else
   return internal_syscall(SYSCALL(clone), SIGCHLD, 0);
 #endif
 }
 
 #if SANITIZER_LINUX
 #define SA_RESTORER 0x04000000
 // Installs and/or queries the action for |signum| through the raw
 // rt_sigaction syscall, translating between __sanitizer_sigaction (user
 // layout) and __sanitizer_kernel_sigaction_t (kernel layout).
 // |act| and |oldact| may each be null. Returns the raw syscall result.
 // Doesn't set sa_restorer if the caller did not set it, so use with caution
 // (see below).
 int internal_sigaction_norestorer(int signum, const void *act, void *oldact) {
   __sanitizer_kernel_sigaction_t k_act, k_oldact;
   internal_memset(&k_act, 0, sizeof(__sanitizer_kernel_sigaction_t));
   internal_memset(&k_oldact, 0, sizeof(__sanitizer_kernel_sigaction_t));
   const __sanitizer_sigaction *u_act = (const __sanitizer_sigaction *)act;
   __sanitizer_sigaction *u_oldact = (__sanitizer_sigaction *)oldact;
   if (u_act) {
     // User layout -> kernel layout.
     k_act.handler = u_act->handler;
     k_act.sigaction = u_act->sigaction;
     internal_memcpy(&k_act.sa_mask, &u_act->sa_mask,
                     sizeof(__sanitizer_kernel_sigset_t));
     // Without SA_RESTORER kernel ignores the calls (probably returns EINVAL).
     k_act.sa_flags = u_act->sa_flags | SA_RESTORER;
     // FIXME: most often sa_restorer is unset, however the kernel requires it
     // to point to a valid signal restorer that calls the rt_sigreturn syscall.
     // If sa_restorer passed to the kernel is NULL, the program may crash upon
     // signal delivery or fail to unwind the stack in the signal handler.
     // libc implementation of sigaction() passes its own restorer to
     // rt_sigaction, so we need to do the same (we'll need to reimplement the
     // restorers; for x86_64 the restorer address can be obtained from
     // oldact->sa_restorer upon a call to sigaction(xxx, NULL, oldact).
 #if !SANITIZER_ANDROID || !SANITIZER_MIPS32
     k_act.sa_restorer = u_act->sa_restorer;
 #endif
   }
 
   // Null pointers are forwarded as null so the kernel skips that direction.
   uptr result = internal_syscall(SYSCALL(rt_sigaction), (uptr)signum,
       (uptr)(u_act ? &k_act : nullptr),
       (uptr)(u_oldact ? &k_oldact : nullptr),
       (uptr)sizeof(__sanitizer_kernel_sigset_t));
 
   if ((result == 0) && u_oldact) {
     // Kernel layout -> user layout, only when the syscall succeeded.
     u_oldact->handler = k_oldact.handler;
     u_oldact->sigaction = k_oldact.sigaction;
     internal_memcpy(&u_oldact->sa_mask, &k_oldact.sa_mask,
                     sizeof(__sanitizer_kernel_sigset_t));
     u_oldact->sa_flags = k_oldact.sa_flags;
 #if !SANITIZER_ANDROID || !SANITIZER_MIPS32
     u_oldact->sa_restorer = k_oldact.sa_restorer;
 #endif
   }
   return result;
 }
 
 // Invokes sigaction via a raw syscall with a restorer, but does not support
 // all platforms yet.
 // We disable for Go simply because we have not yet added to buildgo.sh.
 #if (defined(__x86_64__) || SANITIZER_MIPS64) && !SANITIZER_GO
 int internal_sigaction_syscall(int signum, const void *act, void *oldact) {
   if (act == nullptr)
     return internal_sigaction_norestorer(signum, act, oldact);
   __sanitizer_sigaction u_adjust;
   internal_memcpy(&u_adjust, act, sizeof(u_adjust));
 #if !SANITIZER_ANDROID || !SANITIZER_MIPS32
     if (u_adjust.sa_restorer == nullptr) {
       u_adjust.sa_restorer = internal_sigreturn;
     }
 #endif
     return internal_sigaction_norestorer(signum, (const void *)&u_adjust,
                                          oldact);
 }
 #endif // defined(__x86_64__) && !SANITIZER_GO
 #endif  // SANITIZER_LINUX
 
 // Changes and/or fetches the signal mask through a raw syscall: sigprocmask on
 // FreeBSD, rt_sigprocmask elsewhere. Returns the raw syscall result.
 uptr internal_sigprocmask(int how, __sanitizer_sigset_t *set,
     __sanitizer_sigset_t *oldset) {
 #if SANITIZER_FREEBSD
   return internal_syscall(SYSCALL(sigprocmask), how, set, oldset);
 #else
   // The kernel takes its own (smaller or equal) sigset layout; the user sets
   // are reinterpreted in place and the kernel set size is passed explicitly.
   __sanitizer_kernel_sigset_t *k_set = (__sanitizer_kernel_sigset_t *)set;
   __sanitizer_kernel_sigset_t *k_oldset = (__sanitizer_kernel_sigset_t *)oldset;
   // NOTE(review): &k_set->sig[0] is formed even if |set| or |oldset| is null;
   // presumably sig is the first member so null stays null -- confirm.
   return internal_syscall(SYSCALL(rt_sigprocmask), (uptr)how,
                           (uptr)&k_set->sig[0], (uptr)&k_oldset->sig[0],
                           sizeof(__sanitizer_kernel_sigset_t));
 #endif
 }
 
 // Sets every byte of |*set| to 0xff.
 void internal_sigfillset(__sanitizer_sigset_t *set) {
   const uptr set_bytes = sizeof(*set);
   internal_memset(set, 0xff, set_bytes);
 }
 
 // Sets every byte of |*set| to zero.
 void internal_sigemptyset(__sanitizer_sigset_t *set) {
   const uptr set_bytes = sizeof(*set);
   internal_memset(set, 0, set_bytes);
 }
 
 #if SANITIZER_LINUX
 // Clears the bit for signal |signum| (1-based) in |*set|, addressing the set
 // through the kernel sigset layout.
 void internal_sigdelset(__sanitizer_sigset_t *set, int signum) {
   signum -= 1;
   CHECK_GE(signum, 0);
   CHECK_LT(signum, sizeof(*set) * 8);
   __sanitizer_kernel_sigset_t *k_set = (__sanitizer_kernel_sigset_t *)set;
   const uptr idx = signum / (sizeof(k_set->sig[0]) * 8);
   const uptr bit = signum % (sizeof(k_set->sig[0]) * 8);
   // Build the mask in a word-sized unsigned type: |bit| can be as large as the
   // element width minus one, and shifting the int literal 1 by 31 or more is
   // undefined / yields a truncated mask when elements are wider than int.
   k_set->sig[idx] &= ~((uptr)1 << bit);
 }
 
 // Returns true when the bit for signal |signum| (1-based) is set in |*set|,
 // addressing the set through the kernel sigset layout.
 bool internal_sigismember(__sanitizer_sigset_t *set, int signum) {
   signum -= 1;
   CHECK_GE(signum, 0);
   CHECK_LT(signum, sizeof(*set) * 8);
   __sanitizer_kernel_sigset_t *k_set = (__sanitizer_kernel_sigset_t *)set;
   const uptr idx = signum / (sizeof(k_set->sig[0]) * 8);
   const uptr bit = signum % (sizeof(k_set->sig[0]) * 8);
   // Word-sized unsigned mask; see internal_sigdelset() for why an int shift is
   // not safe here. The explicit comparison keeps the bool conversion obvious.
   return (k_set->sig[idx] & ((uptr)1 << bit)) != 0;
 }
 #endif  // SANITIZER_LINUX
 
 // ThreadLister implementation.
 // Opens /proc/<pid>/task/ for directory reading. On failure error_ stays true
 // and a report is printed; on success the descriptor is kept for later
 // GetDirectoryEntries() calls.
 ThreadLister::ThreadLister(int pid)
   : pid_(pid),
     descriptor_(-1),
     buffer_(4096),
     error_(true),
     entry_((struct linux_dirent *)buffer_.data()),
     bytes_read_(0) {
   char task_directory_path[80];
   internal_snprintf(task_directory_path, sizeof(task_directory_path),
                     "/proc/%d/task/", pid);
   uptr openrv = internal_open(task_directory_path, O_RDONLY | O_DIRECTORY);
   if (internal_iserror(openrv)) {
     error_ = true;  // Already true from the initializer list.
     Report("Can't open /proc/%d/task for reading.\n", pid);
   } else {
     error_ = false;
     descriptor_ = openrv;
   }
 }
 
 // Returns the next thread id found in the task directory, or -1 on error or
 // when no more entries can be read.
 int ThreadLister::GetNextTID() {
   int tid = -1;
   do {
     if (error_)
       return -1;
     // Refill the buffer once the cursor has moved past the bytes read so far.
     if ((char *)entry_ >= &buffer_[bytes_read_] && !GetDirectoryEntries())
       return -1;
     // Only entries whose name starts with a digit are treated as tids.
     if (entry_->d_ino != 0 && entry_->d_name[0] >= '0' &&
         entry_->d_name[0] <= '9') {
       // Found a valid tid.
       tid = (int)internal_atoll(entry_->d_name);
     }
     // Advance to the next variable-length record.
     entry_ = (struct linux_dirent *)(((char *)entry_) + entry_->d_reclen);
   } while (tid < 0);
   return tid;
 }
 
 // Rewinds the directory descriptor to offset 0; a no-op when the lister is in
 // the error state or has no descriptor.
 // NOTE(review): entry_ and bytes_read_ are not reset here, so entries already
 // buffered remain pending -- confirm that is intended.
 void ThreadLister::Reset() {
   if (error_ || descriptor_ < 0)
     return;
   internal_lseek(descriptor_, 0, SEEK_SET);
 }
 
 // Closes the task directory descriptor if one was opened.
 ThreadLister::~ThreadLister() {
   if (descriptor_ >= 0)
     internal_close(descriptor_);
 }
 
 // Returns the current value of the error flag.
 bool ThreadLister::error() { return error_; }
 
 // Refills the buffer with directory entries and points entry_ at its start.
 // Returns false on a read error (also setting error_) or when no bytes were
 // read; returns true otherwise.
 bool ThreadLister::GetDirectoryEntries() {
   CHECK_GE(descriptor_, 0);
   CHECK_NE(error_, true);
   bytes_read_ = internal_getdents(descriptor_,
                                   (struct linux_dirent *)buffer_.data(),
                                   buffer_.size());
   if (internal_iserror(bytes_read_)) {
     Report("Can't read directory entries from /proc/%d/task.\n", pid_);
     error_ = true;
     return false;
   } else if (bytes_read_ == 0) {
     // Nothing more to read; not flagged as an error.
     return false;
   }
   entry_ = (struct linux_dirent *)buffer_.data();
   return true;
 }
 
 // Returns the page size: a fixed 4096 on Android, EXEC_PAGESIZE on x86/x86_64
 // Linux, and sysconf(_SC_PAGESIZE) everywhere else.
 uptr GetPageSize() {
 // Android post-M sysconf(_SC_PAGESIZE) crashes if called from .preinit_array.
 #if SANITIZER_ANDROID
   return 4096;
 #elif SANITIZER_LINUX && (defined(__x86_64__) || defined(__i386__))
   return EXEC_PAGESIZE;
 #else
   return sysconf(_SC_PAGESIZE);  // EXEC_PAGESIZE may not be trustworthy.
 #endif
 }
 
 // Writes the path of the running executable into |buf| (capacity |buf_len|)
 // and returns the length reported by the lookup. The path comes from the
 // kern.proc.pathname sysctl on FreeBSD and from readlink("/proc/self/exe")
 // elsewhere. If the lookup fails, a warning is printed and the name of the
 // lookup source itself is written to |buf| instead.
 uptr ReadBinaryName(/*out*/char *buf, uptr buf_len) {
 #if SANITIZER_FREEBSD
   const int Mib[] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
   const char *default_module_name = "kern.proc.pathname";
   size_t Size = buf_len;
   bool IsErr = (sysctl(Mib, ARRAY_SIZE(Mib), buf, &Size, NULL, 0) != 0);
   int readlink_error = IsErr ? errno : 0;
   uptr module_name_len = Size;
 #else
   const char *default_module_name = "/proc/self/exe";
   uptr module_name_len = internal_readlink(
       default_module_name, buf, buf_len);
   int readlink_error;
   bool IsErr = internal_iserror(module_name_len, &readlink_error);
 #endif
   // NOTE(review): on the readlink path nothing here NUL-terminates |buf| --
   // confirm callers rely on the returned length only.
   if (IsErr) {
     // We can't read binary name for some reason, assume it's unknown.
     Report("WARNING: reading executable name failed with errno %d, "
            "some stack frames may not be symbolized\n", readlink_error);
     module_name_len = internal_snprintf(buf, buf_len, "%s",
                                         default_module_name);
     CHECK_LT(module_name_len, buf_len);
   }
   return module_name_len;
 }
 
 // Writes a process name into |buf| (capacity |buf_len|) and returns its
 // length. On Linux the first NUL-terminated string of /proc/self/cmdline is
 // used when that file can be read; otherwise, and on other platforms, this
 // falls back to ReadBinaryName().
 uptr ReadLongProcessName(/*out*/ char *buf, uptr buf_len) {
 #if SANITIZER_LINUX
   char *tmpbuf;
   uptr tmpsize;
   uptr tmplen;
   if (ReadFileToBuffer("/proc/self/cmdline", &tmpbuf, &tmpsize, &tmplen,
                        1024 * 1024)) {
     internal_strncpy(buf, tmpbuf, buf_len);
     // internal_strncpy is defined elsewhere; assuming strncpy-like semantics,
     // a source of |buf_len| or more characters leaves |buf| unterminated and
     // the internal_strlen() below would run off its end. Terminate explicitly.
     if (buf_len > 0)
       buf[buf_len - 1] = '\0';
     UnmapOrDie(tmpbuf, tmpsize);
     return internal_strlen(buf);
   }
 #endif
   return ReadBinaryName(buf, buf_len);
 }
 
 // Match full names of the form /path/to/base_name{-,.}*
 bool LibraryNameIs(const char *full_name, const char *base_name) {
   const char *name = full_name;
   // Strip path.
   while (*name != '\0') name++;
   while (name > full_name && *name != '/') name--;
   if (*name == '/') name++;
   uptr base_name_length = internal_strlen(base_name);
   if (internal_strncmp(name, base_name, base_name_length)) return false;
   return (name[base_name_length] == '-' || name[base_name_length] == '.');
 }
 
 #if !SANITIZER_ANDROID
 // Call cb for each region mapped by map.
 // For every PT_LOAD program header of the object described by |map|, |cb| is
 // invoked with the page-aligned start address and size of that segment as
 // loaded in memory. |map| must not be null.
 void ForEachMappedRegion(link_map *map, void (*cb)(const void *, uptr)) {
   CHECK_NE(map, nullptr);
 #if !SANITIZER_FREEBSD
   typedef ElfW(Phdr) Elf_Phdr;
   typedef ElfW(Ehdr) Elf_Ehdr;
 #endif  // !SANITIZER_FREEBSD
   // The ELF header is read from the address stored in l_addr; the program
   // header table is located through e_phoff/e_phnum/e_phentsize.
   char *base = (char *)map->l_addr;
   Elf_Ehdr *ehdr = (Elf_Ehdr *)base;
   char *phdrs = base + ehdr->e_phoff;
   char *phdrs_end = phdrs + ehdr->e_phnum * ehdr->e_phentsize;
 
   // Find the segment with the minimum base so we can "relocate" the p_vaddr
   // fields.  Typically ET_DYN objects (DSOs) have base of zero and ET_EXEC
   // objects have a non-zero base.
   uptr preferred_base = (uptr)-1;
   for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) {
     Elf_Phdr *phdr = (Elf_Phdr *)iter;
     if (phdr->p_type == PT_LOAD && preferred_base > (uptr)phdr->p_vaddr)
       preferred_base = (uptr)phdr->p_vaddr;
   }
 
   // Compute the delta from the real base to get a relocation delta.
   sptr delta = (uptr)base - preferred_base;
   // Now we can figure out what the loader really mapped.
   for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) {
     Elf_Phdr *phdr = (Elf_Phdr *)iter;
     if (phdr->p_type == PT_LOAD) {
       uptr seg_start = phdr->p_vaddr + delta;
       uptr seg_end = seg_start + phdr->p_memsz;
       // None of these values are aligned.  We consider the ragged edges of the
       // load command as defined, since they are mapped from the file.
       seg_start = RoundDownTo(seg_start, GetPageSizeCached());
       seg_end = RoundUpTo(seg_end, GetPageSizeCached());
       cb((void *)seg_start, seg_end - seg_start);
     }
   }
 }
 #endif
 
 #if defined(__x86_64__) && SANITIZER_LINUX
 // We cannot use glibc's clone wrapper, because it messes with the child
 // task's TLS. It writes the PID and TID of the child task to its thread
 // descriptor, but in our case the child task shares the thread descriptor with
 // the parent (because we don't know how to allocate a new thread
 // descriptor to keep glibc happy). So the stock version of clone(), when
 // used with CLONE_VM, would end up corrupting the parent's thread descriptor.
 //
 // x86_64 implementation.  Issues the clone syscall directly.  In the child
 // (syscall result 0) it calls fn(arg) and hands fn's return value to the exit
 // syscall; in the parent it returns the raw syscall result.  Returns -EINVAL
 // without issuing the syscall when fn or child_stack is null.  child_stack
 // must be 16-byte aligned (enforced with CHECK_EQ).
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                     int *parent_tidptr, void *newtls, int *child_tidptr) {
   long long res;
   if (!fn || !child_stack)
     return -EINVAL;
   CHECK_EQ(0, (uptr)child_stack % 16);
   // Reserve two 8-byte slots at the top of the child's stack and park fn and
   // arg there; the child pops them back off after the syscall returns.
   child_stack = (char *)child_stack - 2 * sizeof(unsigned long long);
   ((unsigned long long *)child_stack)[0] = (uptr)fn;
   ((unsigned long long *)child_stack)[1] = (uptr)arg;
   // Pin the last two syscall arguments to the registers named in the asm
   // comment below.
   register void *r8 __asm__("r8") = newtls;
   register int *r10 __asm__("r10") = child_tidptr;
   __asm__ __volatile__(
                        /* %rax = syscall(%rax = SYSCALL(clone),
                         *                %rdi = flags,
                         *                %rsi = child_stack,
                         *                %rdx = parent_tidptr,
                         *                %r8  = new_tls,
                         *                %r10 = child_tidptr)
                         */
                        "syscall\n"
 
                        /* if (%rax != 0)
                         *   return;
                         */
                        "testq  %%rax,%%rax\n"
                        "jnz    1f\n"
 
                        /* In the child. Terminate unwind chain. */
                        // XXX: We should also terminate the CFI unwind chain
                        // here. Unfortunately clang 3.2 doesn't support the
                        // necessary CFI directives, so we skip that part.
                        "xorq   %%rbp,%%rbp\n"
 
                        /* Call "fn(arg)". */
                        "popq   %%rax\n"
                        "popq   %%rdi\n"
                        "call   *%%rax\n"
 
                        /* Call _exit(%rax). */
                        "movq   %%rax,%%rdi\n"
                        "movq   %2,%%rax\n"
                        "syscall\n"
 
                        /* Return to parent. */
                      "1:\n"
                        : "=a" (res)
                        : "a"(SYSCALL(clone)), "i"(SYSCALL(exit)),
                          "S"(child_stack),
                          "D"(flags),
                          "d"(parent_tidptr),
                          "r"(r8),
                          "r"(r10)
                        : "rsp", "memory", "r11", "rcx");
   return res;
 }
 #elif defined(__mips__)
 // MIPS implementation of internal_clone.  Issues the __NR_clone syscall
 // directly.  In the child (syscall result 0) it loads fn and arg back from the
 // child's stack, calls fn(arg) and passes the result to the __NR_exit
 // syscall; in the parent it returns the value left in the output operand.
 // Returns -EINVAL without issuing the syscall when fn or child_stack is null.
 // child_stack must be 16-byte aligned (enforced with CHECK_EQ).
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                     int *parent_tidptr, void *newtls, int *child_tidptr) {
   long long res;
   if (!fn || !child_stack)
     return -EINVAL;
   CHECK_EQ(0, (uptr)child_stack % 16);
   // Reserve two 8-byte slots on the child's stack for fn and arg; the child
   // reads them back relative to $29 after the syscall.
   child_stack = (char *)child_stack - 2 * sizeof(unsigned long long);
   ((unsigned long long *)child_stack)[0] = (uptr)fn;
   ((unsigned long long *)child_stack)[1] = (uptr)arg;
   register void *a3 __asm__("$7") = newtls;
   register int *a4 __asm__("$8") = child_tidptr;
   // We don't have proper CFI directives here because it requires a lot of code
   // for very marginal benefits.
   __asm__ __volatile__(
                        /* $v0 = syscall($v0 = __NR_clone,
                         * $a0 = flags,
                         * $a1 = child_stack,
                         * $a2 = parent_tidptr,
                         * $a3 = new_tls,
                         * $a4 = child_tidptr)
                         */
                        ".cprestore 16;\n"
                        "move $4,%1;\n"
                        "move $5,%2;\n"
                        "move $6,%3;\n"
                        "move $7,%4;\n"
                        /* Store the fifth argument on stack
                         * if we are using 32-bit abi.
                         */
 #if SANITIZER_WORDSIZE == 32
                        "lw %5,16($29);\n"
 #else
                        "move $8,%5;\n"
 #endif
                        "li $2,%6;\n"
                        "syscall;\n"
 
                        /* if ($v0 != 0)
                         * return;
                         */
                        "bnez $2,1f;\n"
 
                        /* Call "fn(arg)". */
                        /* The slots are 8 bytes wide; on 32-bit targets the
                         * offset of the 4-byte value inside each slot depends
                         * on endianness.
                         */
 #if SANITIZER_WORDSIZE == 32
 #ifdef __BIG_ENDIAN__
                        "lw $25,4($29);\n"
                        "lw $4,12($29);\n"
 #else
                        "lw $25,0($29);\n"
                        "lw $4,8($29);\n"
 #endif
 #else
                        "ld $25,0($29);\n"
                        "ld $4,8($29);\n"
 #endif
                        "jal $25;\n"
 
                        /* Call _exit($v0). */
                        "move $4,$2;\n"
                        "li $2,%7;\n"
                        "syscall;\n"
 
                        /* Return to parent. */
                      "1:\n"
                        : "=r" (res)
                        : "r"(flags),
                          "r"(child_stack),
                          "r"(parent_tidptr),
                          "r"(a3),
                          "r"(a4),
                          "i"(__NR_clone),
                          "i"(__NR_exit)
                        : "memory", "$29" );
   return res;
 }
 #elif defined(__aarch64__)
 // AArch64 implementation of internal_clone.  Issues the __NR_clone syscall
 // directly.  In the child (x0 == 0 after the syscall) it pops fn and arg off
 // the child's stack, calls fn(arg) and passes the result to the __NR_exit
 // syscall; in the parent it returns the value left in the output operand.
 // Returns -EINVAL without issuing the syscall when fn or child_stack is null.
 // child_stack must be 16-byte aligned (enforced with CHECK_EQ).
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                     int *parent_tidptr, void *newtls, int *child_tidptr) {
   long long res;
   if (!fn || !child_stack)
     return -EINVAL;
   CHECK_EQ(0, (uptr)child_stack % 16);
   // Reserve two 8-byte slots on the child's stack: [0] = fn, [1] = arg.  The
   // child reloads both with a single ldp after the syscall.
   child_stack = (char *)child_stack - 2 * sizeof(unsigned long long);
   ((unsigned long long *)child_stack)[0] = (uptr)fn;
   ((unsigned long long *)child_stack)[1] = (uptr)arg;
 
   // Pin every argument to a fixed register; the asm below then shuffles them
   // into the registers the syscall expects.
   register int (*__fn)(void *)  __asm__("x0") = fn;
   register void *__stack __asm__("x1") = child_stack;
   register int   __flags __asm__("x2") = flags;
   register void *__arg   __asm__("x3") = arg;
   register int  *__ptid  __asm__("x4") = parent_tidptr;
   register void *__tls   __asm__("x5") = newtls;
   register int  *__ctid  __asm__("x6") = child_tidptr;
 
   __asm__ __volatile__(
                        "mov x0,x2\n" /* flags  */
                        "mov x2,x4\n" /* ptid  */
                        "mov x3,x5\n" /* tls  */
                        "mov x4,x6\n" /* ctid  */
                        "mov x8,%9\n" /* clone  */
 
                        "svc 0x0\n"
 
                        /* if (%r0 != 0)
                         *   return %r0;
                         */
                        "cmp x0, #0\n"
                        "bne 1f\n"
 
                        /* In the child, now. Call "fn(arg)". */
                        "ldp x1, x0, [sp], #16\n"
                        "blr x1\n"
 
                        /* Call _exit(%r0).  */
                        "mov x8, %10\n"
                        "svc 0x0\n"
                      "1:\n"
 
                        : "=r" (res)
                        : "i"(-EINVAL),
                          "r"(__fn), "r"(__stack), "r"(__flags), "r"(__arg),
                          "r"(__ptid), "r"(__tls), "r"(__ctid),
                          "i"(__NR_clone), "i"(__NR_exit)
                        : "x30", "memory");
   return res;
 }
 #elif defined(__powerpc64__)
 // PowerPC64 implementation of internal_clone.  Issues the __NR_clone syscall
 // directly.  In the child it calls fn(arg) and passes the result to the
 // __NR_exit syscall; otherwise it returns the content of r3 after the
 // syscall.  Returns -EINVAL without issuing the syscall when fn or
 // child_stack is null.  child_stack must be 16-byte aligned (enforced with
 // CHECK_EQ).
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                    int *parent_tidptr, void *newtls, int *child_tidptr) {
   long long res;
 /* Stack frame offsets.  */
 /* Two sets of values are selected on _CALL_ELF; FRAME_TOC_SAVE is the stack
  * offset at which r2 is saved around the call to fn below.
  */
 #if _CALL_ELF != 2
 #define FRAME_MIN_SIZE         112
 #define FRAME_TOC_SAVE         40
 #else
 #define FRAME_MIN_SIZE         32
 #define FRAME_TOC_SAVE         24
 #endif
   if (!fn || !child_stack)
     return -EINVAL;
   CHECK_EQ(0, (uptr)child_stack % 16);
   child_stack = (char *)child_stack - 2 * sizeof(unsigned long long);
   ((unsigned long long *)child_stack)[0] = (uptr)fn;
   ((unsigned long long *)child_stack)[1] = (uptr)arg;
 
   register int (*__fn)(void *) __asm__("r3") = fn;
   register void *__cstack      __asm__("r4") = child_stack;
   register int __flags         __asm__("r5") = flags;
   register void * __arg        __asm__("r6") = arg;
   register int * __ptidptr     __asm__("r7") = parent_tidptr;
   register void * __newtls     __asm__("r8") = newtls;
   register int * __ctidptr     __asm__("r9") = child_tidptr;
 
  __asm__ __volatile__(
            /* fn, arg, child_stack are saved across the syscall */
            /* (%5 = fn -> r28, %6 = child_stack -> r29, %8 = arg -> r27) */
            "mr 28, %5\n\t"
            "mr 29, %6\n\t"
            "mr 27, %8\n\t"
 
            /* syscall
              r3 == flags
              r4 == child_stack
              r5 == parent_tidptr
              r6 == newtls
              r7 == child_tidptr */
            "mr 3, %7\n\t"
            "mr 5, %9\n\t"
            "mr 6, %10\n\t"
            "mr 7, %11\n\t"
            "li 0, %3\n\t"
            "sc\n\t"
 
            /* Test if syscall was successful */
            "cmpdi  cr1, 3, 0\n\t"
            "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
            "bne-   cr1, 1f\n\t"
 
            /* Do the function call */
            "std   2, %13(1)\n\t"
 #if _CALL_ELF != 2
            "ld    0, 0(28)\n\t"
            "ld    2, 8(28)\n\t"
            "mtctr 0\n\t"
 #else
            "mr    12, 28\n\t"
            "mtctr 12\n\t"
 #endif
            "mr    3, 27\n\t"
            "bctrl\n\t"
            "ld    2, %13(1)\n\t"
 
            /* Call _exit(r3) */
            "li 0, %4\n\t"
            "sc\n\t"
 
            /* Return to parent */
            "1:\n\t"
            "mr %0, 3\n\t"
              : "=r" (res)
              : "0" (-1), "i" (EINVAL),
                "i" (__NR_clone), "i" (__NR_exit),
                "r" (__fn), "r" (__cstack), "r" (__flags),
                "r" (__arg), "r" (__ptidptr), "r" (__newtls),
                "r" (__ctidptr), "i" (FRAME_MIN_SIZE), "i" (FRAME_TOC_SAVE)
              : "cr0", "cr1", "memory", "ctr",
                "r0", "r29", "r27", "r28");
   return res;
 }
 #endif  // defined(__x86_64__) && SANITIZER_LINUX
 
 #if SANITIZER_ANDROID
 #if __ANDROID_API__ < 21
 extern "C" __attribute__((weak)) int dl_iterate_phdr(
     int (*)(struct dl_phdr_info *, size_t, void *), void *);
 #endif
 
 // dl_iterate_phdr callback.  data points to a bool that is set to true, and
 // iteration is stopped (return 1), as soon as an entry whose name begins with
 // "lib" is seen.  Such a name indicates a bug in L where library base names
 // are returned instead of paths.  Otherwise returns 0 so iteration continues.
 static int dl_iterate_phdr_test_cb(struct dl_phdr_info *info, size_t size,
                                    void *data) {
   const char *name = info->dlpi_name;
   if (!name) return 0;
   if (name[0] != 'l' || name[1] != 'i' || name[2] != 'b') return 0;
   *(bool *)data = true;
   return 1;
 }
 
 static atomic_uint32_t android_api_level;
 
 // Classifies the running Android release from the behaviour of
 // dl_iterate_phdr:
 //   - symbol absent                     -> ANDROID_KITKAT (K or lower)
 //   - reports base names, not paths     -> ANDROID_LOLLIPOP_MR1 (L MR1)
 //   - otherwise                         -> ANDROID_POST_LOLLIPOP (post-L)
 // Plain L (API level 21) is completely broken wrt ASan and not very
 // interesting to detect.
 static AndroidApiLevel AndroidDetectApiLevel() {
   if (!&dl_iterate_phdr)
     return ANDROID_KITKAT;
   bool saw_base_name = false;
   dl_iterate_phdr(dl_iterate_phdr_test_cb, &saw_base_name);
   return saw_base_name ? ANDROID_LOLLIPOP_MR1 : ANDROID_POST_LOLLIPOP;
 }
 
 // Returns the detected Android API level.  The value is read from
 // android_api_level when that is already non-zero; otherwise it is detected
 // and stored there for later calls.  Both accesses use relaxed atomics.
 AndroidApiLevel AndroidGetApiLevel() {
   AndroidApiLevel cached =
       (AndroidApiLevel)atomic_load(&android_api_level, memory_order_relaxed);
   if (!cached) {
     cached = AndroidDetectApiLevel();
     atomic_store(&android_api_level, cached, memory_order_relaxed);
   }
   return cached;
 }
 
 #endif
 
 // Returns true when signum is a deadly signal whose handling is enabled by
 // the matching common flag: SIGABRT/handle_abort, SIGILL/handle_sigill,
 // SIGFPE/handle_sigfpe, and SIGSEGV or SIGBUS/handle_segv.  Any other signal
 // yields false.
 bool IsHandledDeadlySignal(int signum) {
   switch (signum) {
     case SIGABRT:
       return common_flags()->handle_abort;
     case SIGILL:
       return common_flags()->handle_sigill;
     case SIGFPE:
       return common_flags()->handle_sigfpe;
     case SIGSEGV:
     case SIGBUS:
       return common_flags()->handle_segv;
     default:
       return false;
   }
 }
 
 #if !SANITIZER_GO
 // Starts a new thread running func(arg) through real_pthread_create and
 // returns the handle that call stored.  The handle starts out as nullptr, so
 // the caller gets nullptr rather than an indeterminate pointer if
 // real_pthread_create never writes it.
 //
 // The calling thread's signal mask is set to "everything blocked" around the
 // creation and restored to its previous value before returning.
 void *internal_start_thread(void(*func)(void *arg), void *arg) {
   // Start the thread with signals blocked, otherwise it can steal user signals.
   __sanitizer_sigset_t set, old;
   internal_sigfillset(&set);
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
   // Glibc uses SIGSETXID signal during setuid call. If this signal is blocked
   // on any thread, setuid call hangs (see test/tsan/setuid.c).
   internal_sigdelset(&set, 33);
 #endif
   internal_sigprocmask(SIG_SETMASK, &set, &old);
   // Initialised on purpose: returning an uninitialised local would be
   // undefined behaviour when thread creation fails without storing a handle.
   void *th = nullptr;
   real_pthread_create(&th, nullptr, (void*(*)(void *arg))func, arg);
   internal_sigprocmask(SIG_SETMASK, &old, nullptr);
   return th;
 }
 
 // Waits for the thread identified by th by forwarding to real_pthread_join.
 // The thread's return value is discarded (nullptr is passed as the result
 // slot).
 void internal_join_thread(void *th) {
   real_pthread_join(th, nullptr);
 }
 #else
 // SANITIZER_GO build: no thread is started; always returns a null handle.
 void *internal_start_thread(void (*func)(void *), void *arg) { return 0; }
 
 // SANITIZER_GO build: nothing to join, so this is a no-op.
 void internal_join_thread(void *th) {}
 #endif
 
 #if defined(__aarch64__)
 // Android headers in the older NDK releases miss this definition.
 // Record layout used by Aarch64GetESR to read the ESR value: a generic
 // _aarch64_ctx header followed by the 64-bit esr field.
 // NOTE(review): presumably mirrors the kernel's struct esr_context - confirm.
 struct __sanitizer_esr_context {
   struct _aarch64_ctx head;
   uint64_t esr;
 };
 
 // Scans the records stored in ucontext->uc_mcontext.__reserved for the one
 // tagged with the ESR magic number.  On success stores its esr field in *esr
 // and returns true.  Returns false when a record of size 0 is reached first.
 static bool Aarch64GetESR(ucontext_t *ucontext, u64 *esr) {
   static const u32 kEsrMagic = 0x45535201;
   u8 *cursor = ucontext->uc_mcontext.__reserved;
   for (;;) {
     _aarch64_ctx *record = (_aarch64_ctx *)cursor;
     // A zero-sized record terminates the list.
     if (record->size == 0)
       return false;
     if (record->magic == kEsrMagic) {
       *esr = ((__sanitizer_esr_context *)record)->esr;
       return true;
     }
     // Each record carries its own length; step over it to the next one.
     cursor += record->size;
   }
 }
 #endif
 
 // Reports whether the faulting access described by context (a ucontext_t *)
 // was a write or a read, by testing one architecture-specific bit:
 //   x86 / x86_64: bit 1 of the error code (mc_err on FreeBSD,
 //                 gregs[REG_ERR] otherwise);
 //   ARM:          bit 11 of uc_mcontext.error_code;
 //   AArch64:      bit 6 of the ESR value found by Aarch64GetESR, or UNKNOWN
 //                 when no ESR record is present.
 // Every other architecture returns UNKNOWN.
 SignalContext::WriteFlag SignalContext::GetWriteFlag(void *context) {
   ucontext_t *ucontext = (ucontext_t *)context;
 #if defined(__x86_64__) || defined(__i386__)
   static const uptr PF_WRITE = 1U << 1;
 #if SANITIZER_FREEBSD
   uptr err = ucontext->uc_mcontext.mc_err;
 #else
   uptr err = ucontext->uc_mcontext.gregs[REG_ERR];
 #endif
   return err & PF_WRITE ? WRITE : READ;
 #elif defined(__arm__)
   static const uptr FSR_WRITE = 1U << 11;
   uptr fsr = ucontext->uc_mcontext.error_code;
   return fsr & FSR_WRITE ? WRITE : READ;
 #elif defined(__aarch64__)
   static const u64 ESR_ELx_WNR = 1U << 6;
   u64 esr;
   if (!Aarch64GetESR(ucontext, &esr)) return UNKNOWN;
   return esr & ESR_ELx_WNR ? WRITE : READ;
 #else
   // Silence the unused-variable warning on architectures without support.
   (void)ucontext;
   return UNKNOWN;  // FIXME: Implement.
 #endif
 }
 
 // Placeholder: currently does nothing with context on this platform.
 void SignalContext::DumpAllRegisters(void *context) {
   // FIXME: Implement this.
 }
 
 // Extracts the program counter, stack pointer and frame ("base") pointer
 // from context, which is interpreted as a ucontext_t *, and stores them in
 // *pc, *sp and *bp.  The fields read are selected per architecture and OS by
 // the preprocessor; an unsupported architecture is a compile-time error.
 void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
 #if defined(__arm__)
   ucontext_t *ucontext = (ucontext_t*)context;
   *pc = ucontext->uc_mcontext.arm_pc;
   *bp = ucontext->uc_mcontext.arm_fp;
   *sp = ucontext->uc_mcontext.arm_sp;
 #elif defined(__aarch64__)
   ucontext_t *ucontext = (ucontext_t*)context;
   *pc = ucontext->uc_mcontext.pc;
   // regs[29] is used as the frame pointer here.
   *bp = ucontext->uc_mcontext.regs[29];
   *sp = ucontext->uc_mcontext.sp;
 #elif defined(__hppa__)
   ucontext_t *ucontext = (ucontext_t*)context;
   *pc = ucontext->uc_mcontext.sc_iaoq[0];
   /* GCC uses %r3 whenever a frame pointer is needed.  */
   *bp = ucontext->uc_mcontext.sc_gr[3];
   *sp = ucontext->uc_mcontext.sc_gr[30];
 #elif defined(__x86_64__)
 # if SANITIZER_FREEBSD
   ucontext_t *ucontext = (ucontext_t*)context;
   *pc = ucontext->uc_mcontext.mc_rip;
   *bp = ucontext->uc_mcontext.mc_rbp;
   *sp = ucontext->uc_mcontext.mc_rsp;
 # else
   ucontext_t *ucontext = (ucontext_t*)context;
   *pc = ucontext->uc_mcontext.gregs[REG_RIP];
   *bp = ucontext->uc_mcontext.gregs[REG_RBP];
   *sp = ucontext->uc_mcontext.gregs[REG_RSP];
 # endif
 #elif defined(__i386__)
 # if SANITIZER_FREEBSD
   ucontext_t *ucontext = (ucontext_t*)context;
   *pc = ucontext->uc_mcontext.mc_eip;
   *bp = ucontext->uc_mcontext.mc_ebp;
   *sp = ucontext->uc_mcontext.mc_esp;
 # else
   ucontext_t *ucontext = (ucontext_t*)context;
   *pc = ucontext->uc_mcontext.gregs[REG_EIP];
   *bp = ucontext->uc_mcontext.gregs[REG_EBP];
   *sp = ucontext->uc_mcontext.gregs[REG_ESP];
 # endif
 #elif defined(__powerpc__) || defined(__powerpc64__)
   ucontext_t *ucontext = (ucontext_t*)context;
   *pc = ucontext->uc_mcontext.regs->nip;
   *sp = ucontext->uc_mcontext.regs->gpr[PT_R1];
   // The powerpc{,64}-linux ABIs do not specify r31 as the frame
   // pointer, but GCC always uses r31 when we need a frame pointer.
   *bp = ucontext->uc_mcontext.regs->gpr[PT_R31];
 #elif defined(__sparc__)
   ucontext_t *ucontext = (ucontext_t*)context;
   uptr *stk_ptr;
   // On SPARC the frame pointer is not taken from the register set; it is
   // loaded from slot 15 of the memory the stack pointer refers to.
 # if defined (__arch64__)
   *pc = ucontext->uc_mcontext.mc_gregs[MC_PC];
   *sp = ucontext->uc_mcontext.mc_gregs[MC_O6];
   // NOTE(review): 2047 is presumably the 64-bit SPARC stack bias - confirm.
   stk_ptr = (uptr *) (*sp + 2047);
   *bp = stk_ptr[15];
 # else
   *pc = ucontext->uc_mcontext.gregs[REG_PC];
   *sp = ucontext->uc_mcontext.gregs[REG_O6];
   stk_ptr = (uptr *) *sp;
   *bp = stk_ptr[15];
 # endif
 #elif defined(__mips__)
   ucontext_t *ucontext = (ucontext_t*)context;
   *pc = ucontext->uc_mcontext.pc;
   *bp = ucontext->uc_mcontext.gregs[30];
   *sp = ucontext->uc_mcontext.gregs[29];
 #elif defined(__s390__)
   ucontext_t *ucontext = (ucontext_t*)context;
 # if defined(__s390x__)
   *pc = ucontext->uc_mcontext.psw.addr;
 # else
   // On 31-bit s390 the top bit of psw.addr is masked off.
   *pc = ucontext->uc_mcontext.psw.addr & 0x7fffffff;
 # endif
   *bp = ucontext->uc_mcontext.gregs[11];
   *sp = ucontext->uc_mcontext.gregs[15];
 #else
 # error "Unsupported arch"
 #endif
 }
 
 // Intentionally empty in this file: see the comment in the body.
 void MaybeReexec() {
   // No need to re-exec on Linux.
 }
 
 // Empty implementation: prints nothing in this file.
 void PrintModuleMap() { }
 
 // Not implemented in this file: calling it trips UNREACHABLE.  The trailing
 // return only exists to give the function a return value.
 uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding) {
   UNREACHABLE("FindAvailableMemoryRange is not available");
   return 0;
 }
 
 } // namespace __sanitizer
 
 #endif // SANITIZER_FREEBSD || SANITIZER_LINUX
Index: head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h
===================================================================
--- head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h	(revision 318735)
+++ head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h	(revision 318736)
@@ -1,1487 +1,1488 @@
 //===-- sanitizer_platform_limits_posix.h ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file is a part of Sanitizer common code.
 //
 // Sizes and layouts of platform-specific POSIX data structures.
 //===----------------------------------------------------------------------===//
 
 #ifndef SANITIZER_PLATFORM_LIMITS_POSIX_H
 #define SANITIZER_PLATFORM_LIMITS_POSIX_H
 
 #include "sanitizer_internal_defs.h"
 #include "sanitizer_platform.h"
 
 // GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) converts a dlopen() handle into a
 // link_map pointer.
 #if SANITIZER_FREEBSD
 // FreeBSD's dlopen() returns a pointer to an Obj_Entry structure that
 // incorporates the map structure.
 // A null handle maps to a null link_map.
 // NOTE(review): 544 is a hard-coded byte offset, presumably that of the
 // link_map member inside Obj_Entry - confirm it matches the target's rtld.
 # define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) \
     ((link_map*)((handle) == nullptr ? nullptr : ((char*)(handle) + 544)))
 #else
 // Elsewhere the handle is simply reinterpreted as a link_map pointer.
 # define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) ((link_map*)(handle))
 #endif  // !SANITIZER_FREEBSD
 
 #ifndef __GLIBC_PREREQ
 #define __GLIBC_PREREQ(x, y) 0
 #endif
 
 namespace __sanitizer {
   extern unsigned struct_utsname_sz;
   extern unsigned struct_stat_sz;
 #if !SANITIZER_FREEBSD && !SANITIZER_IOS
   extern unsigned struct_stat64_sz;
 #endif
   extern unsigned struct_rusage_sz;
   extern unsigned siginfo_t_sz;
   extern unsigned struct_itimerval_sz;
   extern unsigned pthread_t_sz;
   extern unsigned pthread_cond_t_sz;
   extern unsigned pid_t_sz;
   extern unsigned timeval_sz;
   extern unsigned uid_t_sz;
   extern unsigned gid_t_sz;
   extern unsigned mbstate_t_sz;
   extern unsigned struct_timezone_sz;
   extern unsigned struct_tms_sz;
   extern unsigned struct_itimerspec_sz;
   extern unsigned struct_sigevent_sz;
   extern unsigned struct_sched_param_sz;
   extern unsigned struct_statfs64_sz;
 
 #if !SANITIZER_ANDROID
   extern unsigned struct_statfs_sz;
   extern unsigned struct_sockaddr_sz;
   extern unsigned ucontext_t_sz;
 #endif // !SANITIZER_ANDROID
 
 #if SANITIZER_LINUX
 
 // Per-architecture size constants struct_kernel_stat_sz and
 // struct_kernel_stat64_sz (the SPARC branches also define
 // struct___old_kernel_stat_sz).
 // NOTE(review): presumably the byte sizes of the kernel's struct stat and
 // struct stat64, with 0 meaning the structure is not used on that
 // architecture - confirm against the consumers of these constants.
 #if defined(__x86_64__)
   const unsigned struct_kernel_stat_sz = 144;
   const unsigned struct_kernel_stat64_sz = 0;
 #elif defined(__i386__)
   const unsigned struct_kernel_stat_sz = 64;
   const unsigned struct_kernel_stat64_sz = 96;
 #elif defined(__arm__)
   const unsigned struct_kernel_stat_sz = 64;
   const unsigned struct_kernel_stat64_sz = 104;
 #elif defined(__aarch64__)
   const unsigned struct_kernel_stat_sz = 128;
   const unsigned struct_kernel_stat64_sz = 104;
 #elif defined(__powerpc__) && !defined(__powerpc64__)
   const unsigned struct_kernel_stat_sz = 72;
   const unsigned struct_kernel_stat64_sz = 104;
 #elif defined(__powerpc64__)
   const unsigned struct_kernel_stat_sz = 144;
   const unsigned struct_kernel_stat64_sz = 104;
 #elif defined(__riscv__)
   /* RISCVTODO: check that these values are correct */
   const unsigned struct_kernel_stat_sz = 128;
   const unsigned struct_kernel_stat64_sz = 128;
 #elif defined(__mips__)
   // MIPS: the value depends on Android vs. non-Android and, through
   // FIRST_32_SECOND_64, on the word size.
   const unsigned struct_kernel_stat_sz =
                  SANITIZER_ANDROID ? FIRST_32_SECOND_64(104, 128) :
                                      FIRST_32_SECOND_64(144, 216);
   const unsigned struct_kernel_stat64_sz = 104;
 #elif defined(__s390__) && !defined(__s390x__)
   const unsigned struct_kernel_stat_sz = 64;
   const unsigned struct_kernel_stat64_sz = 104;
 #elif defined(__s390x__)
   const unsigned struct_kernel_stat_sz = 144;
   const unsigned struct_kernel_stat64_sz = 0;
 #elif defined(__sparc__) && defined(__arch64__)
   const unsigned struct___old_kernel_stat_sz = 0;
   const unsigned struct_kernel_stat_sz = 104;
   const unsigned struct_kernel_stat64_sz = 144;
 #elif defined(__sparc__) && !defined(__arch64__)
   const unsigned struct___old_kernel_stat_sz = 0;
   const unsigned struct_kernel_stat_sz = 64;
   const unsigned struct_kernel_stat64_sz = 104;
 #endif
   // Leading fields of a perf event attribute block.  Only the two fields
   // declared here are described; the rest of the real structure is omitted
   // because it varies with the kernel version.
   // NOTE(review): presumably `size` is the total byte size of the real
   // structure - confirm.
   struct __sanitizer_perf_event_attr {
     unsigned type;
     unsigned size;
     // More fields that vary with the kernel version.
   };
 
   extern unsigned struct_epoll_event_sz;
   extern unsigned struct_sysinfo_sz;
   extern unsigned __user_cap_header_struct_sz;
   extern unsigned __user_cap_data_struct_sz;
   extern unsigned struct_new_utsname_sz;
   extern unsigned struct_old_utsname_sz;
   extern unsigned struct_oldold_utsname_sz;
 
   const unsigned struct_kexec_segment_sz = 4 * sizeof(unsigned long);
 #endif  // SANITIZER_LINUX
 
 #if SANITIZER_LINUX || SANITIZER_FREEBSD
 
 #if defined(__powerpc64__) || defined(__riscv__) || defined(__s390__)
   const unsigned struct___old_kernel_stat_sz = 0;
 #elif !defined(__sparc__)
   const unsigned struct___old_kernel_stat_sz = 32;
 #endif
 
   extern unsigned struct_rlimit_sz;
   extern unsigned struct_utimbuf_sz;
   extern unsigned struct_timespec_sz;
 
   // Sanitizer-side description of an asynchronous I/O control block.
   // NOTE(review): presumably laid out to match the kernel's struct iocb -
   // confirm against the layout checks in the matching .cc file.
   struct __sanitizer_iocb {
     u64   aio_data;
     // The next two 32-bit fields hold aio_key and aio_reserved1 in one order
     // or the other, hence the combined names.
     u32   aio_key_or_aio_reserved1; // Simply crazy.
     u32   aio_reserved1_or_aio_key; // Luckily, we don't need these.
     u16   aio_lio_opcode;
     s16   aio_reqprio;
     u32   aio_fildes;
     u64   aio_buf;
     u64   aio_nbytes;
     s64   aio_offset;
     u64   aio_reserved2;
     u64   aio_reserved3;
   };
 
   // Sanitizer-side description of an asynchronous I/O completion event: four
   // 64-bit fields.
   // NOTE(review): presumably matches the kernel's struct io_event - confirm.
   struct __sanitizer_io_event {
     u64 data;
     u64 obj;
     u64 res;
     u64 res2;
   };
 
   const unsigned iocb_cmd_pread = 0;
   const unsigned iocb_cmd_pwrite = 1;
   const unsigned iocb_cmd_preadv = 7;
   const unsigned iocb_cmd_pwritev = 8;
 
   // Sanitizer-side description of the argument block of the _sysctl call.
   // NOTE(review): presumably matches the kernel's struct __sysctl_args -
   // confirm.
   struct __sanitizer___sysctl_args {
     int *name;
     int nlen;
     void *oldval;
     uptr *oldlenp;
     void *newval;
     uptr newlen;
     unsigned long ___unused[4];
   };
 
   const unsigned old_sigset_t_sz = sizeof(unsigned long);
 
   // Opaque storage standing in for the platform's sem_t.  Only the size
   // matters; the element type and count are chosen per platform:
   //   64-bit Android: 4 ints, 32-bit Android: 1 int,
   //   other Linux: 4 uptrs, FreeBSD: 4 u32s.
   struct __sanitizer_sem_t {
 #if SANITIZER_ANDROID && defined(_LP64)
     int data[4];
 #elif SANITIZER_ANDROID && !defined(_LP64)
     int data;
 #elif SANITIZER_LINUX
     uptr data[4];
 #elif SANITIZER_FREEBSD
     u32 data[4];
 #endif
   };
 #endif // SANITIZER_LINUX || SANITIZER_FREEBSD
 
 #if SANITIZER_ANDROID
   struct __sanitizer_mallinfo {
     uptr v[10];
   };
 #endif
 
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
   struct __sanitizer_mallinfo {
     int v[10];
   };
 
   extern unsigned struct_ustat_sz;
   extern unsigned struct_rlimit64_sz;
   extern unsigned struct_statvfs64_sz;
 
   // Sanitizer-side description of the IPC permission structure on non-Android
   // Linux.  The first five fields are common; the width and order of mode,
   // the sequence number, padding and reserved words are selected per
   // architecture below.
   // NOTE(review): presumably matches glibc's struct ipc_perm - confirm.
   struct __sanitizer_ipc_perm {
     int __key;
     int uid;
     int gid;
     int cuid;
     int cgid;
 #ifdef __powerpc__
     unsigned mode;
     unsigned __seq;
     u64 __unused1;
     u64 __unused2;
 #elif defined(__sparc__)
 #if defined(__arch64__)
     unsigned mode;
     unsigned short __pad1;
 #else
     unsigned short __pad1;
     unsigned short mode;
     unsigned short __pad2;
 #endif
     unsigned short __seq;
     unsigned long long __unused1;
     unsigned long long __unused2;
 #elif defined(__mips__) || defined(__aarch64__) || defined(__s390x__)
     unsigned int mode;
     unsigned short __seq;
     unsigned short __pad1;
     unsigned long __unused1;
     unsigned long __unused2;
 #else
     unsigned short mode;
     unsigned short __pad1;
     unsigned short __seq;
     unsigned short __pad2;
     // x86_64 compiled without _LP64 keeps 64-bit reserved words.
 #if defined(__x86_64__) && !defined(_LP64)
     u64 __unused1;
     u64 __unused2;
 #else
     unsigned long __unused1;
     unsigned long __unused2;
 #endif
 #endif
   };
 
   // Sanitizer-side description of the shared-memory segment descriptor on
   // non-Android Linux.  SPARC has its own field order; every other
   // architecture shares one order with conditional padding words and a
   // PowerPC-specific position for shm_segsz.
   // NOTE(review): presumably matches glibc's struct shmid_ds - confirm.
   struct __sanitizer_shmid_ds {
     __sanitizer_ipc_perm shm_perm;
   #if defined(__sparc__)
   // SPARC: a 32-bit pad precedes each time field unless __arch64__ is set.
   #if !defined(__arch64__)
     u32 __pad1;
   #endif
     long shm_atime;
   #if !defined(__arch64__)
     u32 __pad2;
   #endif
     long shm_dtime;
   #if !defined(__arch64__)
     u32 __pad3;
   #endif
     long shm_ctime;
     uptr shm_segsz;
     int shm_cpid;
     int shm_lpid;
     unsigned long shm_nattch;
     unsigned long __glibc_reserved1;
     unsigned long __glibc_reserved2;
   #else
   // Non-PowerPC: shm_segsz comes first.  32-bit PowerPC has a placeholder
   // word here instead; PowerPC places shm_segsz after the time fields.
   #ifndef __powerpc__
     uptr shm_segsz;
   #elif !defined(__powerpc64__)
     uptr __unused0;
   #endif
   #if defined(__x86_64__) && !defined(_LP64)
     u64 shm_atime;
     u64 shm_dtime;
     u64 shm_ctime;
   #else
     uptr shm_atime;
   #if !defined(_LP64) && !defined(__mips__)
     uptr __unused1;
   #endif
     uptr shm_dtime;
   #if !defined(_LP64) && !defined(__mips__)
     uptr __unused2;
   #endif
     uptr shm_ctime;
   #if !defined(_LP64) && !defined(__mips__)
     uptr __unused3;
   #endif
   #endif
   #ifdef __powerpc__
     uptr shm_segsz;
   #endif
     int shm_cpid;
     int shm_lpid;
   #if defined(__x86_64__) && !defined(_LP64)
     u64 shm_nattch;
     u64 __unused4;
     u64 __unused5;
   #else
     uptr shm_nattch;
     uptr __unused4;
     uptr __unused5;
   #endif
 #endif
   };
 #elif SANITIZER_FREEBSD
   // FreeBSD variant of the IPC permission structure.
   // NOTE(review): presumably matches FreeBSD's struct ipc_perm - confirm.
   struct __sanitizer_ipc_perm {
     unsigned int cuid;
     unsigned int cgid;
     unsigned int uid;
     unsigned int gid;
     unsigned short mode;
     unsigned short seq;
     long key;
   };
 
   // FreeBSD variant of the shared-memory segment descriptor.
   // NOTE(review): presumably matches FreeBSD's struct shmid_ds - confirm.
   struct __sanitizer_shmid_ds {
     __sanitizer_ipc_perm shm_perm;
     unsigned long shm_segsz;
     unsigned int shm_lpid;
     unsigned int shm_cpid;
     int shm_nattch;
     unsigned long shm_atime;
     unsigned long shm_dtime;
     unsigned long shm_ctime;
   };
 #endif
 
 #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID
   extern unsigned struct_msqid_ds_sz;
   extern unsigned struct_mq_attr_sz;
   extern unsigned struct_timex_sz;
   extern unsigned struct_statvfs_sz;
 #endif  // (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID
 
   // Scatter/gather buffer descriptor: a base pointer plus a length stored as
   // uptr.
   // NOTE(review): presumably matches the platform's struct iovec - confirm.
   struct __sanitizer_iovec {
     void *iov_base;
     uptr iov_len;
   };
 
 #if !SANITIZER_ANDROID
   // Node of a singly linked list of interface addresses (linked through
   // ifa_next).  The address members are kept as void * and, per the field
   // comments, point to struct sockaddr.
   // NOTE(review): presumably matches the platform's struct ifaddrs - confirm.
   struct __sanitizer_ifaddrs {
     struct __sanitizer_ifaddrs *ifa_next;
     char *ifa_name;
     unsigned int ifa_flags;
     void *ifa_addr;    // (struct sockaddr *)
     void *ifa_netmask; // (struct sockaddr *)
     // This is a union on Linux.
     // Drop any ifa_dstaddr macro so the member below can use that name.
 # ifdef ifa_dstaddr
 # undef ifa_dstaddr
 # endif
     void *ifa_dstaddr; // (struct sockaddr *)
     void *ifa_data;
   };
 #endif  // !SANITIZER_ANDROID
 
 #if SANITIZER_MAC
   typedef unsigned long __sanitizer_pthread_key_t;
 #else
   typedef unsigned __sanitizer_pthread_key_t;
 #endif
 
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
 
   // Sanitizer-side description of an XDR stream handle.
   // NOTE(review): presumably matches the system's XDR structure, with x_op
   // taking the __sanitizer_XDR_* values defined after it - confirm.
   struct __sanitizer_XDR {
     int x_op;
     void *x_ops;
     uptr x_public;
     uptr x_private;
     uptr x_base;
     unsigned x_handy;
   };
 
   const int __sanitizer_XDR_ENCODE = 0;
   const int __sanitizer_XDR_DECODE = 1;
   const int __sanitizer_XDR_FREE = 2;
 #endif
 
   // Sanitizer-side description of a password-database entry.  The set of
   // members depends on the platform:
   //   - pw_change, pw_class and pw_expire exist on Mac and FreeBSD only;
   //   - pw_gecos is absent on 32-bit Android;
   //   - pw_fields exists on FreeBSD only.
   // NOTE(review): presumably matches the platform's struct passwd - confirm.
   struct __sanitizer_passwd {
     char *pw_name;
     char *pw_passwd;
     int pw_uid;
     int pw_gid;
 #if SANITIZER_MAC || SANITIZER_FREEBSD
     long pw_change;
     char *pw_class;
 #endif
 #if !(SANITIZER_ANDROID && (SANITIZER_WORDSIZE == 32))
     char *pw_gecos;
 #endif
     char *pw_dir;
     char *pw_shell;
 #if SANITIZER_MAC || SANITIZER_FREEBSD
     long pw_expire;
 #endif
 #if SANITIZER_FREEBSD
     int pw_fields;
 #endif
   };
 
   // Sanitizer-side description of a group-database entry.
   // NOTE(review): presumably matches the platform's struct group, with gr_mem
   // being a null-terminated array of member names - confirm.
   struct __sanitizer_group {
     char *gr_name;
     char *gr_passwd;
     int gr_gid;
     char **gr_mem;
   };
 
 #if defined(__x86_64__) && !defined(_LP64)
   typedef long long __sanitizer_time_t;
 #else
   typedef long __sanitizer_time_t;
 #endif
 
   // Sanitizer-side description of the ftime() result structure; the time
   // member uses the __sanitizer_time_t typedef declared just before it.
   // NOTE(review): presumably matches the platform's struct timeb - confirm.
   struct __sanitizer_timeb {
     __sanitizer_time_t time;
     unsigned short millitm;
     short timezone;
     short dstflag;
   };
 
   // A 6-byte Ethernet (MAC) address stored as raw octets.
   struct __sanitizer_ether_addr {
     u8 octet[6];
   };
 
   // Sanitizer-side description of broken-down calendar time, including the
   // tm_gmtoff and tm_zone members.
   // NOTE(review): presumably matches the platform's struct tm - confirm.
   struct __sanitizer_tm {
     int tm_sec;
     int tm_min;
     int tm_hour;
     int tm_mday;
     int tm_mon;
     int tm_year;
     int tm_wday;
     int tm_yday;
     int tm_isdst;
     long int tm_gmtoff;
     const char *tm_zone;
   };
 
 #if SANITIZER_LINUX
   // Sanitizer-side description of a mount-table entry (Linux only).
   // NOTE(review): presumably matches the platform's struct mntent - confirm.
   struct __sanitizer_mntent {
     char *mnt_fsname;
     char *mnt_dir;
     char *mnt_type;
     char *mnt_opts;
     int mnt_freq;
     int mnt_passno;
   };
 #endif
 
 #if SANITIZER_MAC || SANITIZER_FREEBSD
   // Mac/FreeBSD variant of the message header used with sendmsg/recvmsg-style
   // calls.  msg_iovlen and msg_controllen are plain `unsigned` here, unlike
   // the uptr-sized fields of the variant used on other platforms.
   // NOTE(review): presumably matches the platform's struct msghdr - confirm.
   struct __sanitizer_msghdr {
     void *msg_name;
     unsigned msg_namelen;
     struct __sanitizer_iovec *msg_iov;
     unsigned msg_iovlen;
     void *msg_control;
     unsigned msg_controllen;
     int msg_flags;
   };
   // Mac/FreeBSD variant of the control-message header; cmsg_len is a plain
   // `unsigned`.
   // NOTE(review): presumably matches the platform's struct cmsghdr - confirm.
   struct __sanitizer_cmsghdr {
     unsigned cmsg_len;
     int cmsg_level;
     int cmsg_type;
   };
 #else
   // Variant of the message header for platforms other than Mac and FreeBSD:
   // msg_iovlen and msg_controllen are pointer-sized (uptr).
   // NOTE(review): presumably matches the platform's struct msghdr - confirm.
   struct __sanitizer_msghdr {
     void *msg_name;
     unsigned msg_namelen;
     struct __sanitizer_iovec *msg_iov;
     uptr msg_iovlen;
     void *msg_control;
     uptr msg_controllen;
     int msg_flags;
   };
   // Variant of the control-message header for platforms other than Mac and
   // FreeBSD: cmsg_len is pointer-sized (uptr).
   // NOTE(review): presumably matches the platform's struct cmsghdr - confirm.
   struct __sanitizer_cmsghdr {
     uptr cmsg_len;
     int cmsg_level;
     int cmsg_type;
   };
 #endif
 
 #if SANITIZER_MAC
   // Mac variant of the directory entry header.  Only the leading fields up to
   // d_reclen are declared; the rest of the real structure is omitted.
   // NOTE(review): presumably d_reclen is the byte length of the whole record -
   // confirm.
   struct __sanitizer_dirent {
     unsigned long long d_ino;
     unsigned long long d_seekoff;
     unsigned short d_reclen;
     // more fields that we don't care about
   };
 #elif SANITIZER_FREEBSD
   struct __sanitizer_dirent {
-    unsigned int d_fileno;
+    unsigned long long d_fileno;
+    unsigned long long d_off;
     unsigned short d_reclen;
     // more fields that we don't care about
   };
 #elif SANITIZER_ANDROID || defined(__x86_64__)
   // Android / x86_64 variant of the directory entry header: d_ino and d_off
   // are 64-bit regardless of pointer size.  Only the leading fields up to
   // d_reclen are declared.
   struct __sanitizer_dirent {
     unsigned long long d_ino;
     unsigned long long d_off;
     unsigned short d_reclen;
     // more fields that we don't care about
   };
 #else
   // Fallback variant of the directory entry header for the remaining
   // platforms: d_ino and d_off are pointer-sized (uptr).  Only the leading
   // fields up to d_reclen are declared.
   struct __sanitizer_dirent {
     uptr d_ino;
     uptr d_off;
     unsigned short d_reclen;
     // more fields that we don't care about
   };
 #endif
 
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
   // Directory entry header for the 64-bit directory interface on non-Android
   // Linux: d_ino and d_off are always 64-bit.  Only the leading fields up to
   // d_reclen are declared.
   struct __sanitizer_dirent64 {
     unsigned long long d_ino;
     unsigned long long d_off;
     unsigned short d_reclen;
     // more fields that we don't care about
   };
 #endif
 
 // 'clock_t' is 32 bits wide on x64 FreeBSD
 #if SANITIZER_FREEBSD
   typedef int __sanitizer_clock_t;
 #elif defined(__x86_64__) && !defined(_LP64)
   typedef long long __sanitizer_clock_t;
 #else
   typedef long __sanitizer_clock_t;
 #endif
 
 #if SANITIZER_LINUX
   typedef int __sanitizer_clockid_t;
 #endif
 
 // Kernel-level scalar types, declared for Linux and FreeBSD only.
 #if SANITIZER_LINUX || SANITIZER_FREEBSD
 // uid/gid: `unsigned` on _LP64, x86-64, PowerPC and MIPS targets;
 // `unsigned short` everywhere else.
 #if defined(_LP64) || defined(__x86_64__) || defined(__powerpc__)\
                    || defined(__mips__)
   typedef unsigned __sanitizer___kernel_uid_t;
   typedef unsigned __sanitizer___kernel_gid_t;
 #else
   typedef unsigned short __sanitizer___kernel_uid_t;
   typedef unsigned short __sanitizer___kernel_gid_t;
 #endif
 // off_t: long long on x86-64 without _LP64, otherwise long.
 #if defined(__x86_64__) && !defined(_LP64)
   typedef long long __sanitizer___kernel_off_t;
 #else
   typedef long __sanitizer___kernel_off_t;
 #endif
 
 // "old" uid/gid: unsigned int on PowerPC, MIPS and RISC-V; unsigned short
 // on all other targets.
 #if defined(__powerpc__) || defined(__mips__) || defined(__riscv__)
   typedef unsigned int __sanitizer___kernel_old_uid_t;
   typedef unsigned int __sanitizer___kernel_old_gid_t;
 #else
   typedef unsigned short __sanitizer___kernel_old_uid_t;
   typedef unsigned short __sanitizer___kernel_old_gid_t;
 #endif
 
   typedef long long __sanitizer___kernel_loff_t;
   // Bit set with room for 1024 bits, stored as an array of unsigned long
   // words (1024 divided by the number of bits in a long).
   typedef struct {
     unsigned long fds_bits[1024 / (8 * sizeof(long))];
   } __sanitizer___kernel_fd_set;
 #endif
 
   // This thing depends on the platform. We are only interested in the upper
   // limit. Verified with a compiler assert in .cc.
   const int pthread_attr_t_max_sz = 128;
   // Opaque storage of pthread_attr_t_max_sz bytes. The `align` member is
   // never meant to carry data; being a pointer inside the union, it gives
   // the whole object at least pointer alignment.
   union __sanitizer_pthread_attr_t {
     char size[pthread_attr_t_max_sz]; // NOLINT
     void *align;
   };
 
 // Signal-set representation per platform:
 //   Android/MIPS  : array of unsigned long totalling 16 bytes
 //   Android/other : a single unsigned long
 //   Mac           : a single unsigned
 //   Linux         : struct wrapping 128 bytes of uptr words
 //   FreeBSD       : struct wrapping four unsigned ints
 // No definition is provided if none of these platform macros is set.
 #if SANITIZER_ANDROID
 # if SANITIZER_MIPS
   typedef unsigned long __sanitizer_sigset_t[16/sizeof(unsigned long)];
 # else
   typedef unsigned long __sanitizer_sigset_t;
 # endif
 #elif SANITIZER_MAC
   typedef unsigned __sanitizer_sigset_t;
 #elif SANITIZER_LINUX
   struct __sanitizer_sigset_t {
     // The size is determined by looking at sizeof of real sigset_t on linux.
     uptr val[128 / sizeof(uptr)];
   };
 #elif SANITIZER_FREEBSD
   struct __sanitizer_sigset_t {
      // uint32_t * 4
      unsigned int __bits[4];
   };
 #endif
 
   // Linux system headers define the 'sa_handler' and 'sa_sigaction' macros.
   // The handler union members below are therefore named plain `handler` and
   // `sigaction` (presumably to avoid colliding with those macros).
   //
   // The member order differs per platform, so each variant is spelled out:
   //   * Android 64-bit : flags, handlers, mask, restorer
   //   * Android MIPS32 : flags, handlers, mask (no restorer); tested before
   //                      the generic 32-bit Android case
   //   * Android 32-bit : handlers, mask, flags (uptr), restorer
   //   * everything else: assembled field by field from the nested #if blocks
 #if SANITIZER_ANDROID && (SANITIZER_WORDSIZE == 64)
   struct __sanitizer_sigaction {
     unsigned sa_flags;
     union {
       void (*sigaction)(int sig, void *siginfo, void *uctx);
       void (*handler)(int sig);
     };
     __sanitizer_sigset_t sa_mask;
     void (*sa_restorer)();
   };
 #elif SANITIZER_ANDROID && SANITIZER_MIPS32  // check this before WORDSIZE == 32
   struct __sanitizer_sigaction {
     unsigned sa_flags;
     union {
       void (*sigaction)(int sig, void *siginfo, void *uctx);
       void (*handler)(int sig);
     };
     __sanitizer_sigset_t sa_mask;
   };
 #elif SANITIZER_ANDROID && (SANITIZER_WORDSIZE == 32)
   struct __sanitizer_sigaction {
     union {
       void (*sigaction)(int sig, void *siginfo, void *uctx);
       void (*handler)(int sig);
     };
     __sanitizer_sigset_t sa_mask;
     uptr sa_flags;
     void (*sa_restorer)();
   };
 #else // !SANITIZER_ANDROID
   struct __sanitizer_sigaction {
     // Non-FreeBSD MIPS places sa_flags before the handler union.
 #if defined(__mips__) && !SANITIZER_FREEBSD
     unsigned int sa_flags;
 #endif
     union {
       void (*sigaction)(int sig, void *siginfo, void *uctx);
       void (*handler)(int sig);
     };
     // FreeBSD: flags first, then mask. Other platforms: mask (or, on s390x,
     // a reserved int) first, then flags unless MIPS already declared them.
 #if SANITIZER_FREEBSD
     int sa_flags;
     __sanitizer_sigset_t sa_mask;
 #else
 #if defined(__s390x__)
     int sa_resv;
 #else
     __sanitizer_sigset_t sa_mask;
 #endif
 #ifndef __mips__
 #if defined(__sparc__)
 #if __GLIBC_PREREQ (2, 20)
     // On sparc glibc 2.19 and earlier sa_flags was unsigned long.
 #if defined(__arch64__)
     // To maintain ABI compatibility on sparc64 when switching to an int,
     // __glibc_reserved0 was added.
     int __glibc_reserved0;
 #endif
     int sa_flags;
 #else
     unsigned long sa_flags;
 #endif
 #else
     int sa_flags;
 #endif
 #endif
 #endif
 #if SANITIZER_LINUX
     void (*sa_restorer)();
 #endif
 #if defined(__mips__) && (SANITIZER_WORDSIZE == 32)
     int sa_resv[1];
 #endif
     // On s390x the mask is the last member rather than following the
     // handler union.
 #if defined(__s390x__)
     __sanitizer_sigset_t sa_mask;
 #endif
   };
 #endif // !SANITIZER_ANDROID
 
 // Kernel-side signal set. FreeBSD reuses __sanitizer_sigset_t unchanged;
 // MIPS uses two pointer-sized words; all other targets use 8 bytes.
 #if SANITIZER_FREEBSD
   typedef __sanitizer_sigset_t __sanitizer_kernel_sigset_t;
 #elif defined(__mips__)
   struct __sanitizer_kernel_sigset_t {
     uptr sig[2];
   };
 #else
   struct __sanitizer_kernel_sigset_t {
     u8 sig[8];
   };
 #endif
 
   // Linux system headers define the 'sa_handler' and 'sa_sigaction' macros.
   // Kernel-side sigaction record. The two variants hold the same four
   // members in a different order and with a different sa_flags type:
   // MIPS puts an `unsigned int sa_flags` first and the mask before the
   // restorer; other targets put the handler union first, use
   // `unsigned long sa_flags`, and place the mask last.
 #if SANITIZER_MIPS
   struct __sanitizer_kernel_sigaction_t {
     unsigned int sa_flags;
     union {
       void (*handler)(int signo);
       void (*sigaction)(int signo, void *info, void *ctx);
     };
     __sanitizer_kernel_sigset_t sa_mask;
     void (*sa_restorer)(void);
   };
 #else
   struct __sanitizer_kernel_sigaction_t {
     union {
       void (*handler)(int signo);
       void (*sigaction)(int signo, void *info, void *ctx);
     };
     unsigned long sa_flags;
     void (*sa_restorer)(void);
     __sanitizer_kernel_sigset_t sa_mask;
   };
 #endif
 
   // Declarations only; the values are defined in another translation unit.
   // Presumably they carry the platform's SIG_IGN, SIG_DFL and SA_SIGINFO
   // values; confirm in the defining .cc file.
   extern uptr sig_ign;
   extern uptr sig_dfl;
   extern uptr sa_siginfo;
 
 #if SANITIZER_LINUX
   extern int e_tabsz;
 #endif
 
   // Presumably the platform's AF_INET / AF_INET6 constants; confirm in the
   // defining .cc file.
   extern int af_inet;
   extern int af_inet6;
   // Takes an address family and returns a size as uptr. NOTE(review): looks
   // like the byte size of that family's address struct; confirm in the
   // definition.
   uptr __sanitizer_in_addr_sz(int af);
 
 // Declared for Linux and FreeBSD only. Presumably shadows the leading
 // members of the system `struct dl_phdr_info`; confirm against the layout
 // checks in the .cc file. dlpi_phdr is kept as an untyped pointer, and the
 // element size is supplied separately through struct_ElfW_Phdr_sz.
 #if SANITIZER_LINUX || SANITIZER_FREEBSD
   struct __sanitizer_dl_phdr_info {
     uptr dlpi_addr;
     const char *dlpi_name;
     const void *dlpi_phdr;
     short dlpi_phnum;
   };
 
   extern unsigned struct_ElfW_Phdr_sz;
 #endif
 
   // Address-info list node. The only platform difference is the order of the
   // two pointer members after ai_addrlen: Android, Mac and FreeBSD declare
   // ai_canonname before ai_addr, while the remaining (Linux) case declares
   // ai_addr before ai_canonname.
   struct __sanitizer_addrinfo {
     int ai_flags;
     int ai_family;
     int ai_socktype;
     int ai_protocol;
 #if SANITIZER_ANDROID || SANITIZER_MAC || SANITIZER_FREEBSD
     unsigned ai_addrlen;
     char *ai_canonname;
     void *ai_addr;
 #else // LINUX
     unsigned ai_addrlen;
     void *ai_addr;
     char *ai_canonname;
 #endif
     struct __sanitizer_addrinfo *ai_next;
   };
 
   // Host entry record; the same layout is declared for every platform.
   struct __sanitizer_hostent {
     char *h_name;
     char **h_aliases;
     int h_addrtype;
     int h_length;
     char **h_addr_list;
   };
 
   // Poll descriptor record; the same layout is declared for every platform.
   struct __sanitizer_pollfd {
     int fd;
     short events;
     short revents;
   };
 
   // Descriptor-count type: unsigned on Android, Mac and FreeBSD; unsigned
   // long on the remaining platforms.
 #if SANITIZER_ANDROID || SANITIZER_MAC || SANITIZER_FREEBSD
   typedef unsigned __sanitizer_nfds_t;
 #else
   typedef unsigned long __sanitizer_nfds_t;
 #endif
 
 // glob state record, not declared on Android. The Linux and FreeBSD
 // variants differ: FreeBSD has the extra gl_matchc and gl_errfunc members,
 // places gl_pathv after gl_flags, and types gl_readdir as returning
 // `struct dirent *` where Linux uses `void *`. Platforms other than Linux
 // and FreeBSD get no definition from this block.
 #if !SANITIZER_ANDROID
 # if SANITIZER_LINUX
   struct __sanitizer_glob_t {
     uptr gl_pathc;
     char **gl_pathv;
     uptr gl_offs;
     int gl_flags;
 
     void (*gl_closedir)(void *dirp);
     void *(*gl_readdir)(void *dirp);
     void *(*gl_opendir)(const char *);
     int (*gl_lstat)(const char *, void *);
     int (*gl_stat)(const char *, void *);
   };
 # elif SANITIZER_FREEBSD
   struct __sanitizer_glob_t {
     uptr gl_pathc;
     uptr gl_matchc;
     uptr gl_offs;
     int gl_flags;
     char **gl_pathv;
     int (*gl_errfunc)(const char*, int);
     void (*gl_closedir)(void *dirp);
     struct dirent *(*gl_readdir)(void *dirp);
     void *(*gl_opendir)(const char*);
     int (*gl_lstat)(const char*, void* /* struct stat* */);
     int (*gl_stat)(const char*, void* /* struct stat* */);
   };
 # endif  // SANITIZER_FREEBSD
 
   // Declarations only. Presumably the platform's GLOB_NOMATCH and
   // GLOB_ALTDIRFUNC values; confirm in the defining .cc file.
 # if SANITIZER_LINUX || SANITIZER_FREEBSD
   extern int glob_nomatch;
   extern int glob_altdirfunc;
 # endif
 #endif  // !SANITIZER_ANDROID
 
   // Declaration only. Presumably the platform's PATH_MAX; confirm in the
   // defining .cc file.
   extern unsigned path_max;
 
   // Word-expansion result record. FreeBSD appends two extra members
   // (we_strings, we_nbytes) after the three common ones.
   struct __sanitizer_wordexp_t {
     uptr we_wordc;
     char **we_wordv;
     uptr we_offs;
 #if SANITIZER_FREEBSD
     char *we_strings;
     uptr we_nbytes;
 #endif
   };
 
 // On non-Android Linux the stream object is declared with its members up to
 // and including _fileno, and SANITIZER_HAS_STRUCT_FILE is 1. On every other
 // platform the type is opaque (void) and SANITIZER_HAS_STRUCT_FILE is 0, so
 // code that reads the members must test that macro first.
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
   struct __sanitizer_FILE {
     int _flags;
     char *_IO_read_ptr;
     char *_IO_read_end;
     char *_IO_read_base;
     char *_IO_write_base;
     char *_IO_write_ptr;
     char *_IO_write_end;
     char *_IO_buf_base;
     char *_IO_buf_end;
     char *_IO_save_base;
     char *_IO_backup_base;
     char *_IO_save_end;
     void *_markers;
     __sanitizer_FILE *_chain;
     int _fileno;
   };
 # define SANITIZER_HAS_STRUCT_FILE 1
 #else
   typedef void __sanitizer_FILE;
 # define SANITIZER_HAS_STRUCT_FILE 0
 #endif
 
 // ptrace-related declarations, available only on non-Android Linux for the
 // listed architectures (i386, x86-64, mips64, powerpc64, aarch64, arm,
 // s390). These are declarations only. Presumably the *_sz variables hold
 // the sizes of the matching register structs and the ptrace_* variables
 // hold the request numbers; confirm in the defining .cc file.
 #if SANITIZER_LINUX && !SANITIZER_ANDROID && \
   (defined(__i386) || defined(__x86_64) || defined(__mips64) || \
     defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \
     defined(__s390__))
   extern unsigned struct_user_regs_struct_sz;
   extern unsigned struct_user_fpregs_struct_sz;
   extern unsigned struct_user_fpxregs_struct_sz;
   extern unsigned struct_user_vfpregs_struct_sz;
 
   extern int ptrace_peektext;
   extern int ptrace_peekdata;
   extern int ptrace_peekuser;
   extern int ptrace_getregs;
   extern int ptrace_setregs;
   extern int ptrace_getfpregs;
   extern int ptrace_setfpregs;
   extern int ptrace_getfpxregs;
   extern int ptrace_setfpxregs;
   extern int ptrace_getvfpregs;
   extern int ptrace_setvfpregs;
   extern int ptrace_getsiginfo;
   extern int ptrace_setsiginfo;
   extern int ptrace_getregset;
   extern int ptrace_setregset;
   extern int ptrace_geteventmsg;
 #endif
 
 // Shared-memory sizes and shmctl command values, declared for Linux and
 // FreeBSD except Android. Declarations only; presumably defined from the
 // real system headers in the .cc file (confirm there).
 #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID
   extern unsigned struct_shminfo_sz;
   extern unsigned struct_shm_info_sz;
   extern int shmctl_ipc_stat;
   extern int shmctl_ipc_info;
   extern int shmctl_shm_info;
   extern int shmctl_shm_stat;
 #endif
 
 // struct_utmp_sz is not declared on Mac or FreeBSD; struct_utmpx_sz is not
 // declared on Android.
 #if !SANITIZER_MAC && !SANITIZER_FREEBSD
   extern unsigned struct_utmp_sz;
 #endif
 #if !SANITIZER_ANDROID
   extern unsigned struct_utmpx_sz;
 #endif
 
   // Presumably the platform's MAP_FIXED flag value; confirm in the defining
   // .cc file.
   extern int map_fixed;
 
   // ioctl arguments
   // Interface-configuration request: a length followed by a one-member union
   // holding an untyped buffer pointer. On Mac the struct is declared packed,
   // so no padding is inserted between ifc_len and the union; elsewhere the
   // default layout is used. The closing brace is duplicated across the
   // #if/#else only so that the attribute can be attached on Mac.
   struct __sanitizer_ifconf {
     int ifc_len;
     union {
       void *ifcu_req;
     } ifc_ifcu;
 #if SANITIZER_MAC
   } __attribute__((packed));
 #else
   };
 #endif
 
 // obstack and custom-stream (cookie I/O) types, non-Android Linux only.
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
 // One chunk header of an obstack; chunks are chained through `prev`.
 struct __sanitizer__obstack_chunk {
   char *limit;
   struct __sanitizer__obstack_chunk *prev;
 };
 
 // obstack control block. Only the first four members are named; the
 // remainder is reserved as seven pointer-sized words in more_fields.
 struct __sanitizer_obstack {
   long chunk_size;
   struct __sanitizer__obstack_chunk *chunk;
   char *object_base;
   char *next_free;
   uptr more_fields[7];
 };
 
 // Callback signatures for the four cookie I/O operations. read and write
 // return a uptr; seek and close return an int. seek receives the offset
 // through a pointer to u64.
 typedef uptr (*__sanitizer_cookie_io_read)(void *cookie, char *buf, uptr size);
 typedef uptr (*__sanitizer_cookie_io_write)(void *cookie, const char *buf,
                                             uptr size);
 typedef int (*__sanitizer_cookie_io_seek)(void *cookie, u64 *offset,
                                           int whence);
 typedef int (*__sanitizer_cookie_io_close)(void *cookie);
 
 // Table bundling the four callbacks in read, write, seek, close order.
 struct __sanitizer_cookie_io_functions_t {
   __sanitizer_cookie_io_read read;
   __sanitizer_cookie_io_write write;
   __sanitizer_cookie_io_seek seek;
   __sanitizer_cookie_io_close close;
 };
 #endif
 
 // Decoding of an ioctl request number. From the low bits upward the fields
 // are NR (8 bits), TYPE (8 bits), SIZE and DIR. PowerPC, MIPS and SPARC use
 // a 13-bit SIZE and a 3-bit DIR with NONE=1, READ=2, WRITE=4. Every other
 // target uses a 14-bit SIZE and a 2-bit DIR with NONE=0, WRITE=1, READ=2.
 // Both layouts add up to 32 bits.
 #define IOC_NRBITS 8
 #define IOC_TYPEBITS 8
 #if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__) || \
     defined(__sparc__)
 #define IOC_SIZEBITS 13
 #define IOC_DIRBITS 3
 #define IOC_NONE 1U
 #define IOC_WRITE 4U
 #define IOC_READ 2U
 #else
 #define IOC_SIZEBITS 14
 #define IOC_DIRBITS 2
 #define IOC_NONE 0U
 #define IOC_WRITE 1U
 #define IOC_READ 2U
 #endif
 // Field masks: all-ones values of the corresponding bit width.
 #define IOC_NRMASK ((1 << IOC_NRBITS) - 1)
 #define IOC_TYPEMASK ((1 << IOC_TYPEBITS) - 1)
 #define IOC_SIZEMASK ((1 << IOC_SIZEBITS) - 1)
 // Drop any IOC_DIRMASK that is already defined before defining this one.
 #if defined(IOC_DIRMASK)
 #undef IOC_DIRMASK
 #endif
 #define IOC_DIRMASK ((1 << IOC_DIRBITS) - 1)
 // Field positions: each shift is the previous shift plus the previous width.
 #define IOC_NRSHIFT 0
 #define IOC_TYPESHIFT (IOC_NRSHIFT + IOC_NRBITS)
 #define IOC_SIZESHIFT (IOC_TYPESHIFT + IOC_TYPEBITS)
 #define IOC_DIRSHIFT (IOC_SIZESHIFT + IOC_SIZEBITS)
 #define EVIOC_EV_MAX 0x1f
 #define EVIOC_ABS_MAX 0x3f
 
 // Field extractors: shift the request down, then mask to the field width.
 #define IOC_DIR(nr) (((nr) >> IOC_DIRSHIFT) & IOC_DIRMASK)
 #define IOC_TYPE(nr) (((nr) >> IOC_TYPESHIFT) & IOC_TYPEMASK)
 #define IOC_NR(nr) (((nr) >> IOC_NRSHIFT) & IOC_NRMASK)
 
 #if defined(__sparc__)
 // On sparc the 14-bit SIZE field overlaps with the
 // least significant bit of DIR, so either IOC_READ or
 // IOC_WRITE must be set in order to get a non-zero SIZE.
 // The macro therefore yields 0 unless the READ (2U) or WRITE (4U) bit is
 // set in the 3-bit DIR field at bit 29, and otherwise returns the 14 bits
 // starting at bit 16.
 #define IOC_SIZE(nr) \
   ((((((nr) >> 29) & 0x7) & (4U | 2U)) == 0) ? 0 : (((nr) >> 16) & 0x3fff))
 #else
 #define IOC_SIZE(nr) (((nr) >> IOC_SIZESHIFT) & IOC_SIZEMASK)
 #endif
 
   // Sizes of system structs passed to ioctl, grouped by the platforms that
   // have them. These are declarations only. Presumably each variable holds
   // sizeof() of the struct named in it, computed in a .cc file that can
   // include the real system headers; confirm there.
   //
   // Declared on every platform:
   extern unsigned struct_ifreq_sz;
   extern unsigned struct_termios_sz;
   extern unsigned struct_winsize_sz;
 
   // Linux only (Android included):
 #if SANITIZER_LINUX
   extern unsigned struct_arpreq_sz;
   extern unsigned struct_cdrom_msf_sz;
   extern unsigned struct_cdrom_multisession_sz;
   extern unsigned struct_cdrom_read_audio_sz;
   extern unsigned struct_cdrom_subchnl_sz;
   extern unsigned struct_cdrom_ti_sz;
   extern unsigned struct_cdrom_tocentry_sz;
   extern unsigned struct_cdrom_tochdr_sz;
   extern unsigned struct_cdrom_volctrl_sz;
   extern unsigned struct_ff_effect_sz;
   extern unsigned struct_floppy_drive_params_sz;
   extern unsigned struct_floppy_drive_struct_sz;
   extern unsigned struct_floppy_fdc_state_sz;
   extern unsigned struct_floppy_max_errors_sz;
   extern unsigned struct_floppy_raw_cmd_sz;
   extern unsigned struct_floppy_struct_sz;
   extern unsigned struct_floppy_write_errors_sz;
   extern unsigned struct_format_descr_sz;
   extern unsigned struct_hd_driveid_sz;
   extern unsigned struct_hd_geometry_sz;
   extern unsigned struct_input_absinfo_sz;
   extern unsigned struct_input_id_sz;
   extern unsigned struct_mtpos_sz;
   extern unsigned struct_termio_sz;
   extern unsigned struct_vt_consize_sz;
   extern unsigned struct_vt_sizes_sz;
   extern unsigned struct_vt_stat_sz;
 #endif  // SANITIZER_LINUX
 
   // Linux or FreeBSD:
 #if SANITIZER_LINUX || SANITIZER_FREEBSD
   extern unsigned struct_copr_buffer_sz;
   extern unsigned struct_copr_debug_buf_sz;
   extern unsigned struct_copr_msg_sz;
   extern unsigned struct_midi_info_sz;
   extern unsigned struct_mtget_sz;
   extern unsigned struct_mtop_sz;
   extern unsigned struct_rtentry_sz;
   extern unsigned struct_sbi_instrument_sz;
   extern unsigned struct_seq_event_rec_sz;
   extern unsigned struct_synth_info_sz;
   extern unsigned struct_vt_mode_sz;
 #endif // SANITIZER_LINUX || SANITIZER_FREEBSD
 
   // Linux excluding Android:
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
   extern unsigned struct_ax25_parms_struct_sz;
   extern unsigned struct_cyclades_monitor_sz;
   extern unsigned struct_input_keymap_entry_sz;
   extern unsigned struct_ipx_config_data_sz;
   extern unsigned struct_kbdiacrs_sz;
   extern unsigned struct_kbentry_sz;
   extern unsigned struct_kbkeycode_sz;
   extern unsigned struct_kbsentry_sz;
   extern unsigned struct_mtconfiginfo_sz;
   extern unsigned struct_nr_parms_struct_sz;
   extern unsigned struct_scc_modem_sz;
   extern unsigned struct_scc_stat_sz;
   extern unsigned struct_serial_multiport_struct_sz;
   extern unsigned struct_serial_struct_sz;
   extern unsigned struct_sockaddr_ax25_sz;
   extern unsigned struct_unimapdesc_sz;
   extern unsigned struct_unimapinit_sz;
 #endif  // SANITIZER_LINUX && !SANITIZER_ANDROID
 
   // Linux or FreeBSD, excluding Android:
 #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID
   extern unsigned struct_audio_buf_info_sz;
   extern unsigned struct_ppp_stats_sz;
 #endif  // (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID
 
   // Every platform except Android and Mac:
 #if !SANITIZER_ANDROID && !SANITIZER_MAC
   extern unsigned struct_sioc_sg_req_sz;
   extern unsigned struct_sioc_vif_req_sz;
 #endif
 
   // ioctl request identifiers
   //
   // Each IOCTL_* variable below is only declared here. Presumably it is
   // defined in a .cc file with the value of the system macro of the same
   // name; confirm there.
 
   // A special value to mark ioctls that are not present on the target platform,
   // when it cannot be determined without including any system headers.
   extern const unsigned IOCTL_NOT_PRESENT;
 
   // Requests declared unconditionally (file, socket/interface and terminal
   // ioctls).
   extern unsigned IOCTL_FIOASYNC;
   extern unsigned IOCTL_FIOCLEX;
   extern unsigned IOCTL_FIOGETOWN;
   extern unsigned IOCTL_FIONBIO;
   extern unsigned IOCTL_FIONCLEX;
   extern unsigned IOCTL_FIOSETOWN;
   extern unsigned IOCTL_SIOCADDMULTI;
   extern unsigned IOCTL_SIOCATMARK;
   extern unsigned IOCTL_SIOCDELMULTI;
   extern unsigned IOCTL_SIOCGIFADDR;
   extern unsigned IOCTL_SIOCGIFBRDADDR;
   extern unsigned IOCTL_SIOCGIFCONF;
   extern unsigned IOCTL_SIOCGIFDSTADDR;
   extern unsigned IOCTL_SIOCGIFFLAGS;
   extern unsigned IOCTL_SIOCGIFMETRIC;
   extern unsigned IOCTL_SIOCGIFMTU;
   extern unsigned IOCTL_SIOCGIFNETMASK;
   extern unsigned IOCTL_SIOCGPGRP;
   extern unsigned IOCTL_SIOCSIFADDR;
   extern unsigned IOCTL_SIOCSIFBRDADDR;
   extern unsigned IOCTL_SIOCSIFDSTADDR;
   extern unsigned IOCTL_SIOCSIFFLAGS;
   extern unsigned IOCTL_SIOCSIFMETRIC;
   extern unsigned IOCTL_SIOCSIFMTU;
   extern unsigned IOCTL_SIOCSIFNETMASK;
   extern unsigned IOCTL_SIOCSPGRP;
   extern unsigned IOCTL_TIOCCONS;
   extern unsigned IOCTL_TIOCEXCL;
   extern unsigned IOCTL_TIOCGETD;
   extern unsigned IOCTL_TIOCGPGRP;
   extern unsigned IOCTL_TIOCGWINSZ;
   extern unsigned IOCTL_TIOCMBIC;
   extern unsigned IOCTL_TIOCMBIS;
   extern unsigned IOCTL_TIOCMGET;
   extern unsigned IOCTL_TIOCMSET;
   extern unsigned IOCTL_TIOCNOTTY;
   extern unsigned IOCTL_TIOCNXCL;
   extern unsigned IOCTL_TIOCOUTQ;
   extern unsigned IOCTL_TIOCPKT;
   extern unsigned IOCTL_TIOCSCTTY;
   extern unsigned IOCTL_TIOCSETD;
   extern unsigned IOCTL_TIOCSPGRP;
   extern unsigned IOCTL_TIOCSTI;
   extern unsigned IOCTL_TIOCSWINSZ;
   // Linux or FreeBSD, excluding Android:
 #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID
   extern unsigned IOCTL_SIOCGETSGCNT;
   extern unsigned IOCTL_SIOCGETVIFCNT;
 #endif
   // Request identifiers declared for Linux only (Android included); the
   // group runs to the matching `#endif  // SANITIZER_LINUX`. Declarations
   // only, with no values assigned in this header.
 #if SANITIZER_LINUX
   extern unsigned IOCTL_EVIOCGABS;
   extern unsigned IOCTL_EVIOCGBIT;
   extern unsigned IOCTL_EVIOCGEFFECTS;
   extern unsigned IOCTL_EVIOCGID;
   extern unsigned IOCTL_EVIOCGKEY;
   extern unsigned IOCTL_EVIOCGKEYCODE;
   extern unsigned IOCTL_EVIOCGLED;
   extern unsigned IOCTL_EVIOCGNAME;
   extern unsigned IOCTL_EVIOCGPHYS;
   extern unsigned IOCTL_EVIOCGRAB;
   extern unsigned IOCTL_EVIOCGREP;
   extern unsigned IOCTL_EVIOCGSND;
   extern unsigned IOCTL_EVIOCGSW;
   extern unsigned IOCTL_EVIOCGUNIQ;
   extern unsigned IOCTL_EVIOCGVERSION;
   extern unsigned IOCTL_EVIOCRMFF;
   extern unsigned IOCTL_EVIOCSABS;
   extern unsigned IOCTL_EVIOCSFF;
   extern unsigned IOCTL_EVIOCSKEYCODE;
   extern unsigned IOCTL_EVIOCSREP;
   extern unsigned IOCTL_BLKFLSBUF;
   extern unsigned IOCTL_BLKGETSIZE;
   extern unsigned IOCTL_BLKRAGET;
   extern unsigned IOCTL_BLKRASET;
   extern unsigned IOCTL_BLKROGET;
   extern unsigned IOCTL_BLKROSET;
   extern unsigned IOCTL_BLKRRPART;
   extern unsigned IOCTL_CDROMAUDIOBUFSIZ;
   extern unsigned IOCTL_CDROMEJECT;
   extern unsigned IOCTL_CDROMEJECT_SW;
   extern unsigned IOCTL_CDROMMULTISESSION;
   extern unsigned IOCTL_CDROMPAUSE;
   extern unsigned IOCTL_CDROMPLAYMSF;
   extern unsigned IOCTL_CDROMPLAYTRKIND;
   extern unsigned IOCTL_CDROMREADAUDIO;
   extern unsigned IOCTL_CDROMREADCOOKED;
   extern unsigned IOCTL_CDROMREADMODE1;
   extern unsigned IOCTL_CDROMREADMODE2;
   extern unsigned IOCTL_CDROMREADRAW;
   extern unsigned IOCTL_CDROMREADTOCENTRY;
   extern unsigned IOCTL_CDROMREADTOCHDR;
   extern unsigned IOCTL_CDROMRESET;
   extern unsigned IOCTL_CDROMRESUME;
   extern unsigned IOCTL_CDROMSEEK;
   extern unsigned IOCTL_CDROMSTART;
   extern unsigned IOCTL_CDROMSTOP;
   extern unsigned IOCTL_CDROMSUBCHNL;
   extern unsigned IOCTL_CDROMVOLCTRL;
   extern unsigned IOCTL_CDROMVOLREAD;
   extern unsigned IOCTL_CDROM_GET_UPC;
   extern unsigned IOCTL_FDCLRPRM;
   extern unsigned IOCTL_FDDEFPRM;
   extern unsigned IOCTL_FDFLUSH;
   extern unsigned IOCTL_FDFMTBEG;
   extern unsigned IOCTL_FDFMTEND;
   extern unsigned IOCTL_FDFMTTRK;
   extern unsigned IOCTL_FDGETDRVPRM;
   extern unsigned IOCTL_FDGETDRVSTAT;
   extern unsigned IOCTL_FDGETDRVTYP;
   extern unsigned IOCTL_FDGETFDCSTAT;
   extern unsigned IOCTL_FDGETMAXERRS;
   extern unsigned IOCTL_FDGETPRM;
   extern unsigned IOCTL_FDMSGOFF;
   extern unsigned IOCTL_FDMSGON;
   extern unsigned IOCTL_FDPOLLDRVSTAT;
   extern unsigned IOCTL_FDRAWCMD;
   extern unsigned IOCTL_FDRESET;
   extern unsigned IOCTL_FDSETDRVPRM;
   extern unsigned IOCTL_FDSETEMSGTRESH;
   extern unsigned IOCTL_FDSETMAXERRS;
   extern unsigned IOCTL_FDSETPRM;
   extern unsigned IOCTL_FDTWADDLE;
   extern unsigned IOCTL_FDWERRORCLR;
   extern unsigned IOCTL_FDWERRORGET;
   extern unsigned IOCTL_HDIO_DRIVE_CMD;
   extern unsigned IOCTL_HDIO_GETGEO;
   extern unsigned IOCTL_HDIO_GET_32BIT;
   extern unsigned IOCTL_HDIO_GET_DMA;
   extern unsigned IOCTL_HDIO_GET_IDENTITY;
   extern unsigned IOCTL_HDIO_GET_KEEPSETTINGS;
   extern unsigned IOCTL_HDIO_GET_MULTCOUNT;
   extern unsigned IOCTL_HDIO_GET_NOWERR;
   extern unsigned IOCTL_HDIO_GET_UNMASKINTR;
   extern unsigned IOCTL_HDIO_SET_32BIT;
   extern unsigned IOCTL_HDIO_SET_DMA;
   extern unsigned IOCTL_HDIO_SET_KEEPSETTINGS;
   extern unsigned IOCTL_HDIO_SET_MULTCOUNT;
   extern unsigned IOCTL_HDIO_SET_NOWERR;
   extern unsigned IOCTL_HDIO_SET_UNMASKINTR;
   extern unsigned IOCTL_MTIOCPOS;
   extern unsigned IOCTL_PPPIOCGASYNCMAP;
   extern unsigned IOCTL_PPPIOCGDEBUG;
   extern unsigned IOCTL_PPPIOCGFLAGS;
   extern unsigned IOCTL_PPPIOCGUNIT;
   extern unsigned IOCTL_PPPIOCGXASYNCMAP;
   extern unsigned IOCTL_PPPIOCSASYNCMAP;
   extern unsigned IOCTL_PPPIOCSDEBUG;
   extern unsigned IOCTL_PPPIOCSFLAGS;
   extern unsigned IOCTL_PPPIOCSMAXCID;
   extern unsigned IOCTL_PPPIOCSMRU;
   extern unsigned IOCTL_PPPIOCSXASYNCMAP;
   extern unsigned IOCTL_SIOCDARP;
   extern unsigned IOCTL_SIOCDRARP;
   extern unsigned IOCTL_SIOCGARP;
   extern unsigned IOCTL_SIOCGIFENCAP;
   extern unsigned IOCTL_SIOCGIFHWADDR;
   extern unsigned IOCTL_SIOCGIFMAP;
   extern unsigned IOCTL_SIOCGIFMEM;
   extern unsigned IOCTL_SIOCGIFNAME;
   extern unsigned IOCTL_SIOCGIFSLAVE;
   extern unsigned IOCTL_SIOCGRARP;
   extern unsigned IOCTL_SIOCGSTAMP;
   extern unsigned IOCTL_SIOCSARP;
   extern unsigned IOCTL_SIOCSIFENCAP;
   extern unsigned IOCTL_SIOCSIFHWADDR;
   extern unsigned IOCTL_SIOCSIFLINK;
   extern unsigned IOCTL_SIOCSIFMAP;
   extern unsigned IOCTL_SIOCSIFMEM;
   extern unsigned IOCTL_SIOCSIFSLAVE;
   extern unsigned IOCTL_SIOCSRARP;
   extern unsigned IOCTL_SNDCTL_COPR_HALT;
   extern unsigned IOCTL_SNDCTL_COPR_LOAD;
   extern unsigned IOCTL_SNDCTL_COPR_RCODE;
   extern unsigned IOCTL_SNDCTL_COPR_RCVMSG;
   extern unsigned IOCTL_SNDCTL_COPR_RDATA;
   extern unsigned IOCTL_SNDCTL_COPR_RESET;
   extern unsigned IOCTL_SNDCTL_COPR_RUN;
   extern unsigned IOCTL_SNDCTL_COPR_SENDMSG;
   extern unsigned IOCTL_SNDCTL_COPR_WCODE;
   extern unsigned IOCTL_SNDCTL_COPR_WDATA;
   extern unsigned IOCTL_TCFLSH;
   extern unsigned IOCTL_TCGETA;
   extern unsigned IOCTL_TCGETS;
   extern unsigned IOCTL_TCSBRK;
   extern unsigned IOCTL_TCSBRKP;
   extern unsigned IOCTL_TCSETA;
   extern unsigned IOCTL_TCSETAF;
   extern unsigned IOCTL_TCSETAW;
   extern unsigned IOCTL_TCSETS;
   extern unsigned IOCTL_TCSETSF;
   extern unsigned IOCTL_TCSETSW;
   extern unsigned IOCTL_TCXONC;
   extern unsigned IOCTL_TIOCGLCKTRMIOS;
   extern unsigned IOCTL_TIOCGSOFTCAR;
   extern unsigned IOCTL_TIOCINQ;
   extern unsigned IOCTL_TIOCLINUX;
   extern unsigned IOCTL_TIOCSERCONFIG;
   extern unsigned IOCTL_TIOCSERGETLSR;
   extern unsigned IOCTL_TIOCSERGWILD;
   extern unsigned IOCTL_TIOCSERSWILD;
   extern unsigned IOCTL_TIOCSLCKTRMIOS;
   extern unsigned IOCTL_TIOCSSOFTCAR;
   extern unsigned IOCTL_VT_DISALLOCATE;
   extern unsigned IOCTL_VT_GETSTATE;
   extern unsigned IOCTL_VT_RESIZE;
   extern unsigned IOCTL_VT_RESIZEX;
   extern unsigned IOCTL_VT_SENDSIG;
 #endif  // SANITIZER_LINUX
   // Request identifiers declared for both Linux and FreeBSD; the group runs
   // to the matching `#endif  // SANITIZER_LINUX || SANITIZER_FREEBSD`.
   // Declarations only, with no values assigned in this header.
 #if SANITIZER_LINUX || SANITIZER_FREEBSD
   extern unsigned IOCTL_MTIOCGET;
   extern unsigned IOCTL_MTIOCTOP;
   extern unsigned IOCTL_SIOCADDRT;
   extern unsigned IOCTL_SIOCDELRT;
   extern unsigned IOCTL_SNDCTL_DSP_GETBLKSIZE;
   extern unsigned IOCTL_SNDCTL_DSP_GETFMTS;
   extern unsigned IOCTL_SNDCTL_DSP_NONBLOCK;
   extern unsigned IOCTL_SNDCTL_DSP_POST;
   extern unsigned IOCTL_SNDCTL_DSP_RESET;
   extern unsigned IOCTL_SNDCTL_DSP_SETFMT;
   extern unsigned IOCTL_SNDCTL_DSP_SETFRAGMENT;
   extern unsigned IOCTL_SNDCTL_DSP_SPEED;
   extern unsigned IOCTL_SNDCTL_DSP_STEREO;
   extern unsigned IOCTL_SNDCTL_DSP_SUBDIVIDE;
   extern unsigned IOCTL_SNDCTL_DSP_SYNC;
   extern unsigned IOCTL_SNDCTL_FM_4OP_ENABLE;
   extern unsigned IOCTL_SNDCTL_FM_LOAD_INSTR;
   extern unsigned IOCTL_SNDCTL_MIDI_INFO;
   extern unsigned IOCTL_SNDCTL_MIDI_PRETIME;
   extern unsigned IOCTL_SNDCTL_SEQ_CTRLRATE;
   extern unsigned IOCTL_SNDCTL_SEQ_GETINCOUNT;
   extern unsigned IOCTL_SNDCTL_SEQ_GETOUTCOUNT;
   extern unsigned IOCTL_SNDCTL_SEQ_NRMIDIS;
   extern unsigned IOCTL_SNDCTL_SEQ_NRSYNTHS;
   extern unsigned IOCTL_SNDCTL_SEQ_OUTOFBAND;
   extern unsigned IOCTL_SNDCTL_SEQ_PANIC;
   extern unsigned IOCTL_SNDCTL_SEQ_PERCMODE;
   extern unsigned IOCTL_SNDCTL_SEQ_RESET;
   extern unsigned IOCTL_SNDCTL_SEQ_RESETSAMPLES;
   extern unsigned IOCTL_SNDCTL_SEQ_SYNC;
   extern unsigned IOCTL_SNDCTL_SEQ_TESTMIDI;
   extern unsigned IOCTL_SNDCTL_SEQ_THRESHOLD;
   extern unsigned IOCTL_SNDCTL_SYNTH_INFO;
   extern unsigned IOCTL_SNDCTL_SYNTH_MEMAVL;
   extern unsigned IOCTL_SNDCTL_TMR_CONTINUE;
   extern unsigned IOCTL_SNDCTL_TMR_METRONOME;
   extern unsigned IOCTL_SNDCTL_TMR_SELECT;
   extern unsigned IOCTL_SNDCTL_TMR_SOURCE;
   extern unsigned IOCTL_SNDCTL_TMR_START;
   extern unsigned IOCTL_SNDCTL_TMR_STOP;
   extern unsigned IOCTL_SNDCTL_TMR_TEMPO;
   extern unsigned IOCTL_SNDCTL_TMR_TIMEBASE;
   extern unsigned IOCTL_SOUND_MIXER_READ_ALTPCM;
   extern unsigned IOCTL_SOUND_MIXER_READ_BASS;
   extern unsigned IOCTL_SOUND_MIXER_READ_CAPS;
   extern unsigned IOCTL_SOUND_MIXER_READ_CD;
   extern unsigned IOCTL_SOUND_MIXER_READ_DEVMASK;
   extern unsigned IOCTL_SOUND_MIXER_READ_ENHANCE;
   extern unsigned IOCTL_SOUND_MIXER_READ_IGAIN;
   extern unsigned IOCTL_SOUND_MIXER_READ_IMIX;
   extern unsigned IOCTL_SOUND_MIXER_READ_LINE1;
   extern unsigned IOCTL_SOUND_MIXER_READ_LINE2;
   extern unsigned IOCTL_SOUND_MIXER_READ_LINE3;
   extern unsigned IOCTL_SOUND_MIXER_READ_LINE;
   extern unsigned IOCTL_SOUND_MIXER_READ_LOUD;
   extern unsigned IOCTL_SOUND_MIXER_READ_MIC;
   extern unsigned IOCTL_SOUND_MIXER_READ_MUTE;
   extern unsigned IOCTL_SOUND_MIXER_READ_OGAIN;
   extern unsigned IOCTL_SOUND_MIXER_READ_PCM;
   extern unsigned IOCTL_SOUND_MIXER_READ_RECLEV;
   extern unsigned IOCTL_SOUND_MIXER_READ_RECMASK;
   extern unsigned IOCTL_SOUND_MIXER_READ_RECSRC;
   extern unsigned IOCTL_SOUND_MIXER_READ_SPEAKER;
   extern unsigned IOCTL_SOUND_MIXER_READ_STEREODEVS;
   extern unsigned IOCTL_SOUND_MIXER_READ_SYNTH;
   extern unsigned IOCTL_SOUND_MIXER_READ_TREBLE;
   extern unsigned IOCTL_SOUND_MIXER_READ_VOLUME;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_ALTPCM;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_BASS;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_CD;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_ENHANCE;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_IGAIN;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_IMIX;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE1;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE2;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE3;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_LOUD;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_MIC;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_MUTE;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_OGAIN;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_PCM;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_RECLEV;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_RECSRC;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_SPEAKER;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_SYNTH;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_TREBLE;
   extern unsigned IOCTL_SOUND_MIXER_WRITE_VOLUME;
   extern unsigned IOCTL_SOUND_PCM_READ_BITS;
   extern unsigned IOCTL_SOUND_PCM_READ_CHANNELS;
   extern unsigned IOCTL_SOUND_PCM_READ_FILTER;
   extern unsigned IOCTL_SOUND_PCM_READ_RATE;
   extern unsigned IOCTL_SOUND_PCM_WRITE_CHANNELS;
   extern unsigned IOCTL_SOUND_PCM_WRITE_FILTER;
   extern unsigned IOCTL_VT_ACTIVATE;
   extern unsigned IOCTL_VT_GETMODE;
   extern unsigned IOCTL_VT_OPENQRY;
   extern unsigned IOCTL_VT_RELDISP;
   extern unsigned IOCTL_VT_SETMODE;
   extern unsigned IOCTL_VT_WAITACTIVE;
 #endif  // SANITIZER_LINUX || SANITIZER_FREEBSD
 
   // Request identifiers declared for Linux excluding Android; the group runs
   // to the matching `#endif  // SANITIZER_LINUX && !SANITIZER_ANDROID`.
   // Declarations only, with no values assigned in this header.
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
   extern unsigned IOCTL_CYGETDEFTHRESH;
   extern unsigned IOCTL_CYGETDEFTIMEOUT;
   extern unsigned IOCTL_CYGETMON;
   extern unsigned IOCTL_CYGETTHRESH;
   extern unsigned IOCTL_CYGETTIMEOUT;
   extern unsigned IOCTL_CYSETDEFTHRESH;
   extern unsigned IOCTL_CYSETDEFTIMEOUT;
   extern unsigned IOCTL_CYSETTHRESH;
   extern unsigned IOCTL_CYSETTIMEOUT;
   extern unsigned IOCTL_EQL_EMANCIPATE;
   extern unsigned IOCTL_EQL_ENSLAVE;
   extern unsigned IOCTL_EQL_GETMASTRCFG;
   extern unsigned IOCTL_EQL_GETSLAVECFG;
   extern unsigned IOCTL_EQL_SETMASTRCFG;
   extern unsigned IOCTL_EQL_SETSLAVECFG;
   extern unsigned IOCTL_EVIOCGKEYCODE_V2;
   extern unsigned IOCTL_EVIOCGPROP;
   extern unsigned IOCTL_EVIOCSKEYCODE_V2;
   extern unsigned IOCTL_FS_IOC_GETFLAGS;
   extern unsigned IOCTL_FS_IOC_GETVERSION;
   extern unsigned IOCTL_FS_IOC_SETFLAGS;
   extern unsigned IOCTL_FS_IOC_SETVERSION;
   extern unsigned IOCTL_GIO_CMAP;
   extern unsigned IOCTL_GIO_FONT;
   extern unsigned IOCTL_GIO_UNIMAP;
   extern unsigned IOCTL_GIO_UNISCRNMAP;
   extern unsigned IOCTL_KDADDIO;
   extern unsigned IOCTL_KDDELIO;
   extern unsigned IOCTL_KDGETKEYCODE;
   extern unsigned IOCTL_KDGKBDIACR;
   extern unsigned IOCTL_KDGKBENT;
   extern unsigned IOCTL_KDGKBLED;
   extern unsigned IOCTL_KDGKBMETA;
   extern unsigned IOCTL_KDGKBSENT;
   extern unsigned IOCTL_KDMAPDISP;
   extern unsigned IOCTL_KDSETKEYCODE;
   extern unsigned IOCTL_KDSIGACCEPT;
   extern unsigned IOCTL_KDSKBDIACR;
   extern unsigned IOCTL_KDSKBENT;
   extern unsigned IOCTL_KDSKBLED;
   extern unsigned IOCTL_KDSKBMETA;
   extern unsigned IOCTL_KDSKBSENT;
   extern unsigned IOCTL_KDUNMAPDISP;
   extern unsigned IOCTL_LPABORT;
   extern unsigned IOCTL_LPABORTOPEN;
   extern unsigned IOCTL_LPCAREFUL;
   extern unsigned IOCTL_LPCHAR;
   extern unsigned IOCTL_LPGETIRQ;
   extern unsigned IOCTL_LPGETSTATUS;
   extern unsigned IOCTL_LPRESET;
   extern unsigned IOCTL_LPSETIRQ;
   extern unsigned IOCTL_LPTIME;
   extern unsigned IOCTL_LPWAIT;
   extern unsigned IOCTL_MTIOCGETCONFIG;
   extern unsigned IOCTL_MTIOCSETCONFIG;
   extern unsigned IOCTL_PIO_CMAP;
   extern unsigned IOCTL_PIO_FONT;
   extern unsigned IOCTL_PIO_UNIMAP;
   extern unsigned IOCTL_PIO_UNIMAPCLR;
   extern unsigned IOCTL_PIO_UNISCRNMAP;
   extern unsigned IOCTL_SCSI_IOCTL_GET_IDLUN;
   extern unsigned IOCTL_SCSI_IOCTL_PROBE_HOST;
   extern unsigned IOCTL_SCSI_IOCTL_TAGGED_DISABLE;
   extern unsigned IOCTL_SCSI_IOCTL_TAGGED_ENABLE;
   extern unsigned IOCTL_SIOCAIPXITFCRT;
   extern unsigned IOCTL_SIOCAIPXPRISLT;
   extern unsigned IOCTL_SIOCAX25ADDUID;
   extern unsigned IOCTL_SIOCAX25DELUID;
   extern unsigned IOCTL_SIOCAX25GETPARMS;
   extern unsigned IOCTL_SIOCAX25GETUID;
   extern unsigned IOCTL_SIOCAX25NOUID;
   extern unsigned IOCTL_SIOCAX25SETPARMS;
   extern unsigned IOCTL_SIOCDEVPLIP;
   extern unsigned IOCTL_SIOCIPXCFGDATA;
   extern unsigned IOCTL_SIOCNRDECOBS;
   extern unsigned IOCTL_SIOCNRGETPARMS;
   extern unsigned IOCTL_SIOCNRRTCTL;
   extern unsigned IOCTL_SIOCNRSETPARMS;
   extern unsigned IOCTL_SNDCTL_DSP_GETISPACE;
   extern unsigned IOCTL_SNDCTL_DSP_GETOSPACE;
   extern unsigned IOCTL_TIOCGSERIAL;
   extern unsigned IOCTL_TIOCSERGETMULTI;
   extern unsigned IOCTL_TIOCSERSETMULTI;
   extern unsigned IOCTL_TIOCSSERIAL;
 #endif  // SANITIZER_LINUX && !SANITIZER_ANDROID
 
 #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID
   extern unsigned IOCTL_GIO_SCRNMAP;
   extern unsigned IOCTL_KDDISABIO;
   extern unsigned IOCTL_KDENABIO;
   extern unsigned IOCTL_KDGETLED;
   extern unsigned IOCTL_KDGETMODE;
   extern unsigned IOCTL_KDGKBMODE;
   extern unsigned IOCTL_KDGKBTYPE;
   extern unsigned IOCTL_KDMKTONE;
   extern unsigned IOCTL_KDSETLED;
   extern unsigned IOCTL_KDSETMODE;
   extern unsigned IOCTL_KDSKBMODE;
   extern unsigned IOCTL_KIOCSOUND;
   extern unsigned IOCTL_PIO_SCRNMAP;
 #endif
 
   // Constants declared here and defined elsewhere. Presumably they carry the
   // platform's EINVAL and EOWNERDEAD errno values; confirm in the defining
   // .cc file.
   extern const int errno_EINVAL;
   extern const int errno_EOWNERDEAD;
 
   // Presumably the platform's SEGV_MAPERR and SEGV_ACCERR siginfo codes;
   // confirm in the defining .cc file.
   extern const int si_SEGV_MAPERR;
   extern const int si_SEGV_ACCERR;
 }  // namespace __sanitizer
 
 // Compile-time check that the shadow type __sanitizer_<TYPE> has the same
 // size as the real TYPE. The expansion has no trailing semicolon; the call
 // site supplies it.
 #define CHECK_TYPE_SIZE(TYPE) \
   COMPILER_CHECK(sizeof(__sanitizer_##TYPE) == sizeof(TYPE))
 
 // Compile-time check that MEMBER has the same size and the same offset in
 // the shadow type __sanitizer_<CLASS> as in the real CLASS. The null-pointer
 // member access appears only inside sizeof, so it is never evaluated.
 // Expands to two COMPILER_CHECKs; the second has no trailing semicolon, so
 // the call site supplies it.
 #define CHECK_SIZE_AND_OFFSET(CLASS, MEMBER)                       \
   COMPILER_CHECK(sizeof(((__sanitizer_##CLASS *) NULL)->MEMBER) == \
                  sizeof(((CLASS *) NULL)->MEMBER));                \
   COMPILER_CHECK(offsetof(__sanitizer_##CLASS, MEMBER) ==          \
                  offsetof(CLASS, MEMBER))
 
 // For sigaction, which is a function and struct at the same time,
 // and thus requires explicit "struct" in sizeof() expression.
 // Otherwise identical to CHECK_SIZE_AND_OFFSET.
 #define CHECK_STRUCT_SIZE_AND_OFFSET(CLASS, MEMBER)                       \
   COMPILER_CHECK(sizeof(((struct __sanitizer_##CLASS *) NULL)->MEMBER) == \
                  sizeof(((struct CLASS *) NULL)->MEMBER));                \
   COMPILER_CHECK(offsetof(struct __sanitizer_##CLASS, MEMBER) ==          \
                  offsetof(struct CLASS, MEMBER))
 
 #endif
Index: head/contrib/openbsm/libbsm/bsm_wrappers.c
===================================================================
--- head/contrib/openbsm/libbsm/bsm_wrappers.c	(revision 318735)
+++ head/contrib/openbsm/libbsm/bsm_wrappers.c	(revision 318736)
@@ -1,851 +1,853 @@
 /*-
  * Copyright (c) 2004-2009 Apple Inc.
  * Copyright (c) 2016 Robert N. M. Watson
  * All rights reserved.
  *
  * Portions of this software were developed by BAE Systems, the University of
  * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL
  * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent
  * Computing (TC) research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1.  Redistributions of source code must retain the above copyright
  *     notice, this list of conditions and the following disclaimer.
  * 2.  Redistributions in binary form must reproduce the above copyright
  *     notice, this list of conditions and the following disclaimer in the
  *     documentation and/or other materials provided with the distribution.
  * 3.  Neither the name of Apple Inc. ("Apple") nor the names of
  *     its contributors may be used to endorse or promote products derived
  *     from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #ifdef __APPLE__
 #define	_SYS_AUDIT_H		/* Prevent include of sys/audit.h. */
 #endif
 
 #include <sys/param.h>
 #include <sys/stat.h>
 
 #ifdef __APPLE__
 #include <sys/queue.h>		/* Our bsm/audit.h doesn't include queue.h. */
 #endif
 
 #include <sys/sysctl.h>
 
 #include <bsm/libbsm.h>
 
 #include <unistd.h>
 #include <syslog.h>
 #include <stdarg.h>
 #include <string.h>
 #include <errno.h>
 
 /* These are not advertised in libbsm.h */
 int audit_set_terminal_port(dev_t *p);
 int audit_set_terminal_host(uint32_t *m);
 
 /*
  * General purpose audit submission mechanism for userspace.
  *
  * Builds one audit record for the calling process: a subject token, an
  * optional text token formatted from fmt and its arguments (omitted when
  * fmt is NULL), and a return token built from status and reterr.  The
  * record is then committed under au_event with au_close(AU_TO_WRITE).
  *
  * Returns 0 when the record is committed, and also when audit_get_cond()
  * fails with ENOSYS or reports AUC_NOAUDIT (nothing is written in those
  * cases).  Returns -1 with errno set on any other failure; each failure is
  * also reported through syslog(3).
  */
 int
 audit_submit(short au_event, au_id_t auid, char status,
     int reterr, const char *fmt, ...)
 {
 	char text[MAX_AUDITSTRING_LEN];
 	token_t *token;
 	int acond;
 	va_list ap;
 	pid_t pid;
 	int error, afd, subj_ex;
 	struct auditinfo ai;
 	struct auditinfo_addr aia;
 	au_tid_t atid;
 
 	if (audit_get_cond(&acond) != 0) {
 		/*
 		 * If auditon(2) returns ENOSYS, then audit has not been
 		 * compiled into the kernel, so just return.
 		 */
 		if (errno == ENOSYS)
 			return (0);
 		/* Preserve errno across syslog(3) for the caller. */
 		error = errno;
 		syslog(LOG_AUTH | LOG_ERR, "audit: auditon failed: %s",
 		    strerror(errno));
 		errno = error;
 		return (-1);
 	}
 	if (acond == AUC_NOAUDIT)
 		return (0);
 	afd = au_open();
 	if (afd < 0) {
 		error = errno;
 		syslog(LOG_AUTH | LOG_ERR, "audit: au_open failed: %s",
 		    strerror(errno));
 		errno = error;
 		return (-1);
 	}
 	/*
 	 * Try to use getaudit_addr(2) first.  If this kernel does not support
 	 * it, then fall back on to getaudit(2).
 	 */
 	subj_ex = 0;
 	error = getaudit_addr(&aia, sizeof(aia));
 	if (error < 0 && errno == ENOSYS) {
 		error = getaudit(&ai);
 		if (error < 0) {
 			error = errno;
 			syslog(LOG_AUTH | LOG_ERR, "audit: getaudit failed: %s",
 			    strerror(errno));
 			errno = error;
 			return (-1);
 		}
 		/*
 		 * Convert this auditinfo_t to an auditinfo_addr_t to make the
 		 * following code less complicated wrt to preselection and
 		 * subject token generation.
 		 */
 		aia.ai_auid = ai.ai_auid;
 		aia.ai_mask = ai.ai_mask;
 		aia.ai_asid = ai.ai_asid;
 		aia.ai_termid.at_type = AU_IPv4;
 		aia.ai_termid.at_addr[0] = ai.ai_termid.machine;
 		aia.ai_termid.at_port = ai.ai_termid.port;
 	} else if (error < 0) {
 		error = errno;
 		syslog(LOG_AUTH | LOG_ERR, "audit: getaudit_addr failed: %s",
 		    strerror(errno));
 		errno = error;
 		return (-1);
 	}
 	/*
 	 * NB: We should be performing pre-selection here now that we have the
 	 * masks for this process.
 	 */
 	/* An IPv6 terminal ID needs the extended subject token. */
 	if (aia.ai_termid.at_type == AU_IPv6)
 		subj_ex = 1;
 	pid = getpid();
 	if (subj_ex == 0) {
 		atid.port = aia.ai_termid.at_port;
 		atid.machine = aia.ai_termid.at_addr[0];
 		token = au_to_subject32(auid, geteuid(), getegid(),
 		    getuid(), getgid(), pid, pid, &atid);
 	} else
 		token = au_to_subject_ex(auid, geteuid(), getegid(),
 		    getuid(), getgid(), pid, pid, &aia.ai_termid);
 	if (token == NULL) {
 		syslog(LOG_AUTH | LOG_ERR,
 		    "audit: unable to build subject token");
 		(void) au_close(afd, AU_TO_NO_WRITE, au_event);
 		errno = EPERM;
 		return (-1);
 	}
 	if (au_write(afd, token) < 0) {
 		error = errno;
 		syslog(LOG_AUTH | LOG_ERR,
 		    "audit: au_write failed: %s", strerror(errno));
 		/*
 		 * The rejected token was not attached to the record, so
 		 * release it here, as audit_write() does.
 		 */
 		au_free_token(token);
 		(void) au_close(afd, AU_TO_NO_WRITE, au_event);
 		errno = error;
 		return (-1);
 	}
 	if (fmt != NULL) {
 		va_start(ap, fmt);
 		(void) vsnprintf(text, MAX_AUDITSTRING_LEN, fmt, ap);
 		va_end(ap);
 		token = au_to_text(text);
 		if (token == NULL) {
 			syslog(LOG_AUTH | LOG_ERR,
 			    "audit: failed to generate text token");
 			(void) au_close(afd, AU_TO_NO_WRITE, au_event);
 			errno = EPERM;
 			return (-1);
 		}
 		if (au_write(afd, token) < 0) {
 			error = errno;
 			syslog(LOG_AUTH | LOG_ERR,
 			    "audit: au_write failed: %s", strerror(errno));
 			/* Rejected token is still ours; free it. */
 			au_free_token(token);
 			(void) au_close(afd, AU_TO_NO_WRITE, au_event);
 			errno = error;
 			return (-1);
 		}
 	}
 	token = au_to_return32(au_errno_to_bsm(status), reterr);
 	if (token == NULL) {
 		syslog(LOG_AUTH | LOG_ERR,
 		    "audit: unable to build return token");
 		(void) au_close(afd, AU_TO_NO_WRITE, au_event);
 		errno = EPERM;
 		return (-1);
 	}
 	if (au_write(afd, token) < 0) {
 		error = errno;
 		syslog(LOG_AUTH | LOG_ERR,
 		    "audit: au_write failed: %s", strerror(errno));
 		/* Rejected token is still ours; free it. */
 		au_free_token(token);
 		(void) au_close(afd, AU_TO_NO_WRITE, au_event);
 		errno = error;
 		return (-1);
 	}
 	if (au_close(afd, AU_TO_WRITE, au_event) < 0) {
 		error = errno;
 		syslog(LOG_AUTH | LOG_ERR, "audit: record not committed");
 		errno = error;
 		return (-1);
 	}
 	return (0);
 }
 
 /*
  * Store the device number of the caller's terminal in *p.
  *
  * *p is first preset to NODEV (or -1 where NODEV is not defined).  The
  * device is st_rdev of standard input; when fstat() on it fails with EBADF,
  * /dev/console is examined instead.  Returns kAUBadParamErr for a NULL p,
  * kAUStatErr (after a syslog message) when the lookup fails, and kAUNoErr
  * on success.
  */
 int
 audit_set_terminal_port(dev_t *p)
 {
 	struct stat sb;
 
 	if (p == NULL)
 		return (kAUBadParamErr);
 #ifdef NODEV
 	*p = NODEV;
 #else
 	*p = -1;
 #endif
 	/* for /usr/bin/login, try fstat() first */
 	if (fstat(STDIN_FILENO, &sb) == 0) {
 		*p = sb.st_rdev;
 		return (kAUNoErr);
 	}
 	/* Only a missing stdin descriptor falls through to the console. */
 	if (errno != EBADF) {
 		syslog(LOG_ERR, "fstat() failed (%s)",
 		    strerror(errno));
 		return (kAUStatErr);
 	}
 	if (stat("/dev/console", &sb) != 0) {
 		syslog(LOG_ERR, "stat() failed (%s)",
 		    strerror(errno));
 		return (kAUStatErr);
 	}
 	*p = sb.st_rdev;
 	return (kAUNoErr);
 }
 
 int
 audit_set_terminal_host(uint32_t *m)
 {
 
 #ifdef KERN_HOSTID
 	int name[2] = { CTL_KERN, KERN_HOSTID };
 	size_t len;
 
 	if (m == NULL)
 		return (kAUBadParamErr);
 	*m = 0;
 	len = sizeof(*m);
 	if (sysctl(name, 2, m, &len, NULL, 0) != 0) {
 		syslog(LOG_ERR, "sysctl() failed (%s)", strerror(errno));
 		return (kAUSysctlErr);
 	}
 	return (kAUNoErr);
 #else
 	*m = -1;
 	return (kAUNoErr);
 #endif
 }
 
 /*
  * Fill in a terminal ID: the port via audit_set_terminal_port() and the
  * machine via audit_set_terminal_host().  Returns kAUBadParamErr when tid is
  * NULL, the error from audit_set_terminal_port() if that call fails, and
  * otherwise the result of audit_set_terminal_host().
  */
 int
 audit_set_terminal_id(au_tid_t *tid)
 {
+	dev_t port;
 	int ret;
 
 	if (tid == NULL)
 		return (kAUBadParamErr);
-	if ((ret = audit_set_terminal_port(&tid->port)) != kAUNoErr)
+	if ((ret = audit_set_terminal_port(&port)) != kAUNoErr)
 		return (ret);
+	tid->port = port;
 	return (audit_set_terminal_host(&tid->machine));
 }
 
 /*
  * This is OK for those callers who have only one token to write.  If you have
  * multiple tokens that logically form part of the same audit record, you need
  * to use the existing au_open()/au_write()/au_close() API:
  *
  * aufd = au_open();
  * tok = au_to_random_token_1(...);
  * au_write(aufd, tok);
  * tok = au_to_random_token_2(...);
  * au_write(aufd, tok);
  * ...
  * au_close(aufd, AU_TO_WRITE, AUE_your_event_type);
  *
  * Assumes, like all wrapper calls, that the caller has previously checked
  * that auditing is enabled via the audit_get_state() call.
  *
  * XXX: Should be more robust against bad arguments.
  */
 int
 audit_write(short event_code, token_t *subject, token_t *misctok, char retval,
     int errcode)
 {
 	const char *const func = "audit_write()";
 	token_t *ret_tok;
 	int afd;
 
 	afd = au_open();
 	if (afd == -1) {
 		/* No record to attach them to: drop both caller tokens. */
 		au_free_token(subject);
 		au_free_token(misctok);
 		syslog(LOG_ERR, "%s: au_open() failed", func);
 		return (kAUOpenErr);
 	}
 
 	/* Save subject. */
 	if (subject != NULL && au_write(afd, subject) == -1) {
 		au_free_token(subject);
 		au_free_token(misctok);
 		(void)au_close(afd, AU_TO_NO_WRITE, event_code);
 		syslog(LOG_ERR, "%s: write of subject failed", func);
 		return (kAUWriteSubjectTokErr);
 	}
 
 	/* Save the event-specific token. */
 	if (misctok != NULL && au_write(afd, misctok) == -1) {
 		au_free_token(misctok);
 		(void)au_close(afd, AU_TO_NO_WRITE, event_code);
 		syslog(LOG_ERR, "%s: write of caller token failed", func);
 		return (kAUWriteCallerTokErr);
 	}
 
 	/* Tokenize and save the return value. */
 	ret_tok = au_to_return32(retval, errcode);
 	if (ret_tok == NULL) {
 		(void)au_close(afd, AU_TO_NO_WRITE, event_code);
 		syslog(LOG_ERR, "%s: au_to_return32() failed", func);
 		return (kAUMakeReturnTokErr);
 	}
 	if (au_write(afd, ret_tok) == -1) {
 		au_free_token(ret_tok);
 		(void)au_close(afd, AU_TO_NO_WRITE, event_code);
 		syslog(LOG_ERR, "%s: write of return code failed", func);
 		return (kAUWriteReturnTokErr);
 	}
 
 	/*
 	 * The caller is assumed to have already decided to keep the
 	 * record, so commit it unconditionally.
 	 */
 	if (au_close(afd, AU_TO_WRITE, event_code) < 0) {
 		syslog(LOG_ERR, "%s: au_close() failed", func);
 		return (kAUCloseErr);
 	}
 	return (kAUNoErr);
 }
 
 /*
  * Build a subject token from the given credentials and hand it, together
  * with tok, to audit_write() with a return value and error code of 0.
  * Returns kAUMakeSubjectTokErr if the subject token cannot be built,
  * otherwise whatever audit_write() returns.
  *
  * Same caveats as audit_write().  This function explicitly assumes success;
  * use audit_write_failure() on error.
  */
 int
 audit_write_success(short event_code, token_t *tok, au_id_t auid, uid_t euid,
     gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, au_asid_t sid,
     au_tid_t *tid)
 {
 	const char *const func = "audit_write_success()";
 	token_t *subj_tok;
 
 	/* Tokenize and save subject. */
 	subj_tok = au_to_subject32(auid, euid, egid, ruid, rgid, pid, sid,
 	    tid);
 	if (subj_tok != NULL)
 		return (audit_write(event_code, subj_tok, tok, 0, 0));
 	syslog(LOG_ERR, "%s: au_to_subject32() failed", func);
 	return (kAUMakeSubjectTokErr);
 }
 
 /*
  * Like audit_write_success(), but the subject token comes from au_to_me().
  * Returns kAUMakeSubjectTokErr if au_to_me() fails, otherwise whatever
  * audit_write() returns.
  *
  * Same caveats as audit_write().  This function explicitly assumes success;
  * use audit_write_failure_self() on error.
  */
 int
 audit_write_success_self(short event_code, token_t *tok)
 {
 	const char *const func = "audit_write_success_self()";
 	token_t *subj_tok;
 
 	subj_tok = au_to_me();
 	if (subj_tok != NULL)
 		return (audit_write(event_code, subj_tok, tok, 0, 0));
 	syslog(LOG_ERR, "%s: au_to_me() failed", func);
 	return (kAUMakeSubjectTokErr);
 }
 
 /*
  * Build a subject token from the given credentials and a text token from
  * errmsg, then pass both to audit_write() with a return value of -1 and
  * errcode as the error code.  Returns kAUMakeSubjectTokErr or
  * kAUMakeTextTokErr when a token cannot be built, otherwise whatever
  * audit_write() returns.
  *
  * Same caveats as audit_write().  This function explicitly assumes failure;
  * use audit_write_success() otherwise.
  *
  * XXX  This should let the caller pass an error return value rather than
  * hard-coding -1.
  */
 int
 audit_write_failure(short event_code, char *errmsg, int errcode, au_id_t auid,
     uid_t euid, gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, au_asid_t sid,
     au_tid_t *tid)
 {
 	const char *const func = "audit_write_failure()";
 	token_t *subj_tok, *msg_tok;
 
 	subj_tok = au_to_subject32(auid, euid, egid, ruid, rgid, pid, sid,
 	    tid);
 	if (subj_tok == NULL) {
 		syslog(LOG_ERR, "%s: au_to_subject32() failed", func);
 		return (kAUMakeSubjectTokErr);
 	}
 
 	/* tokenize and save the error message */
 	msg_tok = au_to_text(errmsg);
 	if (msg_tok != NULL)
 		return (audit_write(event_code, subj_tok, msg_tok, -1,
 		    errcode));
 	/* The subject token was never handed to audit_write(). */
 	au_free_token(subj_tok);
 	syslog(LOG_ERR, "%s: au_to_text() failed", func);
 	return (kAUMakeTextTokErr);
 }
 
 /*
  * Like audit_write_failure(), but the subject token comes from au_to_me().
  * Returns kAUMakeSubjectTokErr or kAUMakeTextTokErr when a token cannot be
  * built, otherwise whatever audit_write() returns.
  *
  * Same caveats as audit_write().  This function explicitly assumes failure;
  * use audit_write_success_self() otherwise.
  *
  * XXX  This should let the caller pass an error return value rather than
  * hard-coding -1.
  */
 int
 audit_write_failure_self(short event_code, char *errmsg, int errret)
 {
 	const char *const func = "audit_write_failure_self()";
 	token_t *subj_tok, *msg_tok;
 
 	subj_tok = au_to_me();
 	if (subj_tok == NULL) {
 		syslog(LOG_ERR, "%s: au_to_me() failed", func);
 		return (kAUMakeSubjectTokErr);
 	}
 	/* tokenize and save the error message */
 	msg_tok = au_to_text(errmsg);
 	if (msg_tok != NULL)
 		return (audit_write(event_code, subj_tok, msg_tok, -1,
 		    errret));
 	/* The subject token was never handed to audit_write(). */
 	au_free_token(subj_tok);
 	syslog(LOG_ERR, "%s: au_to_text() failed", func);
 	return (kAUMakeTextTokErr);
 }
 
 /*
  * For auditing errors during login.  Such errors are implicitly
  * non-attributable (i.e., not ascribable to any user).
  *
  * Forwards to audit_write_failure() and returns its result.
  *
  * Assumes, like all wrapper calls, that the caller has previously checked
  * that auditing is enabled via the audit_get_state() call.
  */
 int
 audit_write_failure_na(short event_code, char *errmsg, int errret, uid_t euid,
     uid_t egid, pid_t pid, au_tid_t *tid)
 {
 	int rv;
 
 	/* -1 is passed for auid, ruid, rgid and sid. */
 	rv = audit_write_failure(event_code, errmsg, errret, -1, euid, egid,
 	    -1, -1, pid, -1, tid);
 	return (rv);
 }
 
 /* END OF au_write() WRAPPERS */
 
 #ifdef __APPLE__
 /*
  * Unpack an audit_token_t into its individual fields.  Each output pointer
  * may be NULL, in which case that field is skipped.  val[0]..val[6] are
  * cast and copied to auidp, euidp, egidp, ruidp, rgidp, pidp and asidp in
  * that order.  For tidp, the port comes from val[7] and the machine from
  * audit_set_terminal_host(), whose return value is ignored here.
  */
 void
 audit_token_to_au32(audit_token_t atoken, uid_t *auidp, uid_t *euidp,
     gid_t *egidp, uid_t *ruidp, gid_t *rgidp, pid_t *pidp, au_asid_t *asidp,
     au_tid_t *tidp)
 {
 
 	if (auidp != NULL)
 		*auidp = (uid_t)atoken.val[0];
 	if (euidp != NULL)
 		*euidp = (uid_t)atoken.val[1];
 	if (egidp != NULL)
 		*egidp = (gid_t)atoken.val[2];
 	if (ruidp != NULL)
 		*ruidp = (uid_t)atoken.val[3];
 	if (rgidp != NULL)
 		*rgidp = (gid_t)atoken.val[4];
 	if (pidp != NULL)
 		*pidp = (pid_t)atoken.val[5];
 	if (asidp != NULL)
 		*asidp = (au_asid_t)atoken.val[6];
 	if (tidp != NULL) {
 		audit_set_terminal_host(&tidp->machine);
 		tidp->port = (dev_t)atoken.val[7];
 	}
 }
 #endif /* __APPLE__ */
 
 /*
  * Read a value into *cond with auditon(A_GETCOND) and return auditon()'s
  * result.  Where A_OLDGETCOND is defined and the first call fails with
  * EINVAL, the request is retried with that command using a long.
  */
 int
 audit_get_cond(int *cond)
 {
 	int rv;
 
 	rv = auditon(A_GETCOND, cond, sizeof(*cond));
 #ifdef A_OLDGETCOND
 	if (rv != 0 && errno == EINVAL) {
 		long legacy;
 
 		legacy = *cond;
 		rv = auditon(A_OLDGETCOND, &legacy, sizeof(legacy));
 		*cond = (int)legacy;
 	}
 #endif
 	return (rv);
 }
 
 /*
  * Pass *cond to auditon(A_SETCOND) and return auditon()'s result.  Where
  * A_OLDSETCOND is defined and the first call fails with EINVAL, the request
  * is retried with that command using a long, and *cond is overwritten with
  * the long's value afterwards.
  */
 int
 audit_set_cond(int *cond)
 {
 	int rv;
 
 	rv = auditon(A_SETCOND, cond, sizeof(*cond));
 #ifdef A_OLDSETCOND
 	if (rv != 0 && errno == EINVAL) {
 		long legacy;
 
 		legacy = (long)*cond;
 		rv = auditon(A_OLDSETCOND, &legacy, sizeof(legacy));
 		*cond = (int)legacy;
 	}
 #endif
 	return (rv);
 }
 
 /*
  * Read a value into *policy with auditon(A_GETPOLICY) and return auditon()'s
  * result.  Where A_OLDGETPOLICY is defined and the first call fails with
  * EINVAL, the request is retried with that command using a long.
  */
 int
 audit_get_policy(int *policy)
 {
 	int rv;
 
 	rv = auditon(A_GETPOLICY, policy, sizeof(*policy));
 #ifdef A_OLDGETPOLICY
 	if (rv != 0 && errno == EINVAL) {
 		long legacy;
 
 		legacy = (long)*policy;
 		rv = auditon(A_OLDGETPOLICY, &legacy, sizeof(legacy));
 		*policy = (int)legacy;
 	}
 #endif
 	return (rv);
 }
 
 /*
  * Pass *policy to auditon(A_SETPOLICY) and return auditon()'s result.  Where
  * A_OLDSETPOLICY is defined and the first call fails with EINVAL, the
  * request is retried with that command using a long, and *policy is
  * overwritten with the long's value afterwards.
  */
 int
 audit_set_policy(int *policy)
 {
 	int rv;
 
 	rv = auditon(A_SETPOLICY, policy, sizeof(*policy));
 #ifdef A_OLDSETPOLICY
 	if (rv != 0 && errno == EINVAL) {
 		long legacy;
 
 		legacy = (long)*policy;
 		rv = auditon(A_OLDSETPOLICY, &legacy, sizeof(legacy));
 		*policy = (int)legacy;
 	}
 #endif
 	return (rv);
 }
 
 /*
  * Read into *qctrl with auditon(A_GETQCTRL).  sz must equal
  * sizeof(*qctrl); otherwise -1 is returned with errno set to EINVAL.
  * Where A_OLDGETQCTRL is defined and the first call fails with EINVAL, the
  * request is retried with that command through a struct old_qctrl whose
  * fields are copied from and back to *qctrl.
  */
 int
 audit_get_qctrl(au_qctrl_t *qctrl, size_t sz)
 {
 	int rv;
 
 	if (sz != sizeof(*qctrl)) {
 		errno = EINVAL;
 		return (-1);
 	}
 
 	rv = auditon(A_GETQCTRL, qctrl, sizeof(*qctrl));
 #ifdef A_OLDGETQCTRL
 	if (rv != 0 && errno == EINVAL) {
 		struct old_qctrl {
 			size_t   oq_hiwater;
 			size_t   oq_lowater;
 			size_t   oq_bufsz;
 			clock_t  oq_delay;
 			int	 oq_minfree;
 		} legacy;
 
 		/* Seed the old-layout struct from the caller's values. */
 		legacy.oq_hiwater = (size_t)qctrl->aq_hiwater;
 		legacy.oq_lowater = (size_t)qctrl->aq_lowater;
 		legacy.oq_bufsz = (size_t)qctrl->aq_bufsz;
 		legacy.oq_delay = (clock_t)qctrl->aq_delay;
 		legacy.oq_minfree = qctrl->aq_minfree;
 
 		rv = auditon(A_OLDGETQCTRL, &legacy, sizeof(legacy));
 
 		/* Copy back regardless of the retry's outcome. */
 		qctrl->aq_hiwater = (int)legacy.oq_hiwater;
 		qctrl->aq_lowater = (int)legacy.oq_lowater;
 		qctrl->aq_bufsz = (int)legacy.oq_bufsz;
 		qctrl->aq_delay = (int)legacy.oq_delay;
 		qctrl->aq_minfree = legacy.oq_minfree;
 	}
 #endif /* A_OLDGETQCTRL */
 	return (rv);
 }
 
 /*
  * Pass *qctrl to auditon(A_SETQCTRL).  sz must equal sizeof(*qctrl);
  * otherwise -1 is returned with errno set to EINVAL.  Where A_OLDSETQCTRL
  * is defined and the first call fails with EINVAL, the request is retried
  * with that command through a struct old_qctrl whose fields are copied
  * from and back to *qctrl.
  */
 int
 audit_set_qctrl(au_qctrl_t *qctrl, size_t sz)
 {
 	int rv;
 
 	if (sz != sizeof(*qctrl)) {
 		errno = EINVAL;
 		return (-1);
 	}
 
 	rv = auditon(A_SETQCTRL, qctrl, sz);
 #ifdef	A_OLDSETQCTRL
 	if (rv != 0 && errno == EINVAL) {
 		struct old_qctrl {
 			size_t   oq_hiwater;
 			size_t   oq_lowater;
 			size_t   oq_bufsz;
 			clock_t  oq_delay;
 			int	 oq_minfree;
 		} legacy;
 
 		/* Seed the old-layout struct from the caller's values. */
 		legacy.oq_hiwater = (size_t)qctrl->aq_hiwater;
 		legacy.oq_lowater = (size_t)qctrl->aq_lowater;
 		legacy.oq_bufsz = (size_t)qctrl->aq_bufsz;
 		legacy.oq_delay = (clock_t)qctrl->aq_delay;
 		legacy.oq_minfree = qctrl->aq_minfree;
 
 		rv = auditon(A_OLDSETQCTRL, &legacy, sizeof(legacy));
 
 		/* Copy back regardless of the retry's outcome. */
 		qctrl->aq_hiwater = (int)legacy.oq_hiwater;
 		qctrl->aq_lowater = (int)legacy.oq_lowater;
 		qctrl->aq_bufsz = (int)legacy.oq_bufsz;
 		qctrl->aq_delay = (int)legacy.oq_delay;
 		qctrl->aq_minfree = legacy.oq_minfree;
 	}
 #endif /* A_OLDSETQCTRL */
 	return (rv);
 }
 
 /*
  * Thin wrapper: passes trigger to auditon() with the A_SENDTRIGGER command
  * and returns auditon()'s result.
  */
 int
 audit_send_trigger(int *trigger)
 {
 	int rv;
 
 	rv = auditon(A_SENDTRIGGER, trigger, sizeof(*trigger));
 	return (rv);
 }
 
 /*
  * Thin wrapper: passes aia to auditon() with the A_GETKAUDIT command.
  * sz must equal sizeof(*aia); otherwise auditon() is not called and -1 is
  * returned with errno set to EINVAL.
  */
 int
 audit_get_kaudit(auditinfo_addr_t *aia, size_t sz)
 {
 
 	if (sz == sizeof(*aia))
 		return (auditon(A_GETKAUDIT, aia, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes aia to auditon() with the A_SETKAUDIT command.
  * sz must equal sizeof(*aia); otherwise auditon() is not called and -1 is
  * returned with errno set to EINVAL.
  */
 int
 audit_set_kaudit(auditinfo_addr_t *aia, size_t sz)
 {
 
 	if (sz == sizeof(*aia))
 		return (auditon(A_SETKAUDIT, aia, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes evc_map to auditon() with the A_GETCLASS command.
  * sz must equal sizeof(*evc_map); otherwise auditon() is not called and -1
  * is returned with errno set to EINVAL.
  */
 int
 audit_get_class(au_evclass_map_t *evc_map, size_t sz)
 {
 
 	if (sz == sizeof(*evc_map))
 		return (auditon(A_GETCLASS, evc_map, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes evc_map to auditon() with the A_SETCLASS command.
  * sz must equal sizeof(*evc_map); otherwise auditon() is not called and -1
  * is returned with errno set to EINVAL.
  */
 int
 audit_set_class(au_evclass_map_t *evc_map, size_t sz)
 {
 
 	if (sz == sizeof(*evc_map))
 		return (auditon(A_SETCLASS, evc_map, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes evn_map to auditon() with the A_GETEVENT command.
  * sz must equal sizeof(*evn_map); otherwise auditon() is not called and -1
  * is returned with errno set to EINVAL.
  */
 int
 audit_get_event(au_evname_map_t *evn_map, size_t sz)
 {
 
 	if (sz == sizeof(*evn_map))
 		return (auditon(A_GETEVENT, evn_map, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes evn_map to auditon() with the A_SETEVENT command.
  * sz must equal sizeof(*evn_map); otherwise auditon() is not called and -1
  * is returned with errno set to EINVAL.
  */
 int
 audit_set_event(au_evname_map_t *evn_map, size_t sz)
 {
 
 	if (sz == sizeof(*evn_map))
 		return (auditon(A_SETEVENT, evn_map, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes kmask to auditon() with the A_GETKMASK command.
  * sz must equal sizeof(*kmask); otherwise auditon() is not called and -1 is
  * returned with errno set to EINVAL.
  */
 int
 audit_get_kmask(au_mask_t *kmask, size_t sz)
 {
 	if (sz == sizeof(*kmask))
 		return (auditon(A_GETKMASK, kmask, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes kmask to auditon() with the A_SETKMASK command.
  * sz must equal sizeof(*kmask); otherwise auditon() is not called and -1 is
  * returned with errno set to EINVAL.
  */
 int
 audit_set_kmask(au_mask_t *kmask, size_t sz)
 {
 	if (sz == sizeof(*kmask))
 		return (auditon(A_SETKMASK, kmask, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes fstat to auditon() with the A_GETFSIZE command.
  * sz must equal sizeof(*fstat); otherwise auditon() is not called and -1 is
  * returned with errno set to EINVAL.
  */
 int
 audit_get_fsize(au_fstat_t *fstat, size_t sz)
 {
 
 	if (sz == sizeof(*fstat))
 		return (auditon(A_GETFSIZE, fstat, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes fstat to auditon() with the A_SETFSIZE command.
  * sz must equal sizeof(*fstat); otherwise auditon() is not called and -1 is
  * returned with errno set to EINVAL.
  */
 int
 audit_set_fsize(au_fstat_t *fstat, size_t sz)
 {
 
 	if (sz == sizeof(*fstat))
 		return (auditon(A_SETFSIZE, fstat, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes api to auditon() with the A_SETPMASK command.
  * sz must equal sizeof(*api); otherwise auditon() is not called and -1 is
  * returned with errno set to EINVAL.
  */
 int
 audit_set_pmask(auditpinfo_t *api, size_t sz)
 {
 
 	if (sz == sizeof(*api))
 		return (auditon(A_SETPMASK, api, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes api to auditon() with the A_GETPINFO command.
  * sz must equal sizeof(*api); otherwise auditon() is not called and -1 is
  * returned with errno set to EINVAL.
  */
 int
 audit_get_pinfo(auditpinfo_t *api, size_t sz)
 {
 
 	if (sz == sizeof(*api))
 		return (auditon(A_GETPINFO, api, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes apia to auditon() with the A_GETPINFO_ADDR command.
  * sz must equal sizeof(*apia); otherwise auditon() is not called and -1 is
  * returned with errno set to EINVAL.
  */
 int
 audit_get_pinfo_addr(auditpinfo_addr_t *apia, size_t sz)
 {
 
 	if (sz == sizeof(*apia))
 		return (auditon(A_GETPINFO_ADDR, apia, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes aia to auditon() with the A_GETSINFO_ADDR command.
  * sz must equal sizeof(*aia); otherwise auditon() is not called and -1 is
  * returned with errno set to EINVAL.
  */
 int
 audit_get_sinfo_addr(auditinfo_addr_t *aia, size_t sz)
 {
 
 	if (sz == sizeof(*aia))
 		return (auditon(A_GETSINFO_ADDR, aia, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes stats to auditon() with the A_GETSTAT command.
  * sz must equal sizeof(*stats); otherwise auditon() is not called and -1 is
  * returned with errno set to EINVAL.
  */
 int
 audit_get_stat(au_stat_t *stats, size_t sz)
 {
 
 	if (sz == sizeof(*stats))
 		return (auditon(A_GETSTAT, stats, sz));
 	errno = EINVAL;
 	return (-1);
 }
 
 /*
  * Thin wrapper: passes stats to auditon() with the A_SETSTAT command.
  * sz must equal sizeof(*stats); otherwise auditon() is not called and -1 is
  * returned with errno set to EINVAL.
  */
 int
 audit_set_stat(au_stat_t *stats, size_t sz)
 {
 
 	if (sizeof(*stats) != sz) {
 		errno = EINVAL;
 		return (-1);
 	}
 
 	/* The setter must issue A_SETSTAT, not the getter's A_GETSTAT. */
 	return (auditon(A_SETSTAT, stats, sz));
 }
 
 /*
  * Thin wrapper: passes the path buffer and its size sz to auditon() with
  * the A_GETCWD command and returns auditon()'s result.
  */
 int
 audit_get_cwd(char *path, size_t sz)
 {
 	int rv;
 
 	rv = auditon(A_GETCWD, path, sz);
 	return (rv);
 }
 
 /*
  * Thin wrapper: passes the path buffer and its size sz to auditon() with
  * the A_GETCAR command and returns auditon()'s result.
  */
 int
 audit_get_car(char *path, size_t sz)
 {
 	int rv;
 
 	rv = auditon(A_GETCAR, path, sz);
 	return (rv);
 }
Index: head/include/dirent.h
===================================================================
--- head/include/dirent.h	(revision 318735)
+++ head/include/dirent.h	(revision 318736)
@@ -1,122 +1,141 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)dirent.h	8.2 (Berkeley) 7/28/94
  * $FreeBSD$
  */
 
 #ifndef _DIRENT_H_
 #define _DIRENT_H_
 
 /*
  * The kernel defines the format of directory entries returned by
  * the getdirentries(2) system call.
  */
 #include <sys/cdefs.h>
 #include <sys/_types.h>
 #include <sys/dirent.h>
 
+#if __BSD_VISIBLE
+
+#ifndef _SIZE_T_DECLARED
+typedef	__size_t	size_t;
+#define	_SIZE_T_DECLARED
+#endif
+
+#ifndef _SSIZE_T_DECLARED
+typedef	__ssize_t	ssize_t;
+#define	_SSIZE_T_DECLARED
+#endif
+
+#ifndef _OFF_T_DECLARED
+typedef	__off_t		off_t;
+#define	_OFF_T_DECLARED
+#endif
+
+#endif /* __BSD_VISIBLE */
+
 #if __XSI_VISIBLE
 
 #ifndef _INO_T_DECLARED
 typedef	__ino_t		ino_t;
 #define	_INO_T_DECLARED
 #endif
 
 /*
  * XXX this is probably illegal in the __XSI_VISIBLE case, but brings us closer
  * to the specification.
  */
 #define	d_ino		d_fileno	/* backward and XSI compatibility */
 
 #endif /* __XSI_VISIBLE */
 
 #if __BSD_VISIBLE
 
 #include <sys/_null.h>
 
 /* definitions for library routines operating on directories. */
 #define	DIRBLKSIZ	1024
 
 struct _dirdesc;
 typedef struct _dirdesc DIR;
 
 /* flags for opendir2 */
 #define DTF_HIDEW	0x0001	/* hide whiteout entries */
 #define DTF_NODUP	0x0002	/* don't return duplicate names */
 #define DTF_REWIND	0x0004	/* rewind after reading union stack */
 #define __DTF_READALL	0x0008	/* everything has been read */
 #define	__DTF_SKIPREAD	0x0010  /* assume internal buffer is populated */
 
 #else /* !__BSD_VISIBLE */
 
 typedef	void *	DIR;
 
 #endif /* __BSD_VISIBLE */
 
 #ifndef _KERNEL
 
 __BEGIN_DECLS
 #if __POSIX_VISIBLE >= 200809 || __XSI_VISIBLE >= 700
 int	 alphasort(const struct dirent **, const struct dirent **);
 int	 dirfd(DIR *);
 #endif
 #if __BSD_VISIBLE
 DIR	*__opendir2(const char *, int);
 int	 fdclosedir(DIR *);
-int	 getdents(int, char *, int);
-int	 getdirentries(int, char *, int, long *);
+ssize_t	 getdents(int, char *, size_t);
+ssize_t	 getdirentries(int, char *, size_t, off_t *);
 #endif
 DIR	*opendir(const char *);
 DIR	*fdopendir(int);
 struct dirent *
 	 readdir(DIR *);
 #if __POSIX_VISIBLE >= 199506 || __XSI_VISIBLE >= 500
 int	 readdir_r(DIR *, struct dirent *, struct dirent **);
 #endif
 void	 rewinddir(DIR *);
 #if __POSIX_VISIBLE >= 200809 || __XSI_VISIBLE >= 700
 int	 scandir(const char *, struct dirent ***,
 	    int (*)(const struct dirent *), int (*)(const struct dirent **,
 	    const struct dirent **));
 #ifdef __BLOCKS__
 int	 scandir_b(const char *, struct dirent ***,
 	    int (^)(const struct dirent *),
 	    int (^)(const struct dirent **, const struct dirent **));
 #endif
 #endif
 #if __XSI_VISIBLE
 void	 seekdir(DIR *, long);
 long	 telldir(DIR *);
 #endif
 int	 closedir(DIR *);
 __END_DECLS
 
 #endif /* !_KERNEL */
 
 #endif /* !_DIRENT_H_ */
Index: head/lib/libarchive/Makefile
===================================================================
--- head/lib/libarchive/Makefile	(revision 318735)
+++ head/lib/libarchive/Makefile	(revision 318736)
@@ -1,420 +1,420 @@
 # $FreeBSD$
 .include <src.opts.mk>
 
 PACKAGE=lib${LIB}
 _LIBARCHIVEDIR=	${SRCTOP}/contrib/libarchive
 
 LIB=	archive
 
 LIBADD=	z bz2 lzma bsdxml
 CFLAGS+= -DHAVE_BZLIB_H=1 -DHAVE_LIBLZMA=1 -DHAVE_LZMA_H=1
 
 # FreeBSD SHLIB_MAJOR value is managed as part of the FreeBSD system.
 # It has no real relation to the libarchive version number.
-SHLIB_MAJOR= 6
+SHLIB_MAJOR= 7
 
 CFLAGS+=	-DPLATFORM_CONFIG_H=\"${.CURDIR}/config_freebsd.h\"
 CFLAGS+=	-I${.OBJDIR}
 
 .if ${MK_OPENSSL} != "no"
 CFLAGS+=	-DWITH_OPENSSL
 LIBADD+=	crypto
 .else
 LIBADD+=	md
 .endif
 
 .if ${MK_ICONV} != "no"
 # TODO: This can be changed back to CFLAGS once iconv works correctly
 # with statically linked binaries.
 SHARED_CFLAGS+=	-DHAVE_ICONV=1 -DHAVE_ICONV_H=1 -DICONV_CONST=
 .endif
 
 .if ${MACHINE_ARCH:Marm*} != "" || ${MACHINE_ARCH:Mmips*} != "" || \
 	${MACHINE_ARCH:Msparc64*} != "" || ${MACHINE_ARCH:Mpowerpc*} != ""
 NO_WCAST_ALIGN=	yes
 .if ${MACHINE_ARCH:M*64*} == ""
 CFLAGS+=	-DPPMD_32BIT
 .endif
 .endif
 NO_WCAST_ALIGN.clang=
 
 .PATH: ${_LIBARCHIVEDIR}/libarchive
 
 # Headers to be installed in /usr/include
 INCS=	archive.h archive_entry.h
 
 # Sources to be compiled.
 SRCS=	archive_acl.c					\
 	archive_check_magic.c				\
 	archive_cmdline.c				\
 	archive_cryptor.c				\
 	archive_disk_acl_freebsd.c			\
 	archive_digest.c				\
 	archive_entry.c					\
 	archive_entry_copy_stat.c			\
 	archive_entry_link_resolver.c			\
 	archive_entry_sparse.c				\
 	archive_entry_stat.c				\
 	archive_entry_strmode.c				\
 	archive_entry_xattr.c				\
 	archive_getdate.c				\
 	archive_hmac.c					\
 	archive_match.c					\
 	archive_options.c				\
 	archive_pack_dev.c				\
 	archive_pathmatch.c				\
 	archive_ppmd7.c					\
 	archive_random.c				\
 	archive_rb.c					\
 	archive_read.c					\
 	archive_read_add_passphrase.c			\
 	archive_read_append_filter.c			\
 	archive_read_data_into_fd.c			\
 	archive_read_disk_entry_from_file.c		\
 	archive_read_disk_posix.c			\
 	archive_read_disk_set_standard_lookup.c		\
 	archive_read_extract.c				\
 	archive_read_extract2.c				\
 	archive_read_open_fd.c				\
 	archive_read_open_file.c			\
 	archive_read_open_filename.c			\
 	archive_read_open_memory.c			\
 	archive_read_set_format.c			\
 	archive_read_set_options.c			\
 	archive_read_support_filter_all.c		\
 	archive_read_support_filter_bzip2.c		\
 	archive_read_support_filter_compress.c		\
 	archive_read_support_filter_gzip.c		\
 	archive_read_support_filter_grzip.c		\
 	archive_read_support_filter_lrzip.c		\
 	archive_read_support_filter_lz4.c		\
 	archive_read_support_filter_lzop.c		\
 	archive_read_support_filter_none.c		\
 	archive_read_support_filter_program.c		\
 	archive_read_support_filter_rpm.c		\
 	archive_read_support_filter_uu.c		\
 	archive_read_support_filter_xz.c		\
 	archive_read_support_format_7zip.c		\
 	archive_read_support_format_all.c		\
 	archive_read_support_format_ar.c		\
 	archive_read_support_format_by_code.c		\
 	archive_read_support_format_cab.c		\
 	archive_read_support_format_cpio.c		\
 	archive_read_support_format_empty.c		\
 	archive_read_support_format_iso9660.c		\
 	archive_read_support_format_lha.c		\
 	archive_read_support_format_mtree.c		\
 	archive_read_support_format_rar.c		\
 	archive_read_support_format_raw.c		\
 	archive_read_support_format_tar.c		\
 	archive_read_support_format_warc.c		\
 	archive_read_support_format_xar.c		\
 	archive_read_support_format_zip.c		\
 	archive_string.c				\
 	archive_string_sprintf.c			\
 	archive_util.c					\
 	archive_version_details.c			\
 	archive_virtual.c				\
 	archive_write.c					\
 	archive_write_add_filter.c			\
 	archive_write_disk_set_standard_lookup.c	\
 	archive_write_disk_posix.c			\
 	archive_write_open_fd.c				\
 	archive_write_open_file.c			\
 	archive_write_open_filename.c			\
 	archive_write_open_memory.c			\
 	archive_write_add_filter_b64encode.c		\
 	archive_write_add_filter_by_name.c		\
 	archive_write_add_filter_bzip2.c		\
 	archive_write_add_filter_compress.c		\
 	archive_write_add_filter_grzip.c		\
 	archive_write_add_filter_gzip.c			\
 	archive_write_add_filter_lrzip.c		\
 	archive_write_add_filter_lz4.c			\
 	archive_write_add_filter_lzop.c			\
 	archive_write_add_filter_none.c			\
 	archive_write_add_filter_program.c		\
 	archive_write_add_filter_uuencode.c		\
 	archive_write_add_filter_xz.c			\
 	archive_write_set_format.c			\
 	archive_write_set_format_7zip.c			\
 	archive_write_set_format_ar.c			\
 	archive_write_set_format_by_name.c		\
 	archive_write_set_format_cpio.c			\
 	archive_write_set_format_cpio_newc.c		\
 	archive_write_set_format_filter_by_ext.c	\
 	archive_write_set_format_gnutar.c		\
 	archive_write_set_format_iso9660.c		\
 	archive_write_set_format_mtree.c		\
 	archive_write_set_format_pax.c			\
 	archive_write_set_format_raw.c			\
 	archive_write_set_format_shar.c			\
 	archive_write_set_format_ustar.c		\
 	archive_write_set_format_v7tar.c		\
 	archive_write_set_format_warc.c			\
 	archive_write_set_format_xar.c			\
 	archive_write_set_format_zip.c			\
 	archive_write_set_passphrase.c			\
 	archive_write_set_options.c			\
 	filter_fork_posix.c
 
 # Man pages to be installed.
 MAN=	archive_entry.3					\
 	archive_entry_acl.3				\
 	archive_entry_linkify.3				\
 	archive_entry_paths.3				\
 	archive_entry_perms.3				\
 	archive_entry_stat.3				\
 	archive_entry_time.3				\
 	archive_read.3					\
 	archive_read_data.3				\
 	archive_read_disk.3				\
 	archive_read_extract.3				\
 	archive_read_filter.3				\
 	archive_read_format.3				\
 	archive_read_free.3				\
 	archive_read_header.3				\
 	archive_read_new.3				\
 	archive_read_open.3				\
 	archive_read_set_options.3			\
 	archive_util.3					\
 	archive_write.3					\
 	archive_write_blocksize.3			\
 	archive_write_data.3				\
 	archive_write_disk.3				\
 	archive_write_filter.3				\
 	archive_write_finish_entry.3			\
 	archive_write_format.3				\
 	archive_write_free.3				\
 	archive_write_header.3				\
 	archive_write_new.3				\
 	archive_write_open.3				\
 	archive_write_set_options.3			\
 	cpio.5						\
 	libarchive.3					\
 	libarchive_changes.3				\
 	libarchive_internals.3				\
 	libarchive-formats.5				\
 	tar.5
 
 # Symlink the man pages under each function name.
 MLINKS+=	archive_entry.3 archive_entry_clear.3
 MLINKS+=	archive_entry.3 archive_entry_clone.3
 MLINKS+=	archive_entry.3 archive_entry_free.3
 MLINKS+=	archive_entry.3 archive_entry_new.3
 MLINKS+=	archive_entry_acl.3 archive_entry_acl_add_entry.3
 MLINKS+=	archive_entry_acl.3 archive_entry_acl_add_entry_w.3
 MLINKS+=	archive_entry_acl.3 archive_entry_acl_clear.3
 MLINKS+=	archive_entry_acl.3 archive_entry_acl_count.3
 MLINKS+=	archive_entry_acl.3 archive_entry_acl_next.3
 MLINKS+=	archive_entry_acl.3 archive_entry_acl_next_w.3
 MLINKS+=	archive_entry_acl.3 archive_entry_acl_reset.3
 MLINKS+=	archive_entry_acl.3 archive_entry_acl_text_w.3
 MLINKS+=	archive_entry_linkify.3 archive_entry_linkresolver.3
 MLINKS+=	archive_entry_linkify.3 archive_entry_linkresolver_new.3
 MLINKS+=	archive_entry_linkify.3 archive_entry_linkresolver_set_strategy.3
 MLINKS+=	archive_entry_linkify.3 archive_entry_linkresolver_free.3
 MLINKS+=	archive_entry_paths.3 archive_entry_copy_hardlink.3
 MLINKS+=	archive_entry_paths.3 archive_entry_copy_hardlink_w.3
 MLINKS+=	archive_entry_paths.3 archive_entry_copy_link.3
 MLINKS+=	archive_entry_paths.3 archive_entry_copy_link_w.3
 MLINKS+=	archive_entry_paths.3 archive_entry_copy_pathname.3
 MLINKS+=	archive_entry_paths.3 archive_entry_copy_pathname_w.3
 MLINKS+=	archive_entry_paths.3 archive_entry_copy_sourcepath.3
 MLINKS+=	archive_entry_paths.3 archive_entry_copy_symlink.3
 MLINKS+=	archive_entry_paths.3 archive_entry_copy_symlink_w.3
 MLINKS+=	archive_entry_paths.3 archive_entry_hardlink.3
 MLINKS+=	archive_entry_paths.3 archive_entry_hardlink_w.3
 MLINKS+=	archive_entry_paths.3 archive_entry_pathname.3
 MLINKS+=	archive_entry_paths.3 archive_entry_pathname_w.3
 MLINKS+=	archive_entry_paths.3 archive_entry_set_hardlink.3
 MLINKS+=	archive_entry_paths.3 archive_entry_set_link.3
 MLINKS+=	archive_entry_paths.3 archive_entry_set_pathname.3
 MLINKS+=	archive_entry_paths.3 archive_entry_set_symlink.3
 MLINKS+=	archive_entry_paths.3 archive_entry_symlink.3
 MLINKS+=	archive_entry_paths.3 archive_entry_symlink_w.3
 MLINKS+=	archive_entry_paths.3 archive_entry_update_symlink_utf8.3
 MLINKS+=	archive_entry_paths.3 archive_entry_update_hardlink_utf8.3
 MLINKS+=	archive_entry_perms.3 archive_entry_copy_fflags_text.3
 MLINKS+=	archive_entry_perms.3 archive_entry_copy_fflags_text_w.3
 MLINKS+=	archive_entry_perms.3 archive_entry_copy_gname.3
 MLINKS+=	archive_entry_perms.3 archive_entry_copy_gname_w.3
 MLINKS+=	archive_entry_perms.3 archive_entry_copy_uname.3
 MLINKS+=	archive_entry_perms.3 archive_entry_copy_uname_w.3
 MLINKS+=	archive_entry_perms.3 archive_entry_fflags.3
 MLINKS+=	archive_entry_perms.3 archive_entry_fflags_text.3
 MLINKS+=	archive_entry_perms.3 archive_entry_gid.3
 MLINKS+=	archive_entry_perms.3 archive_entry_gname.3
 MLINKS+=	archive_entry_perms.3 archive_entry_gname_w.3
 MLINKS+=	archive_entry_perms.3 archive_entry_set_fflags.3
 MLINKS+=	archive_entry_perms.3 archive_entry_set_gid.3
 MLINKS+=	archive_entry_perms.3 archive_entry_set_gname.3
 MLINKS+=	archive_entry_perms.3 archive_entry_perm.3
 MLINKS+=	archive_entry_perms.3 archive_entry_set_perm.3
 MLINKS+=	archive_entry_perms.3 archive_entry_set_uid.3
 MLINKS+=	archive_entry_perms.3 archive_entry_set_uname.3
 MLINKS+=	archive_entry_perms.3 archive_entry_strmode.3
 MLINKS+=	archive_entry_perms.3 archive_entry_uid.3
 MLINKS+=	archive_entry_perms.3 archive_entry_uname.3
 MLINKS+=	archive_entry_perms.3 archive_entry_uname_w.3
 MLINKS+=	archive_entry_perms.3 archive_entry_update_gname_utf8.3
 MLINKS+=	archive_entry_perms.3 archive_entry_update_uname_utf8.3
 MLINKS+=	archive_entry_stat.3 archive_entry_copy_stat.3
 MLINKS+=	archive_entry_stat.3 archive_entry_dev.3
 MLINKS+=	archive_entry_stat.3 archive_entry_dev_is_set.3
 MLINKS+=	archive_entry_stat.3 archive_entry_devmajor.3
 MLINKS+=	archive_entry_stat.3 archive_entry_devminor.3
 MLINKS+=	archive_entry_stat.3 archive_entry_filetype.3
 MLINKS+=	archive_entry_stat.3 archive_entry_ino.3
 MLINKS+=	archive_entry_stat.3 archive_entry_ino64.3
 MLINKS+=	archive_entry_stat.3 archive_entry_ino_is_set.3
 MLINKS+=	archive_entry_stat.3 archive_entry_mode.3
 MLINKS+=	archive_entry_stat.3 archive_entry_nlink.3
 MLINKS+=	archive_entry_stat.3 archive_entry_rdev.3
 MLINKS+=	archive_entry_stat.3 archive_entry_rdevmajor.3
 MLINKS+=	archive_entry_stat.3 archive_entry_rdevminor.3
 MLINKS+=	archive_entry_stat.3 archive_entry_set_dev.3
 MLINKS+=	archive_entry_stat.3 archive_entry_set_devmajor.3
 MLINKS+=	archive_entry_stat.3 archive_entry_set_devminor.3
 MLINKS+=	archive_entry_stat.3 archive_entry_set_filetype.3
 MLINKS+=	archive_entry_stat.3 archive_entry_set_ino.3
 MLINKS+=	archive_entry_stat.3 archive_entry_set_ino64.3
 MLINKS+=	archive_entry_stat.3 archive_entry_set_mode.3
 MLINKS+=	archive_entry_stat.3 archive_entry_set_nlink.3
 MLINKS+=	archive_entry_stat.3 archive_entry_set_rdev.3
 MLINKS+=	archive_entry_stat.3 archive_entry_set_rdevmajor.3
 MLINKS+=	archive_entry_stat.3 archive_entry_set_rdevminor.3
 MLINKS+=	archive_entry_stat.3 archive_entry_set_size.3
 MLINKS+=	archive_entry_stat.3 archive_entry_size.3
 MLINKS+=	archive_entry_stat.3 archive_entry_size_is_set.3
 MLINKS+=	archive_entry_stat.3 archive_entry_unset_size.3
 MLINKS+=	archive_entry_time.3 archive_entry_atime.3
 MLINKS+=	archive_entry_time.3 archive_entry_atime_is_set.3
 MLINKS+=	archive_entry_time.3 archive_entry_atime_nsec.3
 MLINKS+=	archive_entry_time.3 archive_entry_birthtime.3
 MLINKS+=	archive_entry_time.3 archive_entry_birthtime_is_set.3
 MLINKS+=	archive_entry_time.3 archive_entry_birthtime_nsec.3
 MLINKS+=	archive_entry_time.3 archive_entry_ctime.3
 MLINKS+=	archive_entry_time.3 archive_entry_ctime_is_set.3
 MLINKS+=	archive_entry_time.3 archive_entry_ctime_nsec.3
 MLINKS+=	archive_entry_time.3 archive_entry_mtime.3
 MLINKS+=	archive_entry_time.3 archive_entry_mtime_is_set.3
 MLINKS+=	archive_entry_time.3 archive_entry_mtime_nsec.3
 MLINKS+=	archive_entry_time.3 archive_entry_set_atime.3
 MLINKS+=	archive_entry_time.3 archive_entry_set_birthtime.3
 MLINKS+=	archive_entry_time.3 archive_entry_set_ctime.3
 MLINKS+=	archive_entry_time.3 archive_entry_set_mtime.3
 MLINKS+=	archive_entry_time.3 archive_entry_unset_atime.3
 MLINKS+=	archive_entry_time.3 archive_entry_unset_birthtime.3
 MLINKS+=	archive_entry_time.3 archive_entry_unset_ctime.3
 MLINKS+=	archive_entry_time.3 archive_entry_unset_mtime.3
 MLINKS+=	archive_read_data.3 archive_read_data_block.3
 MLINKS+=	archive_read_data.3 archive_read_data_into_fd.3
 MLINKS+=	archive_read_data.3 archive_read_data_skip.3
 MLINKS+=	archive_read_header.3 archive_read_next_header.3
 MLINKS+=	archive_read_header.3 archive_read_next_header2.3
 MLINKS+=	archive_read_extract.3 archive_read_extract2.3
 MLINKS+=	archive_read_extract.3 archive_read_extract_set_progress_callback.3
 MLINKS+=	archive_read_extract.3 archive_read_extract_set_skip_file.3
 MLINKS+=	archive_read_open.3 archive_read_open2.3
 MLINKS+=	archive_read_open.3 archive_read_open_FILE.3
 MLINKS+=	archive_read_open.3 archive_read_open_fd.3
 MLINKS+=	archive_read_open.3 archive_read_open_file.3
 MLINKS+=	archive_read_open.3 archive_read_open_filename.3
 MLINKS+=	archive_read_open.3 archive_read_open_memory.3
 MLINKS+=	archive_read_free.3 archive_read_close.3
 MLINKS+=	archive_read_free.3 archive_read_finish.3
 MLINKS+=	archive_read_filter.3 archive_read_support_filter_all.3
 MLINKS+=	archive_read_filter.3 archive_read_support_filter_bzip2.3
 MLINKS+=	archive_read_filter.3 archive_read_support_filter_compress.3
 MLINKS+=	archive_read_filter.3 archive_read_support_filter_gzip.3
 MLINKS+=	archive_read_filter.3 archive_read_support_filter_lzma.3
 MLINKS+=	archive_read_filter.3 archive_read_support_filter_none.3
 MLINKS+=	archive_read_filter.3 archive_read_support_filter_xz.3
 MLINKS+=	archive_read_filter.3 archive_read_support_filter_program.3
 MLINKS+=	archive_read_filter.3 archive_read_support_filter_program_signature.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_7zip.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_all.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_ar.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_by_code.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_cab.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_cpio.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_empty.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_iso9660.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_lha.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_mtree.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_rar.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_raw.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_tar.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_xar.3
 MLINKS+=	archive_read_format.3 archive_read_support_format_zip.3
 MLINKS+=	archive_read_disk.3 archive_read_disk_entry_from_file.3
 MLINKS+=	archive_read_disk.3 archive_read_disk_gname.3
 MLINKS+=	archive_read_disk.3 archive_read_disk_new.3
 MLINKS+=	archive_read_disk.3 archive_read_disk_set_gname_lookup.3
 MLINKS+=	archive_read_disk.3 archive_read_disk_set_standard_lookup.3
 MLINKS+=	archive_read_disk.3 archive_read_disk_set_symlink_hybrid.3
 MLINKS+=	archive_read_disk.3 archive_read_disk_set_symlink_logical.3
 MLINKS+=	archive_read_disk.3 archive_read_disk_set_symlink_physical.3
 MLINKS+=	archive_read_disk.3 archive_read_disk_set_uname_lookup.3
 MLINKS+=	archive_read_disk.3 archive_read_disk_uname.3
 MLINKS+=	archive_read_set_options.3 archive_read_set_filter_option.3
 MLINKS+=	archive_read_set_options.3 archive_read_set_format_option.3
 MLINKS+=	archive_read_set_options.3 archive_read_set_option.3
 MLINKS+=	archive_util.3 archive_clear_error.3
 MLINKS+=	archive_util.3 archive_compression.3
 MLINKS+=	archive_util.3 archive_compression_name.3
 MLINKS+=	archive_util.3 archive_copy_error.3
 MLINKS+=	archive_util.3 archive_errno.3
 MLINKS+=	archive_util.3 archive_error_string.3
 MLINKS+=	archive_util.3 archive_file_count.3
 MLINKS+=	archive_util.3 archive_filter_code.3
 MLINKS+=	archive_util.3 archive_filter_count.3
 MLINKS+=	archive_util.3 archive_filter_name.3
 MLINKS+=	archive_util.3 archive_format.3
 MLINKS+=	archive_util.3 archive_format_name.3
 MLINKS+=	archive_util.3 archive_position.3
 MLINKS+=	archive_util.3 archive_set_error.3
 MLINKS+=	archive_write_blocksize.3 archive_write_get_bytes_in_last_block.3
 MLINKS+=	archive_write_blocksize.3 archive_write_get_bytes_per_block.3
 MLINKS+=	archive_write_blocksize.3 archive_write_set_bytes_in_last_block.3
 MLINKS+=	archive_write_blocksize.3 archive_write_set_bytes_per_block.3
 MLINKS+=	archive_write_disk.3 archive_write_data_block.3
 MLINKS+=	archive_write_disk.3 archive_write_disk_new.3
 MLINKS+=	archive_write_disk.3 archive_write_disk_set_group_lookup.3
 MLINKS+=	archive_write_disk.3 archive_write_disk_set_options.3
 MLINKS+=	archive_write_disk.3 archive_write_disk_set_skip_file.3
 MLINKS+=	archive_write_disk.3 archive_write_disk_set_standard_lookup.3
 MLINKS+=	archive_write_disk.3 archive_write_disk_set_user_lookup.3
 MLINKS+=	archive_write_filter.3 archive_write_add_filter_bzip2.3
 MLINKS+=	archive_write_filter.3 archive_write_add_filter_compress.3
 MLINKS+=	archive_write_filter.3 archive_write_add_filter_gzip.3
 MLINKS+=	archive_write_filter.3 archive_write_add_filter_lzip.3
 MLINKS+=	archive_write_filter.3 archive_write_add_filter_lzma.3
 MLINKS+=	archive_write_filter.3 archive_write_add_filter_none.3
 MLINKS+=	archive_write_filter.3 archive_write_add_filter_program.3
 MLINKS+=	archive_write_filter.3 archive_write_add_filter_xz.3
 MLINKS+=	archive_write_format.3 archive_write_set_format_cpio.3
 MLINKS+=	archive_write_format.3 archive_write_set_format_pax.3
 MLINKS+=	archive_write_format.3 archive_write_set_format_pax_restricted.3
 MLINKS+=	archive_write_format.3 archive_write_set_format_shar.3
 MLINKS+=	archive_write_format.3 archive_write_set_format_shar_dump.3
 MLINKS+=	archive_write_format.3 archive_write_set_format_ustar.3
 MLINKS+=	archive_write_free.3 archive_write_close.3
 MLINKS+=	archive_write_free.3 archive_write_fail.3
 MLINKS+=	archive_write_free.3 archive_write_finish.3
 MLINKS+=	archive_write_open.3 archive_write_open_FILE.3
 MLINKS+=	archive_write_open.3 archive_write_open_fd.3
 MLINKS+=	archive_write_open.3 archive_write_open_file.3
 MLINKS+=	archive_write_open.3 archive_write_open_filename.3
 MLINKS+=	archive_write_open.3 archive_write_open_memory.3
 MLINKS+=	archive_write_set_options.3 archive_write_set_filter_option.3
 MLINKS+=	archive_write_set_options.3 archive_write_set_format_option.3
 MLINKS+=	archive_write_set_options.3 archive_write_set_option.3
 MLINKS+=	libarchive.3 archive.3
 
 .if ${MK_TESTS} != "no"
 SUBDIR+=	tests
 .endif
 
 .include <bsd.lib.mk>
Index: head/lib/libc/gen/Makefile.inc
===================================================================
--- head/lib/libc/gen/Makefile.inc	(revision 318735)
+++ head/lib/libc/gen/Makefile.inc	(revision 318736)
@@ -1,525 +1,533 @@
 #	@(#)Makefile.inc	8.6 (Berkeley) 5/4/95
 # $FreeBSD$
 
 # machine-independent gen sources
 .PATH: ${LIBC_SRCTOP}/${LIBC_ARCH}/gen ${LIBC_SRCTOP}/gen
 
 SRCS+=	__getosreldate.c \
 	__pthread_mutex_init_calloc_cb_stub.c \
 	__xuname.c \
 	_once_stub.c \
 	_pthread_stubs.c \
 	_rand48.c \
 	_spinlock_stub.c \
 	_thread_init.c \
 	alarm.c \
 	arc4random.c \
 	assert.c \
 	auxv.c \
 	basename.c \
 	basename_compat.c \
 	cap_sandboxed.c \
 	check_utility_compat.c \
 	clock.c \
 	clock_getcpuclockid.c \
 	closedir.c \
 	confstr.c \
 	crypt.c \
 	ctermid.c \
 	daemon.c \
 	devname.c \
 	dirfd.c \
 	dirname.c \
 	dirname_compat.c \
 	disklabel.c \
 	dlfcn.c \
 	drand48.c \
 	dup3.c \
 	elf_utils.c \
 	erand48.c \
 	err.c \
 	errlst.c \
 	errno.c \
 	exec.c \
 	fdevname.c \
 	feature_present.c \
 	fmtcheck.c \
 	fmtmsg.c \
 	fnmatch.c \
 	fpclassify.c \
 	frexp.c \
 	fstab.c \
 	ftok.c \
 	fts.c \
 	ftw.c \
 	getbootfile.c \
 	getbsize.c \
 	getcap.c \
 	getcwd.c \
 	getdomainname.c \
 	getgrent.c \
 	getgrouplist.c \
 	gethostname.c \
 	getloadavg.c \
 	getlogin.c \
 	getmntinfo.c \
 	getnetgrent.c \
 	getosreldate.c \
 	getpagesize.c \
 	getpagesizes.c \
 	getpeereid.c \
 	getprogname.c \
 	getpwent.c \
 	getttyent.c \
 	getusershell.c \
 	getutxent.c \
 	getvfsbyname.c \
 	glob.c \
 	initgroups.c \
 	isatty.c \
 	isinf.c \
 	isnan.c \
 	jrand48.c \
 	lcong48.c \
 	libc_dlopen.c \
 	lockf.c \
 	lrand48.c \
 	mrand48.c \
 	nftw.c \
 	nice.c \
 	nlist.c \
 	nrand48.c \
 	opendir.c \
 	pause.c \
 	pmadvise.c \
 	popen.c \
 	posix_spawn.c \
 	psignal.c \
 	pututxline.c \
 	pw_scan.c \
 	raise.c \
 	readdir.c \
 	readpassphrase.c \
 	recvmmsg.c \
 	rewinddir.c \
 	scandir.c \
 	seed48.c \
 	seekdir.c \
 	semctl.c \
 	sendmmsg.c \
 	setdomainname.c \
 	sethostname.c \
 	setjmperr.c \
 	setmode.c \
 	setproctitle.c \
 	setprogname.c \
 	siginterrupt.c \
 	siglist.c \
 	signal.c \
 	sigsetops.c \
 	sleep.c \
 	srand48.c \
 	statvfs.c \
 	stringlist.c \
 	strtofflags.c \
 	sysconf.c \
 	sysctl.c \
 	sysctlbyname.c \
 	sysctlnametomib.c \
 	syslog.c \
 	telldir.c \
 	termios.c \
 	time.c \
 	times.c \
 	timezone.c \
 	tls.c \
 	ttyname.c \
 	ttyslot.c \
 	ualarm.c \
 	ulimit.c \
 	uname.c \
 	usleep.c \
 	utime.c \
 	utxdb.c \
 	valloc.c \
 	wait.c \
 	wait3.c \
 	waitpid.c \
 	waitid.c \
 	wordexp.c
 .if ${MK_SYMVER} == yes
-SRCS+=	fts-compat.c \
+SRCS+=	devname-compat11.c \
+	fts-compat.c \
+	fts-compat11.c \
+	ftw-compat11.c \
+	getmntinfo-compat11.c \
+	glob-compat11.c \
+	nftw-compat11.c \
+	readdir-compat11.c \
+	scandir-compat11.c \
 	unvis-compat.c
 .endif
 
 .PATH: ${SRCTOP}/contrib/libc-pwcache
 SRCS+=	pwcache.c pwcache.h
 
 .PATH: ${SRCTOP}/contrib/libc-vis
 CFLAGS+=	-I${SRCTOP}/contrib/libc-vis
 SRCS+=	unvis.c vis.c
 
 MISRCS+=modf.c
 
 CANCELPOINTS_SRCS=sem.c sem_new.c
 .for src in ${CANCELPOINTS_SRCS}
 SRCS+=cancelpoints_${src}
 CLEANFILES+=cancelpoints_${src}
 cancelpoints_${src}: ${LIBC_SRCTOP}/gen/${src} .NOMETA
 	ln -sf ${.ALLSRC} ${.TARGET}
 .endfor
 
 SYM_MAPS+=${LIBC_SRCTOP}/gen/Symbol.map
 
 # machine-dependent gen sources
 .sinclude "${LIBC_SRCTOP}/${LIBC_ARCH}/gen/Makefile.inc"
 
 MAN+=	alarm.3 \
 	arc4random.3 \
 	basename.3 \
 	cap_rights_get.3 \
 	cap_sandboxed.3 \
 	check_utility_compat.3 \
 	clock.3 \
 	clock_getcpuclockid.3 \
 	confstr.3 \
 	ctermid.3 \
 	daemon.3 \
 	devname.3 \
 	directory.3 \
 	dirname.3 \
 	dl_iterate_phdr.3 \
 	dladdr.3 \
 	dlinfo.3 \
 	dllockinit.3 \
 	dlopen.3 \
 	dup3.3 \
 	err.3 \
 	exec.3 \
 	feature_present.3 \
 	fmtcheck.3 \
 	fmtmsg.3 \
 	fnmatch.3 \
 	fpclassify.3 \
 	frexp.3 \
 	ftok.3 \
 	fts.3 \
 	ftw.3 \
 	getbootfile.3 \
 	getbsize.3 \
 	getcap.3 \
 	getcontext.3 \
 	getcwd.3 \
 	getdiskbyname.3 \
 	getdomainname.3 \
 	getfsent.3 \
 	getgrent.3 \
 	getgrouplist.3 \
 	gethostname.3 \
 	getloadavg.3 \
 	getmntinfo.3 \
 	getnetgrent.3 \
 	getosreldate.3 \
 	getpagesize.3 \
 	getpagesizes.3 \
 	getpass.3 \
 	getpeereid.3 \
 	getprogname.3 \
 	getpwent.3 \
 	getttyent.3 \
 	getusershell.3 \
 	getutxent.3 \
 	getvfsbyname.3 \
 	glob.3 \
 	initgroups.3 \
 	isgreater.3 \
 	ldexp.3 \
 	lockf.3 \
 	makecontext.3 \
 	modf.3 \
 	nice.3 \
 	nlist.3 \
 	pause.3 \
 	popen.3 \
 	posix_spawn.3 \
 	posix_spawn_file_actions_addopen.3 \
 	posix_spawn_file_actions_init.3 \
 	posix_spawnattr_getflags.3 \
 	posix_spawnattr_getpgroup.3 \
 	posix_spawnattr_getschedparam.3 \
 	posix_spawnattr_getschedpolicy.3 \
 	posix_spawnattr_init.3 \
 	posix_spawnattr_getsigdefault.3 \
 	posix_spawnattr_getsigmask.3 \
 	psignal.3 \
 	pwcache.3 \
 	raise.3 \
 	rand48.3 \
 	readpassphrase.3 \
 	rfork_thread.3 \
 	scandir.3 \
 	sem_destroy.3 \
 	sem_getvalue.3 \
 	sem_init.3 \
 	sem_open.3 \
 	sem_post.3 \
 	sem_timedwait.3 \
 	sem_wait.3 \
 	setjmp.3 \
 	setmode.3 \
 	setproctitle.3 \
 	siginterrupt.3 \
 	signal.3 \
 	sigsetops.3 \
 	sleep.3 \
 	statvfs.3 \
 	stringlist.3 \
 	strtofflags.3 \
 	sysconf.3 \
 	sysctl.3 \
 	syslog.3 \
 	tcgetpgrp.3 \
 	tcgetsid.3 \
 	tcsendbreak.3 \
 	tcsetattr.3 \
 	tcsetpgrp.3 \
 	tcsetsid.3 \
 	time.3 \
 	times.3 \
 	timezone.3 \
 	ttyname.3 \
 	tzset.3 \
 	ualarm.3 \
 	ucontext.3 \
 	ulimit.3 \
 	uname.3 \
 	unvis.3 \
 	usleep.3 \
 	utime.3 \
 	valloc.3 \
 	vis.3 \
 	wordexp.3
 
 MLINKS+=arc4random.3 arc4random_addrandom.3 \
 	arc4random.3 arc4random_stir.3 \
 	arc4random.3 arc4random_buf.3 \
 	arc4random.3 arc4random_uniform.3
 MLINKS+=basename.3 basename_r.3
 MLINKS+=ctermid.3 ctermid_r.3
 MLINKS+=devname.3 devname_r.3
 MLINKS+=devname.3 fdevname.3
 MLINKS+=devname.3 fdevname_r.3
 MLINKS+=directory.3 closedir.3 \
 	directory.3 dirfd.3 \
 	directory.3 fdclosedir.3 \
 	directory.3 fdopendir.3 \
 	directory.3 opendir.3 \
 	directory.3 readdir.3 \
 	directory.3 readdir_r.3 \
 	directory.3 rewinddir.3 \
 	directory.3 seekdir.3 \
 	directory.3 telldir.3
 MLINKS+=dlopen.3 fdlopen.3 \
 	dlopen.3 dlclose.3 \
 	dlopen.3 dlerror.3 \
 	dlopen.3 dlfunc.3 \
 	dlopen.3 dlsym.3
 MLINKS+=err.3 err_set_exit.3 \
 	err.3 err_set_file.3 \
 	err.3 errc.3 \
 	err.3 errx.3 \
 	err.3 verr.3 \
 	err.3 verrc.3 \
 	err.3 verrx.3 \
 	err.3 vwarn.3 \
 	err.3 vwarnc.3 \
 	err.3 vwarnx.3 \
 	err.3 warnc.3 \
 	err.3 warn.3 \
 	err.3 warnx.3
 MLINKS+=exec.3 execl.3 \
 	exec.3 execle.3 \
 	exec.3 execlp.3 \
 	exec.3 exect.3 \
 	exec.3 execv.3 \
 	exec.3 execvP.3 \
 	exec.3 execvp.3
 MLINKS+=fpclassify.3 finite.3 \
 	fpclassify.3 finitef.3 \
 	fpclassify.3 isfinite.3 \
 	fpclassify.3 isinf.3 \
 	fpclassify.3 isnan.3 \
 	fpclassify.3 isnormal.3
 MLINKS+=frexp.3 frexpf.3 \
 	frexp.3 frexpl.3
 MLINKS+=fts.3 fts_children.3 \
 	fts.3 fts_close.3 \
 	fts.3 fts_open.3 \
 	fts.3 fts_read.3 \
 	fts.3 fts_set.3 \
 	fts.3 fts_set_clientptr.3 \
 	fts.3 fts_get_clientptr.3 \
 	fts.3 fts_get_stream.3
 MLINKS+=ftw.3 nftw.3
 MLINKS+=getcap.3 cgetcap.3 \
 	getcap.3 cgetclose.3 \
 	getcap.3 cgetent.3 \
 	getcap.3 cgetfirst.3 \
 	getcap.3 cgetmatch.3 \
 	getcap.3 cgetnext.3 \
 	getcap.3 cgetnum.3 \
 	getcap.3 cgetset.3 \
 	getcap.3 cgetstr.3 \
 	getcap.3 cgetustr.3
 MLINKS+=getcwd.3 getwd.3
 MLINKS+=getcontext.3 getcontextx.3
 MLINKS+=getcontext.3 setcontext.3
 MLINKS+=getdomainname.3 setdomainname.3
 MLINKS+=getfsent.3 endfsent.3 \
 	getfsent.3 getfsfile.3 \
 	getfsent.3 getfsspec.3 \
 	getfsent.3 getfstype.3 \
 	getfsent.3 setfsent.3 \
 	getfsent.3 setfstab.3 \
 	getfsent.3 getfstab.3
 MLINKS+=getgrent.3 endgrent.3 \
 	getgrent.3 getgrgid.3 \
 	getgrent.3 getgrnam.3 \
 	getgrent.3 setgrent.3 \
 	getgrent.3 setgroupent.3 \
 	getgrent.3 getgrent_r.3 \
 	getgrent.3 getgrnam_r.3 \
 	getgrent.3 getgrgid_r.3
 MLINKS+=gethostname.3 sethostname.3
 MLINKS+=getnetgrent.3 endnetgrent.3 \
 	getnetgrent.3 getnetgrent_r.3 \
 	getnetgrent.3 innetgr.3 \
 	getnetgrent.3 setnetgrent.3
 MLINKS+=getprogname.3 setprogname.3
 MLINKS+=getpwent.3 endpwent.3 \
 	getpwent.3 getpwnam.3 \
 	getpwent.3 getpwuid.3 \
 	getpwent.3 setpassent.3 \
 	getpwent.3 setpwent.3 \
 	getpwent.3 setpwfile.3 \
 	getpwent.3 getpwent_r.3 \
 	getpwent.3 getpwnam_r.3 \
 	getpwent.3 getpwuid_r.3
 MLINKS+=getttyent.3 endttyent.3 \
 	getttyent.3 getttynam.3 \
 	getttyent.3 isdialuptty.3 \
 	getttyent.3 isnettty.3 \
 	getttyent.3 setttyent.3
 MLINKS+=getusershell.3 endusershell.3 \
 	getusershell.3 setusershell.3
 MLINKS+=getutxent.3 endutxent.3 \
 	getutxent.3 getutxid.3 \
 	getutxent.3 getutxline.3 \
 	getutxent.3 getutxuser.3 \
 	getutxent.3 pututxline.3 \
 	getutxent.3 setutxdb.3 \
 	getutxent.3 setutxent.3 \
 	getutxent.3 utmpx.3
 MLINKS+=glob.3 globfree.3
 MLINKS+=isgreater.3 isgreaterequal.3 \
 	isgreater.3 isless.3 \
 	isgreater.3 islessequal.3 \
 	isgreater.3 islessgreater.3 \
 	isgreater.3 isunordered.3
 MLINKS+=ldexp.3 ldexpf.3 \
 	ldexp.3 ldexpl.3
 MLINKS+=makecontext.3 swapcontext.3
 MLINKS+=modf.3 modff.3 \
 	modf.3 modfl.3
 MLINKS+=popen.3 pclose.3
 MLINKS+=posix_spawn.3 posix_spawnp.3 \
 	posix_spawn_file_actions_addopen.3 posix_spawn_file_actions_addclose.3 \
 	posix_spawn_file_actions_addopen.3 posix_spawn_file_actions_adddup2.3 \
 	posix_spawn_file_actions_init.3 posix_spawn_file_actions_destroy.3 \
 	posix_spawnattr_getflags.3 posix_spawnattr_setflags.3 \
 	posix_spawnattr_getpgroup.3 posix_spawnattr_setpgroup.3 \
 	posix_spawnattr_getschedparam.3 posix_spawnattr_setschedparam.3 \
 	posix_spawnattr_getschedpolicy.3 posix_spawnattr_setschedpolicy.3 \
 	posix_spawnattr_getsigdefault.3 posix_spawnattr_setsigdefault.3 \
 	posix_spawnattr_getsigmask.3 posix_spawnattr_setsigmask.3 \
 	posix_spawnattr_init.3 posix_spawnattr_destroy.3
 MLINKS+=psignal.3 strsignal.3 \
 	psignal.3 sys_siglist.3 \
 	psignal.3 sys_signame.3
 MLINKS+=pwcache.3 group_from_gid.3 \
 	pwcache.3 user_from_uid.3
 MLINKS+=rand48.3 _rand48.3 \
 	rand48.3 drand48.3 \
 	rand48.3 erand48.3 \
 	rand48.3 jrand48.3 \
 	rand48.3 lcong48.3 \
 	rand48.3 lrand48.3 \
 	rand48.3 mrand48.3 \
 	rand48.3 nrand48.3 \
 	rand48.3 seed48.3 \
 	rand48.3 srand48.3
 MLINKS+=recv.2 recvmmsg.2
 MLINKS+=scandir.3 alphasort.3
 MLINKS+=sem_open.3 sem_close.3 \
 	sem_open.3 sem_unlink.3
 MLINKS+=sem_wait.3 sem_trywait.3
 MLINKS+=sem_timedwait.3 sem_clockwait_np.3
 MLINKS+=send.2 sendmmsg.2
 MLINKS+=setjmp.3 _longjmp.3 \
 	setjmp.3 _setjmp.3 \
 	setjmp.3 longjmp.3 \
 	setjmp.3 longjmperr.3 \
 	setjmp.3 longjmperror.3 \
 	setjmp.3 siglongjmp.3 \
 	setjmp.3 sigsetjmp.3
 MLINKS+=setmode.3 getmode.3
 MLINKS+=sigsetops.3 sigaddset.3 \
 	sigsetops.3 sigdelset.3 \
 	sigsetops.3 sigemptyset.3 \
 	sigsetops.3 sigfillset.3 \
 	sigsetops.3 sigismember.3
 MLINKS+=statvfs.3 fstatvfs.3
 MLINKS+=stringlist.3 sl_add.3 \
 	stringlist.3 sl_find.3 \
 	stringlist.3 sl_free.3 \
 	stringlist.3 sl_init.3
 MLINKS+=strtofflags.3 fflagstostr.3
 MLINKS+=sysctl.3 sysctlbyname.3 \
 	sysctl.3 sysctlnametomib.3
 MLINKS+=syslog.3 closelog.3 \
 	syslog.3 openlog.3 \
 	syslog.3 setlogmask.3 \
 	syslog.3 vsyslog.3
 MLINKS+=tcsendbreak.3 tcdrain.3 \
 	tcsendbreak.3 tcflow.3 \
 	tcsendbreak.3 tcflush.3
 MLINKS+=tcsetattr.3 cfgetispeed.3 \
 	tcsetattr.3 cfgetospeed.3 \
 	tcsetattr.3 cfmakeraw.3 \
 	tcsetattr.3 cfmakesane.3 \
 	tcsetattr.3 cfsetispeed.3 \
 	tcsetattr.3 cfsetospeed.3 \
 	tcsetattr.3 cfsetspeed.3 \
 	tcsetattr.3 tcgetattr.3
 MLINKS+=ttyname.3 isatty.3 \
 	ttyname.3 ttyname_r.3
 MLINKS+=tzset.3 tzsetwall.3
 MLINKS+=unvis.3 strunvis.3 \
 	unvis.3 strunvisx.3
 MLINKS+=vis.3 nvis.3 \
 	vis.3 snvis.3 \
 	vis.3 strenvisx.3 \
 	vis.3 strnunvis.3 \
 	vis.3 strnunvisx.3 \
 	vis.3 strnvis.3 \
 	vis.3 strnvisx.3 \
 	vis.3 strsenvisx.3 \
 	vis.3 strsnvis.3 \
 	vis.3 strsnvisx.3 \
 	vis.3 strsvis.3 \
 	vis.3 strsvisx.3 \
 	vis.3 strvis.3 \
 	vis.3 strvisx.3 \
 	vis.3 svis.3
 
 MLINKS+=wordexp.3 wordfree.3
Index: head/lib/libc/gen/Symbol.map
===================================================================
--- head/lib/libc/gen/Symbol.map	(revision 318735)
+++ head/lib/libc/gen/Symbol.map	(revision 318736)
@@ -1,547 +1,547 @@
 /*
  * $FreeBSD$
  */
 
 FBSD_1.0 {
 	__xuname;
 	pthread_atfork;
 	pthread_attr_destroy;
 	pthread_attr_getdetachstate;
 	pthread_attr_getguardsize;
 	pthread_attr_getinheritsched;
 	pthread_attr_getschedparam;
 	pthread_attr_getschedpolicy;
 	pthread_attr_getscope;
 	pthread_attr_getstackaddr;
 	pthread_attr_getstacksize;
 	pthread_attr_init;
 	pthread_attr_setdetachstate;
 	pthread_attr_setguardsize;
 	pthread_attr_setinheritsched;
 	pthread_attr_setschedparam;
 	pthread_attr_setschedpolicy;
 	pthread_attr_setscope;
 	pthread_attr_setstackaddr;
 	pthread_attr_setstacksize;
 	pthread_cancel;
 	pthread_cleanup_pop;
 	pthread_cleanup_push;
 	pthread_cond_broadcast;
 	pthread_cond_destroy;
 	pthread_cond_init;
 	pthread_cond_signal;
 	pthread_cond_timedwait;
 	pthread_cond_wait;
 	pthread_detach;
 	pthread_equal;
 	pthread_exit;
 	pthread_getspecific;
 	pthread_join;
 	pthread_key_create;
 	pthread_key_delete;
 	pthread_kill;
 	pthread_main_np;
 	pthread_mutex_destroy;
 	pthread_mutex_init;
 	pthread_mutex_lock;
 	pthread_mutex_trylock;
 	pthread_mutex_unlock;
 	pthread_mutexattr_destroy;
 	pthread_mutexattr_init;
 	pthread_mutexattr_settype;
 	pthread_once;
 	pthread_rwlock_destroy;
 	pthread_rwlock_init;
 	pthread_rwlock_rdlock;
 	pthread_rwlock_tryrdlock;
 	pthread_rwlock_trywrlock;
 	pthread_rwlock_unlock;
 	pthread_rwlock_wrlock;
 	pthread_self;
 	pthread_setcancelstate;
 	pthread_setcanceltype;
 	pthread_setspecific;
 	pthread_sigmask;
 	pthread_testcancel;
 	alarm;
 	arc4random;
 	arc4random_addrandom;
 	arc4random_stir;
 	__assert;
 	check_utility_compat;
 	clock;
 	closedir;
 	confstr;
 	ctermid;
 	ctermid_r;
 	daemon;
-	devname;
-	devname_r;
 	getdiskbyname;
 	dladdr;
 	dlclose;
 	dlerror;
 	dlfunc;
 	dllockinit;
 	dlopen;
 	dlsym;
 	dlvsym;
 	dlinfo;
 	dl_iterate_phdr;
 	drand48;
 	erand48;
 	err_set_file;
 	err_set_exit;
 	err;
 	verr;
 	errc;
 	verrc;
 	errx;
 	verrx;
 	warn;
 	vwarn;
 	warnc;
 	vwarnc;
 	warnx;
 	vwarnx;
 	sys_errlist;
 	sys_nerr;
 	errno;
 	execl;
 	execle;
 	execlp;
 	execv;
 	execvp;
 	execvP;
 	fmtcheck;
 	fmtmsg;
 	fnmatch;
 	__fpclassifyf;
 	__fpclassifyd;
 	__fpclassifyl;
 	frexp;
 	setfstab;
 	getfstab;
 	getfsent;
 	getfsspec;
 	getfsfile;
 	setfsent;
 	endfsent;
 	ftok;
-	ftw;
-	glob;
-	globfree;
 	getbootfile;
 	getbsize;
 	cgetset;
 	cgetcap;
 	cgetent;
 	cgetmatch;
 	cgetfirst;
 	cgetclose;
 	cgetnext;
 	cgetstr;
 	cgetustr;
 	cgetnum;
 	getcwd;
 	getdomainname;
 	setgrent;
 	setgroupent;
 	endgrent;
 	getgrent_r;
 	getgrnam_r;
 	getgrgid_r;
 	getgrnam;
 	getgrgid;
 	getgrent;
 	/*
 	 * Why are __gr_parse_entry() and __gr_match_entry() not static in
 	 * gen/getgrent.c?
 	 */
 	getgrouplist;
 	gethostname;
 	getloadavg;
 	getlogin;
 	getlogin_r;
-	getmntinfo;
 	setnetgrent;
 	getnetgrent;
 	endnetgrent;
 	innetgr;
 	getosreldate;
 	getpagesize;
 	getpeereid;
 	_getprogname;
 	getprogname;
 	setpwent;
 	setpassent;
 	endpwent;
 	getpwent_r;
 	getpwnam_r;
 	getpwuid_r;
 	getpwnam;
 	getpwuid;
 	getpwent;
 	getttynam;
 	getttyent;
 	setttyent;
 	endttyent;
 	isdialuptty;
 	isnettty;
 	getusershell;
 	endusershell;
 	setusershell;
 	getvfsbyname;
 	__isnan;
 	isnan;
 	__isnanf;
 	isnanf;
 	__isinf;
 	isinf;
 	__isinff;
 	__isinfl;
 	isatty;
 	initgroups;
 	jrand48;
 	lcong48;
 	ldexp;
 	lockf;
 	lrand48;
 	modf;
 	mrand48;
-	nftw;
 	nice;
 	nlist;
 	nrand48;
 	opendir;
 	pause;
 	posix_madvise;
 	popen;
 	pclose;
 	psignal;
 	raise;
-	readdir;
-	readdir_r;
 	readpassphrase;
 	getpass;
 	rewinddir;
-	scandir;
-	alphasort;
 	seed48;
 	seekdir;
 	user_from_uid;
 	group_from_gid;
 	setdomainname;
 	sethostname;
 	longjmperror;
 	getmode;
 	setmode;
 	setproctitle;
 	setprogname;
 	siginterrupt;
 	sys_signame;
 	sys_siglist;
 	sys_nsig;
 	signal;
 	sigaddset;
 	sigdelset;
 	sigemptyset;
 	sigfillset;
 	sigismember;
 	sleep;
 	srand48;
 	fstatvfs;
 	statvfs;
 	sl_init;
 	sl_add;
 	sl_free;
 	sl_find;
 	fflagstostr;
 	strtofflags;
 	sysconf;
 	sysctl;
 	sysctlbyname;
 	sysctlnametomib;
 	syslog;
 	vsyslog;
 	openlog;
 	closelog;
 	setlogmask;
 	ttyname_r;
 	ttyname;
 	timezone;
 	times;
 	time;
 	telldir;
 	tcgetattr;
 	tcsetattr;
 	tcsetpgrp;
 	tcgetpgrp;
 	cfgetospeed;
 	cfgetispeed;
 	cfsetospeed;
 	cfsetispeed;
 	cfsetspeed;
 	cfmakeraw;
 	tcsendbreak;
 	_init_tls;
 	__tls_get_addr;
 	tcdrain;
 	tcflush;
 	tcflow;
 	ualarm;
 	ulimit;
 	uname;
 	strunvis;
 	strunvisx;
 	usleep;
 	utime;
 	valloc;
 	vis;
 	strvis;
 	strvisx;
 	wait;
 	wait3;
 	waitpid;
 	wordexp;
 	wordfree;
 };
 
 FBSD_1.1 {
 	arc4random_buf;
 	arc4random_uniform;
 	fdevname;
 	fdevname_r;
 	fdopendir;
 	feature_present;
-	fts_children;
-	fts_close;
-	fts_get_clientptr;
-	fts_get_stream;
-	fts_open;
-	fts_read;
-	fts_set;
-	fts_set_clientptr;
 	posix_spawn;
 	posix_spawn_file_actions_addclose;
 	posix_spawn_file_actions_adddup2;
 	posix_spawn_file_actions_addopen;
 	posix_spawn_file_actions_destroy;
 	posix_spawn_file_actions_init;
 	posix_spawnattr_destroy;
 	posix_spawnattr_getflags;
 	posix_spawnattr_getpgroup;
 	posix_spawnattr_getschedparam;
 	posix_spawnattr_getschedpolicy;
 	posix_spawnattr_getsigdefault;
 	posix_spawnattr_getsigmask;
 	posix_spawnattr_init;
 	posix_spawnattr_setflags;
 	posix_spawnattr_setpgroup;
 	posix_spawnattr_setschedparam;
 	posix_spawnattr_setschedpolicy;
 	posix_spawnattr_setsigdefault;
 	posix_spawnattr_setsigmask;
 	posix_spawnp;
 	semctl;
 	tcgetsid;
 	tcsetsid;
 	__pthread_cleanup_pop_imp;
 	__pthread_cleanup_push_imp;
 };
 
 FBSD_1.2 {
 	basename_r;
 	cfmakesane;
 	endutxent;
 	getpagesizes;
 	getutxent;
 	getutxid;
 	getutxline;
 	getutxuser;
 	pututxline;
 	sem_close;
 	sem_destroy;
 	sem_getvalue;
 	sem_init;
 	sem_open;
 	sem_post;
 	sem_timedwait;
 	sem_trywait;
 	sem_unlink;
 	sem_wait;
 	setutxdb;
 	setutxent;
 };
 
 FBSD_1.3 {
 	clock_getcpuclockid;
 	dirfd;
 	dup3;
 	fdclosedir;
 	fdlopen;
 	__FreeBSD_libc_enter_restricted_mode;
 	getcontextx;
 	gid_from_group;
 	nvis;
 	pwcache_userdb;
 	pwcache_groupdb;
 	snvis;
 	strenvisx;
 	strnunvis;
 	strnunvisx;
 	strnvis;
 	strnvisx;
 	strsenvisx;
 	strsnvis;
 	strsnvisx;
 	strsvis;
 	strsvisx;
 	svis;
 	uid_from_user;
 	unvis;
 	waitid;
 };
 
 FBSD_1.4 {
 	getnetgrent_r;
 	pthread_mutex_consistent;
 	pthread_mutexattr_getrobust;
 	pthread_mutexattr_setrobust;
-	scandir_b;
 	stravis;
 };
 
 FBSD_1.5 {
+	alphasort;
 	basename;
+	devname;
+	devname_r;
 	dirname;
+	fts_children;
+	fts_close;
+	fts_get_clientptr;
+	fts_get_stream;
+	fts_open;
+	fts_read;
+	fts_set;
+	fts_set_clientptr;
+	ftw;
+	getmntinfo;
+	glob;
+	globfree;
+	nftw;
+	readdir;
+	readdir_r;
+	scandir;
+	scandir_b;
 	sem_clockwait_np;
 };
 
 FBSDprivate_1.0 {
 	/* needed by thread libraries */
 	__thr_jtable;
 
 	_pthread_atfork;
 	_pthread_attr_destroy;
 	_pthread_attr_getdetachstate;
 	_pthread_attr_getguardsize;
 	_pthread_attr_getinheritsched;
 	_pthread_attr_getschedparam;
 	_pthread_attr_getschedpolicy;
 	_pthread_attr_getscope;
 	_pthread_attr_getstackaddr;
 	_pthread_attr_getstacksize;
 	_pthread_attr_init;
 	_pthread_attr_setdetachstate;
 	_pthread_attr_setguardsize;
 	_pthread_attr_setinheritsched;
 	_pthread_attr_setschedparam;
 	_pthread_attr_setschedpolicy;
 	_pthread_attr_setscope;
 	_pthread_attr_setstackaddr;
 	_pthread_attr_setstacksize;
 	_pthread_cancel;
 	_pthread_cancel_enter;
 	_pthread_cancel_leave;
 	_pthread_cleanup_pop;
 	_pthread_cleanup_push;
 	_pthread_cond_broadcast;
 	_pthread_cond_destroy;
 	_pthread_cond_init;
 	_pthread_cond_signal;
 	_pthread_cond_timedwait;
 	_pthread_cond_wait;
 	_pthread_detach;
 	_pthread_equal;
 	_pthread_exit;
 	_pthread_getspecific;
 	_pthread_join;
 	_pthread_key_create;
 	_pthread_key_delete;
 	_pthread_kill;
 	_pthread_main_np;
 	_pthread_mutex_destroy;
 	_pthread_mutex_init_calloc_cb;
 	_pthread_mutex_init;
 	_pthread_mutex_lock;
 	_pthread_mutex_trylock;
 	_pthread_mutex_unlock;
 	_pthread_mutexattr_destroy;
 	_pthread_mutexattr_init;
 	_pthread_mutexattr_settype;
 	_pthread_once;
 	_pthread_rwlock_destroy;
 	_pthread_rwlock_init;
 	_pthread_rwlock_rdlock;
 	_pthread_rwlock_tryrdlock;
 	_pthread_rwlock_trywrlock;
 	_pthread_rwlock_unlock;
 	_pthread_rwlock_wrlock;
 	_pthread_self;
 	_pthread_setcancelstate;
 	_pthread_setcanceltype;
 	_pthread_setspecific;
 	_pthread_sigmask;
 	_pthread_testcancel;
 	_spinlock;
 	_spinunlock;
 	_rtld_addr_phdr;
 	_rtld_atfork_pre;
 	_rtld_atfork_post;
 	_rtld_error;		/* for private use */
 	_rtld_get_stack_prot;
 	_rtld_is_dlopened;
 	_rtld_thread_init;	/* for private use */
 	__elf_phdr_match_addr;
 	_err;
 	_warn;
 	__fmtcheck;
 	/* __pw_match_entry; */
 	/* __pw_parse_entry; */
 	__fdnlist;	/* used by libkvm */
 	/* __aout_fdnlist; */
 	/* __elf_is_okay__; */
 	/* __elf_fdnlist; */
 	__opendir2;
 	__pause;
 	_pause;
 	__pw_scan;	/* Used by (at least) libutil */
 	__raise;
 	_raise;
 	__sleep;
 	_sleep;
 	_rtld_allocate_tls;
 	_rtld_free_tls;
 #if defined(i386)
 	___libc_tls_get_addr;	/* x86 only */
 #endif
 	__libc_tls_get_addr;
 	__tcdrain;
 	_tcdrain;
 	__usleep;
 	_usleep;
 	__wait;
 	_wait;
 	__waitpid;
 	_waitpid;
 
 	_libc_sem_init_compat;
 	_libc_sem_destroy_compat;
 	_libc_sem_open_compat;
 	_libc_sem_close_compat;
 	_libc_sem_unlink_compat;
 	_libc_sem_wait_compat;
 	_libc_sem_trywait_compat;
 	_libc_sem_timedwait_compat;
 	_libc_sem_post_compat;
 	_libc_sem_getvalue_compat;
 
 	__libc_tcdrain;
 
 	__elf_aux_vector;
 	__pthread_map_stacks_exec;
 	__fillcontextx;
 	__fillcontextx2;
 	__getcontextx_size;
 };
Index: head/lib/libc/gen/closedir.c
===================================================================
--- head/lib/libc/gen/closedir.c	(revision 318735)
+++ head/lib/libc/gen/closedir.c	(revision 318736)
@@ -1,76 +1,77 @@
 /*
  * Copyright (c) 1983, 1993
  *	Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if defined(LIBC_SCCS) && !defined(lint)
 static char sccsid[] = "@(#)closedir.c	8.1 (Berkeley) 6/10/93";
 #endif /* LIBC_SCCS and not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "namespace.h"
 #include <sys/types.h>
 #include <dirent.h>
 #include <pthread.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include "un-namespace.h"
 
 #include "libc_private.h"
 #include "gen-private.h"
 #include "telldir.h"
 
 /*
  * Tear down a directory stream without closing its file descriptor.
  *
  * Frees the stream's entry buffer, its dd_compat_de buffer, hands the
  * stream to _reclaim_telldir() and finally frees the DIR itself.  The
  * descriptor the stream was using is returned to the caller, who becomes
  * responsible for it.  The stream lock is taken (and then destroyed) only
  * when the process is threaded.
  */
 int
 fdclosedir(DIR *dirp)
 {
 	int fd;
 
 	if (__isthreaded)
 		_pthread_mutex_lock(&dirp->dd_lock);
 	/* Detach the descriptor first so the stream no longer refers to it. */
 	fd = dirp->dd_fd;
 	dirp->dd_fd = -1;
 	dirp->dd_loc = 0;
 	free((void *)dirp->dd_buf);
+	free(dirp->dd_compat_de);
 	_reclaim_telldir(dirp);
 	if (__isthreaded) {
 		/* The lock lives inside *dirp, so retire it before the free. */
 		_pthread_mutex_unlock(&dirp->dd_lock);
 		_pthread_mutex_destroy(&dirp->dd_lock);
 	}
 	free((void *)dirp);
 	return (fd);
 }
 
 /*
  * Close a directory stream: release the stream through fdclosedir() and
  * then close the descriptor it hands back.  The result is that of _close().
  */
 int
 closedir(DIR *dirp)
 {
 	int fd;
 
 	fd = fdclosedir(dirp);
 	return (_close(fd));
 }
Index: head/lib/libc/gen/devname-compat11.c
===================================================================
--- head/lib/libc/gen/devname-compat11.c	(nonexistent)
+++ head/lib/libc/gen/devname-compat11.c	(revision 318736)
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 2011 Gleb Kurtsou <gleb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include "gen-compat.h"
+
+char *
+freebsd11_devname(uint32_t dev, mode_t type)	/* FreeBSD 11 ABI: 32-bit dev */
+{
+
+	return (devname(dev, type));	/* forward to the current devname() */
+}
+
+char *
+freebsd11_devname_r(uint32_t dev, mode_t type, char *buf, int len)	/* FreeBSD 11 ABI: 32-bit dev */
+{
+
+	return (devname_r(dev, type, buf, len));	/* forward to the current devname_r() */
+}
+
+__sym_compat(devname, freebsd11_devname, FBSD_1.0);
+__sym_compat(devname_r, freebsd11_devname_r, FBSD_1.0);

Property changes on: head/lib/libc/gen/devname-compat11.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/lib/libc/gen/fts-compat.c
===================================================================
--- head/lib/libc/gen/fts-compat.c	(revision 318735)
+++ head/lib/libc/gen/fts-compat.c	(revision 318736)
@@ -1,1214 +1,1219 @@
 /*-
  * Copyright (c) 1990, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $OpenBSD: fts.c,v 1.22 1999/10/03 19:22:22 millert Exp $
  */
 
 #if 0
 #if defined(LIBC_SCCS) && !defined(lint)
 static char sccsid[] = "@(#)fts.c	8.6 (Berkeley) 8/14/94";
 #endif /* LIBC_SCCS and not lint */
 #endif
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "namespace.h"
 #include <sys/param.h>
+#define	_WANT_FREEBSD11_STATFS
 #include <sys/mount.h>
+#define	_WANT_FREEBSD11_STAT
 #include <sys/stat.h>
 
+#define	_WANT_FREEBSD11_DIRENT
 #include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include "gen-compat.h"
 #include "fts-compat.h"
 #include "un-namespace.h"
 
 #include "gen-private.h"
 
 FTSENT	*__fts_children_44bsd(FTS *, int);
 int	 __fts_close_44bsd(FTS *);
 void	*__fts_get_clientptr_44bsd(FTS *);
 FTS	*__fts_get_stream_44bsd(FTSENT *);
 FTS	*__fts_open_44bsd(char * const *, int,
 	    int (*)(const FTSENT * const *, const FTSENT * const *));
 FTSENT	*__fts_read_44bsd(FTS *);
 int	 __fts_set_44bsd(FTS *, FTSENT *, int);
 void	 __fts_set_clientptr_44bsd(FTS *, void *);
 
 static FTSENT	*fts_alloc(FTS *, char *, int);
 static FTSENT	*fts_build(FTS *, int);
 static void	 fts_lfree(FTSENT *);
 static void	 fts_load(FTS *, FTSENT *);
 static size_t	 fts_maxarglen(char * const *);
 static void	 fts_padjust(FTS *, FTSENT *);
 static int	 fts_palloc(FTS *, size_t);
 static FTSENT	*fts_sort(FTS *, FTSENT *, int);
 static u_short	 fts_stat(FTS *, FTSENT *, int);
 static int	 fts_safe_changedir(FTS *, FTSENT *, int, char *);
 static int	 fts_ufslinks(FTS *, const FTSENT *);
 
 #define	ISDOT(a)	(a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2])))
 
 #define	CLR(opt)	(sp->fts_options &= ~(opt))
 #define	ISSET(opt)	(sp->fts_options & (opt))
 #define	SET(opt)	(sp->fts_options |= (opt))
 
 #define	FCHDIR(sp, fd)	(!ISSET(FTS_NOCHDIR) && fchdir(fd))
 
 /* fts_build flags */
 #define	BCHILD		1		/* fts_children */
 #define	BNAMES		2		/* fts_children, names only */
 #define	BREAD		3		/* fts_read */
 
 /*
  * Internal representation of an FTS, including extra implementation
  * details.  The FTS returned from fts_open points to this structure's
  * ftsp_fts member (and can be cast to an _fts_private as required).
  *
  * ftsp_fts has to stay the first member: fts_open allocates the whole
  * structure but releases it through the FTS pointer.  The statfs and
  * device members use the FreeBSD 11 layouts (struct freebsd11_statfs and
  * a 32-bit device number).
  */
 struct _fts_private {
 	FTS		ftsp_fts;
-	struct statfs	ftsp_statfs;
-	dev_t		ftsp_dev;
+	struct freebsd11_statfs	ftsp_statfs;
+	uint32_t	ftsp_dev;
 	int		ftsp_linksreliable;
 };
 
 /*
  * With FTS_NOSTAT, many stat(2) calls can be skipped once it is known
  * that a directory cannot contain subdirectories.  The evidence is the
  * link count: every subdirectory's ".." entry adds one link to its
  * parent.  That reasoning is only valid on filesystems that implement
  * links and directories the UFS way, so it is restricted to the types
  * named below and we must punt for the rest.
  */
 
 static const char *ufslike_filesystems[] = {
 	"ufs", "zfs", "nfs", "ext2fs",
 	NULL		/* end of list */
 };
 
 /*
  * Create a traversal stream for the NULL-terminated path list argv.
  *
  * options must be a subset of FTS_OPTIONMASK, otherwise the call fails
  * with EINVAL.  compar, when non-NULL, is stored in the stream and used to
  * order the roots; without it the roots keep the order given in argv.
  * A zero-length path fails with ENOENT.
  *
  * Returns the new stream, or NULL on failure after releasing everything
  * allocated so far.  A failed root allocation is unwound through mem3
  * rather than being dereferenced.
  */
 FTS *
 __fts_open_44bsd(char * const *argv, int options,
     int (*compar)(const FTSENT * const *, const FTSENT * const *))
 {
 	struct _fts_private *priv;
 	FTS *sp;
 	FTSENT *p, *root;
 	int nitems;
 	FTSENT *parent, *tmp;
 	int len;
 
 	/* Options check. */
 	if (options & ~FTS_OPTIONMASK) {
 		errno = EINVAL;
 		return (NULL);
 	}
 
 	/* Allocate/initialize the stream. */
 	if ((priv = calloc(1, sizeof(*priv))) == NULL)
 		return (NULL);
 	sp = &priv->ftsp_fts;
 	sp->fts_compar = compar;
 	sp->fts_options = options;
 
 	/* Shush, GCC. */
 	tmp = NULL;
 
 	/* Logical walks turn on NOCHDIR; symbolic links are too hard. */
 	if (ISSET(FTS_LOGICAL))
 		SET(FTS_NOCHDIR);
 
 	/*
 	 * Start out with 1K of path space, and enough, in any case,
 	 * to hold the user's paths.
 	 */
 	if (fts_palloc(sp, MAX(fts_maxarglen(argv), MAXPATHLEN)))
 		goto mem1;
 
 	/* Allocate/initialize root's parent. */
 	if ((parent = fts_alloc(sp, "", 0)) == NULL)
 		goto mem2;
 	parent->fts_level = FTS_ROOTPARENTLEVEL;
 
 	/* Allocate/initialize root(s). */
 	for (root = NULL, nitems = 0; *argv != NULL; ++argv, ++nitems) {
 		/* Don't allow zero-length paths. */
 		if ((len = strlen(*argv)) == 0) {
 			errno = ENOENT;
 			goto mem3;
 		}
 
 		/*
 		 * The roots built so far are linked through fts_link, so
 		 * mem3 can free them if this allocation fails.
 		 */
 		if ((p = fts_alloc(sp, *argv, len)) == NULL)
 			goto mem3;
 		p->fts_level = FTS_ROOTLEVEL;
 		p->fts_parent = parent;
 		p->fts_accpath = p->fts_name;
 		p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW));
 
 		/* Command-line "." and ".." are real directories. */
 		if (p->fts_info == FTS_DOT)
 			p->fts_info = FTS_D;
 
 		/*
 		 * If comparison routine supplied, traverse in sorted
 		 * order; otherwise traverse in the order specified.
 		 */
 		if (compar) {
 			p->fts_link = root;
 			root = p;
 		} else {
 			p->fts_link = NULL;
 			if (root == NULL)
 				tmp = root = p;
 			else {
 				tmp->fts_link = p;
 				tmp = p;
 			}
 		}
 	}
 	if (compar && nitems > 1)
 		root = fts_sort(sp, root, nitems);
 
 	/*
 	 * Allocate a dummy pointer and make fts_read think that we've just
 	 * finished the node before the root(s); set p->fts_info to FTS_INIT
 	 * so that everything about the "current" node is ignored.
 	 */
 	if ((sp->fts_cur = fts_alloc(sp, "", 0)) == NULL)
 		goto mem3;
 	sp->fts_cur->fts_link = root;
 	sp->fts_cur->fts_info = FTS_INIT;
 
 	/*
 	 * If using chdir(2), grab a file descriptor pointing to dot to ensure
 	 * that we can get back here; this could be avoided for some paths,
 	 * but almost certainly not worth the effort.  Slashes, symbolic links,
 	 * and ".." are all fairly nasty problems.  Note, if we can't get the
 	 * descriptor we run anyway, just more slowly.
 	 */
 	if (!ISSET(FTS_NOCHDIR) &&
 	    (sp->fts_rfd = _open(".", O_RDONLY | O_CLOEXEC, 0)) < 0)
 		SET(FTS_NOCHDIR);
 
 	return (sp);
 
 	/* Error unwinding: each label releases one more layer of state. */
 mem3:	fts_lfree(root);
 	free(parent);
 mem2:	free(sp->fts_path);
 	/* sp is the first member of priv, so this frees the whole object. */
 mem1:	free(sp);
 	return (NULL);
 }
 
 /*
  * Prime the stream for walking the root node p.
  *
  * The directory is not entered until after the pre-order visit, so the
  * root's full name is copied into the stream's path buffer and both
  * fts_path and fts_accpath are pointed at it; that way the later chdir
  * goes to the right place and the caller can reach the first node.
  * fts_name is then cut down to its last component.  fts_open has already
  * made sure the path fits in the buffer.
  */
 static void
 fts_load(FTS *sp, FTSENT *p)
 {
 	char *base;
 	int n;
 
 	/* The whole root name is the initial path. */
 	p->fts_pathlen = p->fts_namelen;
 	n = p->fts_pathlen;
 	memmove(sp->fts_path, p->fts_name, n + 1);
 
 	/*
 	 * Keep only what follows the last slash, unless that slash is the
 	 * first character and nothing comes after it.
 	 */
 	base = strrchr(p->fts_name, '/');
 	if (base != NULL && (base != p->fts_name || base[1] != '\0')) {
 		base++;
 		n = strlen(base);
 		memmove(p->fts_name, base, n + 1);
 		p->fts_namelen = n;
 	}
 
 	p->fts_path = sp->fts_path;
 	p->fts_accpath = p->fts_path;
 	sp->fts_dev = p->fts_dev;
 }
 
 /*
  * Destroy a traversal stream and everything hanging off it.
  *
  * Unless FTS_NOCHDIR is set, the process is returned to the directory it
  * was in when the stream was opened.  Returns 0 on success; if that
  * fchdir fails, returns -1 with errno set to its error.  The stream is
  * freed either way.
  */
 int
 __fts_close_44bsd(FTS *sp)
 {
 	FTSENT *node, *doomed;
 	int error;
 
 	error = 0;
 
 	/*
 	 * This still works if we haven't read anything -- the dummy structure
 	 * points to the root list, so we step through to the end of the root
 	 * list which has a valid parent pointer.
 	 */
 	if (sp->fts_cur != NULL) {
 		node = sp->fts_cur;
 		while (node->fts_level >= FTS_ROOTLEVEL) {
 			doomed = node;
 			if (node->fts_link != NULL)
 				node = node->fts_link;
 			else
 				node = node->fts_parent;
 			free(doomed);
 		}
 		free(node);
 	}
 
 	/* Release the child list, the sort array and the path buffer. */
 	if (sp->fts_child != NULL)
 		fts_lfree(sp->fts_child);
 	free(sp->fts_array);
 	free(sp->fts_path);
 
 	/* Go back to the starting directory, remembering any failure. */
 	if (!ISSET(FTS_NOCHDIR)) {
 		if (fchdir(sp->fts_rfd) != 0)
 			error = errno;
 		(void)_close(sp->fts_rfd);
 	}
 
 	/* The stream itself goes last; the option bits were read above. */
 	free(sp);
 	if (error != 0) {
 		errno = error;
 		return (-1);
 	}
 	return (0);
 }
 
 /*
  * Special case of "/" at the end of the path so that slashes aren't
  * appended which would cause paths to be written as "....//foo".
  */
 #define	NAPPEND(p)							\
 	(p->fts_path[p->fts_pathlen - 1] == '/'				\
 	    ? p->fts_pathlen - 1 : p->fts_pathlen)
 
 /*
  * Return the next node of the traversal, or NULL once the walk is finished
  * or has been stopped by an unrecoverable error (FTS_STOP).  On normal
  * completion errno is set to 0 so the caller can tell EOF from failure.
  *
  * A directory is handed back in pre-order as FTS_D and again, after its
  * children have been visited or skipped, in post-order as FTS_DP (or
  * FTS_ERR if an error was recorded on the node).  Instructions left on
  * the current node (FTS_AGAIN, FTS_FOLLOW, FTS_SKIP) are consumed here.
  */
 FTSENT *
 __fts_read_44bsd(FTS *sp)
 {
 	FTSENT *p, *tmp;
 	int instr;
 	char *t;
 	int saved_errno;
 
 	/* If finished or unrecoverable error, return NULL. */
 	if (sp->fts_cur == NULL || ISSET(FTS_STOP))
 		return (NULL);
 
 	/* Set current node pointer. */
 	p = sp->fts_cur;
 
 	/* Save and zero out user instructions. */
 	instr = p->fts_instr;
 	p->fts_instr = FTS_NOINSTR;
 
 	/* Any type of file may be re-visited; re-stat and re-turn. */
 	if (instr == FTS_AGAIN) {
 		p->fts_info = fts_stat(sp, p, 0);
 		return (p);
 	}
 
 	/*
 	 * Following a symlink -- SLNONE test allows application to see
 	 * SLNONE and recover.  If indirecting through a symlink, we have
 	 * to keep a pointer to the current location.  If unable to get
 	 * that pointer, the follow fails.
 	 */
 	if (instr == FTS_FOLLOW &&
 	    (p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE)) {
 		p->fts_info = fts_stat(sp, p, 1);
 		if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) {
 			if ((p->fts_symfd = _open(".", O_RDONLY | O_CLOEXEC,
 			    0)) < 0) {
 				p->fts_errno = errno;
 				p->fts_info = FTS_ERR;
 			} else
 				p->fts_flags |= FTS_SYMFOLLOW;
 		}
 		return (p);
 	}
 
 	/* Directory in pre-order. */
 	if (p->fts_info == FTS_D) {
 		/* If skipped or crossed mount point, do post-order visit. */
 		if (instr == FTS_SKIP ||
 		    (ISSET(FTS_XDEV) && p->fts_dev != sp->fts_dev)) {
 			if (p->fts_flags & FTS_SYMFOLLOW)
 				(void)_close(p->fts_symfd);
 			if (sp->fts_child) {
 				fts_lfree(sp->fts_child);
 				sp->fts_child = NULL;
 			}
 			p->fts_info = FTS_DP;
 			return (p);
 		}
 
 		/* Rebuild if only read the names and now traversing. */
 		if (sp->fts_child != NULL && ISSET(FTS_NAMEONLY)) {
 			CLR(FTS_NAMEONLY);
 			fts_lfree(sp->fts_child);
 			sp->fts_child = NULL;
 		}
 
 		/*
 		 * Cd to the subdirectory.
 		 *
 		 * If have already read and now fail to chdir, whack the list
 		 * to make the names come out right, and set the parent errno
 		 * so the application will eventually get an error condition.
 		 * Set the FTS_DONTCHDIR flag so that when we logically change
 		 * directories back to the parent we don't do a chdir.
 		 *
 		 * If haven't read do so.  If the read fails, fts_build sets
 		 * FTS_STOP or the fts_info field of the node.
 		 */
 		if (sp->fts_child != NULL) {
 			if (fts_safe_changedir(sp, p, -1, p->fts_accpath)) {
 				p->fts_errno = errno;
 				p->fts_flags |= FTS_DONTCHDIR;
 				for (p = sp->fts_child; p != NULL;
 				    p = p->fts_link)
 					p->fts_accpath =
 					    p->fts_parent->fts_accpath;
 			}
 		} else if ((sp->fts_child = fts_build(sp, BREAD)) == NULL) {
 			if (ISSET(FTS_STOP))
 				return (NULL);
 			return (p);
 		}
 		/* Descend: the first child becomes the node to return. */
 		p = sp->fts_child;
 		sp->fts_child = NULL;
 		goto name;
 	}
 
 	/* Move to the next node on this level. */
 next:	tmp = p;
 	if ((p = p->fts_link) != NULL) {
 		free(tmp);
 
 		/*
 		 * If reached the top, return to the original directory (or
 		 * the root of the tree), and load the paths for the next root.
 		 */
 		if (p->fts_level == FTS_ROOTLEVEL) {
 			if (FCHDIR(sp, sp->fts_rfd)) {
 				SET(FTS_STOP);
 				return (NULL);
 			}
 			fts_load(sp, p);
 			return (sp->fts_cur = p);
 		}
 
 		/*
 		 * User may have called fts_set on the node.  If skipped,
 		 * ignore.  If followed, get a file descriptor so we can
 		 * get back if necessary.
 		 */
 		if (p->fts_instr == FTS_SKIP)
 			goto next;
 		if (p->fts_instr == FTS_FOLLOW) {
 			p->fts_info = fts_stat(sp, p, 1);
 			if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) {
 				if ((p->fts_symfd =
 				    _open(".", O_RDONLY | O_CLOEXEC, 0)) < 0) {
 					p->fts_errno = errno;
 					p->fts_info = FTS_ERR;
 				} else
 					p->fts_flags |= FTS_SYMFOLLOW;
 			}
 			p->fts_instr = FTS_NOINSTR;
 		}
 
 		/* Append "/name" to the parent's part of the shared path. */
 name:		t = sp->fts_path + NAPPEND(p->fts_parent);
 		*t++ = '/';
 		memmove(t, p->fts_name, p->fts_namelen + 1);
 		return (sp->fts_cur = p);
 	}
 
 	/* Move up to the parent node. */
 	p = tmp->fts_parent;
 	free(tmp);
 
 	if (p->fts_level == FTS_ROOTPARENTLEVEL) {
 		/*
 		 * Done; free everything up and set errno to 0 so the user
 		 * can distinguish between error and EOF.
 		 */
 		free(p);
 		errno = 0;
 		return (sp->fts_cur = NULL);
 	}
 
 	/* NUL terminate the pathname. */
 	sp->fts_path[p->fts_pathlen] = '\0';
 
 	/*
 	 * Return to the parent directory.  If at a root node or came through
 	 * a symlink, go back through the file descriptor.  Otherwise, cd up
 	 * one directory.
 	 */
 	if (p->fts_level == FTS_ROOTLEVEL) {
 		if (FCHDIR(sp, sp->fts_rfd)) {
 			SET(FTS_STOP);
 			return (NULL);
 		}
 	} else if (p->fts_flags & FTS_SYMFOLLOW) {
 		if (FCHDIR(sp, p->fts_symfd)) {
 			/* Keep the fchdir error; _close may change errno. */
 			saved_errno = errno;
 			(void)_close(p->fts_symfd);
 			errno = saved_errno;
 			SET(FTS_STOP);
 			return (NULL);
 		}
 		(void)_close(p->fts_symfd);
 	} else if (!(p->fts_flags & FTS_DONTCHDIR) &&
 	    fts_safe_changedir(sp, p->fts_parent, -1, "..")) {
 		SET(FTS_STOP);
 		return (NULL);
 	}
 	/* Post-order visit; surface any error recorded on the way down. */
 	p->fts_info = p->fts_errno ? FTS_ERR : FTS_DP;
 	return (sp->fts_cur = p);
 }
 
 /*
  * Leave an instruction on node p for a later fts_read to act on.
  *
  * The stream argument is accepted but not used by this implementation;
  * it would be needed if fts_set ever gained stream-wide semantics, and
  * the error return exists for the same reason.  Returns 0 on success, or
  * 1 with errno set to EINVAL when instr is not a recognized instruction.
  */
 /* ARGSUSED */
 int
 __fts_set_44bsd(FTS *sp, FTSENT *p, int instr)
 {
 	int known;
 
 	known = (instr == 0 || instr == FTS_AGAIN || instr == FTS_FOLLOW ||
 	    instr == FTS_NOINSTR || instr == FTS_SKIP);
 	if (!known) {
 		errno = EINVAL;
 		return (1);
 	}
 
 	p->fts_instr = instr;
 	return (0);
 }
 
 /*
  * Return the list of entries in the directory most recently returned by
  * fts_read, linked through fts_link.
  *
  * instr must be 0 or FTS_NAMEONLY (EINVAL otherwise).  Before the first
  * fts_read the list of root nodes is returned instead.  NULL is returned
  * when the current node is not a directory in pre-order, when the stream
  * is stopped, when the directory is empty, or on error; errno is zeroed
  * on entry so an empty directory can be told apart from a failure.
  *
  * The list belongs to the stream: any list from an earlier call is freed
  * here, and the stored pointer is cleared at the same time so that an
  * early error return cannot leave the stream holding freed memory.
  */
 FTSENT *
 __fts_children_44bsd(FTS *sp, int instr)
 {
 	FTSENT *p;
 	int fd;
 
 	if (instr != 0 && instr != FTS_NAMEONLY) {
 		errno = EINVAL;
 		return (NULL);
 	}
 
 	/* Set current node pointer. */
 	p = sp->fts_cur;
 
 	/*
 	 * Errno set to 0 so user can distinguish empty directory from
 	 * an error.
 	 */
 	errno = 0;
 
 	/* Fatal errors stop here. */
 	if (ISSET(FTS_STOP))
 		return (NULL);
 
 	/* Return logical hierarchy of user's arguments. */
 	if (p->fts_info == FTS_INIT)
 		return (p->fts_link);
 
 	/*
 	 * If not a directory being visited in pre-order, stop here.  Could
 	 * allow FTS_DNR, assuming the user has fixed the problem, but the
 	 * same effect is available with FTS_AGAIN.
 	 */
 	if (p->fts_info != FTS_D /* && p->fts_info != FTS_DNR */)
 		return (NULL);
 
 	/*
 	 * Free up any previous child list, and forget it: the _open()
 	 * failure path below returns without storing a new list.
 	 */
 	if (sp->fts_child != NULL) {
 		fts_lfree(sp->fts_child);
 		sp->fts_child = NULL;
 	}
 
 	if (instr == FTS_NAMEONLY) {
 		SET(FTS_NAMEONLY);
 		instr = BNAMES;
 	} else
 		instr = BCHILD;
 
 	/*
 	 * If using chdir on a relative path and called BEFORE fts_read does
 	 * its chdir to the root of a traversal, we can lose -- we need to
 	 * chdir into the subdirectory, and we don't know where the current
 	 * directory is, so we can't get back so that the upcoming chdir by
 	 * fts_read will work.
 	 */
 	if (p->fts_level != FTS_ROOTLEVEL || p->fts_accpath[0] == '/' ||
 	    ISSET(FTS_NOCHDIR))
 		return (sp->fts_child = fts_build(sp, instr));
 
 	/* Remember the current directory so we can return after the build. */
 	if ((fd = _open(".", O_RDONLY | O_CLOEXEC, 0)) < 0)
 		return (NULL);
 	sp->fts_child = fts_build(sp, instr);
 	if (fchdir(fd)) {
 		(void)_close(fd);
 		return (NULL);
 	}
 	(void)_close(fd);
 	return (sp->fts_child);
 }
 
 /*
  * Function form of the fts_get_clientptr accessor.  The guard below
  * refuses to build unless fts_get_clientptr is a macro, so the body
  * expands that macro in place rather than calling a function.
  * NOTE(review): the function name is parenthesized, presumably to keep a
  * same-named function-like macro from expanding here -- confirm.
  */
 #ifndef fts_get_clientptr
 #error "fts_get_clientptr not defined"
 #endif
 
 void *
 (__fts_get_clientptr_44bsd)(FTS *sp)
 {
 
 	return (fts_get_clientptr(sp));
 }
 
 /*
  * Function form of the fts_get_stream accessor, taking a node and
  * returning an FTS pointer.  As with the client-pointer getter, the guard
  * requires fts_get_stream to be a macro, which the body then expands.
  */
 #ifndef fts_get_stream
 #error "fts_get_stream not defined"
 #endif
 
 FTS *
 (__fts_get_stream_44bsd)(FTSENT *p)
 {
 	return (fts_get_stream(p));
 }
 
 /*
  * Store an opaque, caller-owned pointer in the stream.  Nothing in this
  * function interprets or frees it; it is simply written to
  * sp->fts_clientptr.
  */
 void
 __fts_set_clientptr_44bsd(FTS *sp, void *clientptr)
 {
 
 	sp->fts_clientptr = clientptr;
 }
 
 /*
  * This is the tricky part -- do not casually change *anything* in here.  The
  * idea is to build the linked list of entries that are used by fts_children
  * and fts_read.  There are lots of special cases.
  *
  * The real slowdown in walking the tree is the stat calls.  If FTS_NOSTAT is
  * set and it's a physical walk (so that symbolic links can't be directories),
  * we can do things quickly.  First, if it's a 4.4BSD file system, the type
  * of the file is in the directory entry.  Otherwise, we assume that the number
  * of subdirectories in a node is equal to the number of links to the parent.
  * The former skips all stat calls.  The latter skips stat calls in any leaf
  * directories and for any files after the subdirectories in the directory have
  * been found, cutting the stat calls by about 2/3.
  */
 static FTSENT *
 fts_build(FTS *sp, int type)
 {
-	struct dirent *dp;
+	struct freebsd11_dirent *dp;
 	FTSENT *p, *head;
 	int nitems;
 	FTSENT *cur, *tail;
 	DIR *dirp;
 	void *oldaddr;
 	size_t dnamlen;
 	int cderrno, descend, len, level, maxlen, nlinks, oflag, saved_errno,
 	    nostat, doadjust;
 	char *cp;
 
 	/* Set current node pointer. */
 	cur = sp->fts_cur;
 
 	/*
 	 * Open the directory for reading.  If this fails, we're done.
 	 * If being called from fts_read, set the fts_info field.
 	 */
 #ifdef FTS_WHITEOUT
 	if (ISSET(FTS_WHITEOUT))
 		oflag = DTF_NODUP | DTF_REWIND;
 	else
 		oflag = DTF_HIDEW | DTF_NODUP | DTF_REWIND;
 #else
 #define __opendir2(path, flag) opendir(path)
 #endif
 	if ((dirp = __opendir2(cur->fts_accpath, oflag)) == NULL) {
 		if (type == BREAD) {
 			cur->fts_info = FTS_DNR;
 			cur->fts_errno = errno;
 		}
 		return (NULL);
 	}
 
 	/*
 	 * Nlinks is the number of possible entries of type directory in the
 	 * directory if we're cheating on stat calls, 0 if we're not doing
 	 * any stat calls at all, -1 if we're doing stats on everything.
 	 */
 	if (type == BNAMES) {
 		nlinks = 0;
 		/* Be quiet about nostat, GCC. */
 		nostat = 0;
 	} else if (ISSET(FTS_NOSTAT) && ISSET(FTS_PHYSICAL)) {
 		if (fts_ufslinks(sp, cur))
 			nlinks = cur->fts_nlink - (ISSET(FTS_SEEDOT) ? 0 : 2);
 		else
 			nlinks = -1;
 		nostat = 1;
 	} else {
 		nlinks = -1;
 		nostat = 0;
 	}
 
 #ifdef notdef
 	(void)printf("nlinks == %d (cur: %d)\n", nlinks, cur->fts_nlink);
 	(void)printf("NOSTAT %d PHYSICAL %d SEEDOT %d\n",
 	    ISSET(FTS_NOSTAT), ISSET(FTS_PHYSICAL), ISSET(FTS_SEEDOT));
 #endif
 	/*
 	 * If we're going to need to stat anything or we want to descend
 	 * and stay in the directory, chdir.  If this fails we keep going,
 	 * but set a flag so we don't chdir after the post-order visit.
 	 * We won't be able to stat anything, but we can still return the
 	 * names themselves.  Note, that since fts_read won't be able to
 	 * chdir into the directory, it will have to return different path
 	 * names than before, i.e. "a/b" instead of "b".  Since the node
 	 * has already been visited in pre-order, have to wait until the
 	 * post-order visit to return the error.  There is a special case
 	 * here, if there was nothing to stat then it's not an error to
 	 * not be able to stat.  This is all fairly nasty.  If a program
 	 * needed sorted entries or stat information, they had better be
 	 * checking FTS_NS on the returned nodes.
 	 */
 	cderrno = 0;
 	if (nlinks || type == BREAD) {
 		if (fts_safe_changedir(sp, cur, _dirfd(dirp), NULL)) {
 			if (nlinks && type == BREAD)
 				cur->fts_errno = errno;
 			cur->fts_flags |= FTS_DONTCHDIR;
 			descend = 0;
 			cderrno = errno;
 		} else
 			descend = 1;
 	} else
 		descend = 0;
 
 	/*
 	 * Figure out the max file name length that can be stored in the
 	 * current path -- the inner loop allocates more path as necessary.
 	 * We really wouldn't have to do the maxlen calculations here, we
 	 * could do them in fts_read before returning the path, but it's a
 	 * lot easier here since the length is part of the dirent structure.
 	 *
 	 * If not changing directories set a pointer so that can just append
 	 * each new name into the path.
 	 */
 	len = NAPPEND(cur);
 	if (ISSET(FTS_NOCHDIR)) {
 		cp = sp->fts_path + len;
 		*cp++ = '/';
 	} else {
 		/* GCC, you're too verbose. */
 		cp = NULL;
 	}
 	len++;
 	maxlen = sp->fts_pathlen - len;
 
 	level = cur->fts_level + 1;
 
 	/* Read the directory, attaching each entry to the `link' pointer. */
 	doadjust = 0;
-	for (head = tail = NULL, nitems = 0; dirp && (dp = readdir(dirp));) {
+	for (head = tail = NULL, nitems = 0;
+	    dirp && (dp = freebsd11_readdir(dirp));) {
 		dnamlen = dp->d_namlen;
 		if (!ISSET(FTS_SEEDOT) && ISDOT(dp->d_name))
 			continue;
 
 		if ((p = fts_alloc(sp, dp->d_name, (int)dnamlen)) == NULL)
 			goto mem1;
 		if (dnamlen >= maxlen) {	/* include space for NUL */
 			oldaddr = sp->fts_path;
 			if (fts_palloc(sp, dnamlen + len + 1)) {
 				/*
 				 * No more memory for path or structures.  Save
 				 * errno, free up the current structure and the
 				 * structures already allocated.
 				 */
 mem1:				saved_errno = errno;
 				if (p)
 					free(p);
 				fts_lfree(head);
 				(void)closedir(dirp);
 				cur->fts_info = FTS_ERR;
 				SET(FTS_STOP);
 				errno = saved_errno;
 				return (NULL);
 			}
 			/* Did realloc() change the pointer? */
 			if (oldaddr != sp->fts_path) {
 				doadjust = 1;
 				if (ISSET(FTS_NOCHDIR))
 					cp = sp->fts_path + len;
 			}
 			maxlen = sp->fts_pathlen - len;
 		}
 
 		if (len + dnamlen >= USHRT_MAX) {
 			/*
 			 * In an FTSENT, fts_pathlen is a u_short so it is
 			 * possible to wraparound here.  If we do, free up
 			 * the current structure and the structures already
 			 * allocated, then error out with ENAMETOOLONG.
 			 */
 			free(p);
 			fts_lfree(head);
 			(void)closedir(dirp);
 			cur->fts_info = FTS_ERR;
 			SET(FTS_STOP);
 			errno = ENAMETOOLONG;
 			return (NULL);
 		}
 		p->fts_level = level;
 		p->fts_parent = sp->fts_cur;
 		p->fts_pathlen = len + dnamlen;
 
 #ifdef FTS_WHITEOUT
 		if (dp->d_type == DT_WHT)
 			p->fts_flags |= FTS_ISW;
 #endif
 
 		if (cderrno) {
 			if (nlinks) {
 				p->fts_info = FTS_NS;
 				p->fts_errno = cderrno;
 			} else
 				p->fts_info = FTS_NSOK;
 			p->fts_accpath = cur->fts_accpath;
 		} else if (nlinks == 0
 #ifdef DT_DIR
 		    || (nostat &&
 		    dp->d_type != DT_DIR && dp->d_type != DT_UNKNOWN)
 #endif
 		    ) {
 			p->fts_accpath =
 			    ISSET(FTS_NOCHDIR) ? p->fts_path : p->fts_name;
 			p->fts_info = FTS_NSOK;
 		} else {
 			/* Build a file name for fts_stat to stat. */
 			if (ISSET(FTS_NOCHDIR)) {
 				p->fts_accpath = p->fts_path;
 				memmove(cp, p->fts_name, p->fts_namelen + 1);
 			} else
 				p->fts_accpath = p->fts_name;
 			/* Stat it. */
 			p->fts_info = fts_stat(sp, p, 0);
 
 			/* Decrement link count if applicable. */
 			if (nlinks > 0 && (p->fts_info == FTS_D ||
 			    p->fts_info == FTS_DC || p->fts_info == FTS_DOT))
 				--nlinks;
 		}
 
 		/* We walk in directory order so "ls -f" doesn't get upset. */
 		p->fts_link = NULL;
 		if (head == NULL)
 			head = tail = p;
 		else {
 			tail->fts_link = p;
 			tail = p;
 		}
 		++nitems;
 	}
 	if (dirp)
 		(void)closedir(dirp);
 
 	/*
 	 * If realloc() changed the address of the path, adjust the
 	 * addresses for the rest of the tree and the dir list.
 	 */
 	if (doadjust)
 		fts_padjust(sp, head);
 
 	/*
 	 * If not changing directories, reset the path back to original
 	 * state.
 	 */
 	if (ISSET(FTS_NOCHDIR)) {
 		if (len == sp->fts_pathlen || nitems == 0)
 			--cp;
 		*cp = '\0';
 	}
 
 	/*
 	 * If descended after called from fts_children or after called from
 	 * fts_read and nothing found, get back.  At the root level we use
 	 * the saved fd; if one of fts_open()'s arguments is a relative path
 	 * to an empty directory, we wind up here with no other way back.  If
 	 * can't get back, we're done.
 	 */
 	if (descend && (type == BCHILD || !nitems) &&
 	    (cur->fts_level == FTS_ROOTLEVEL ?
 	    FCHDIR(sp, sp->fts_rfd) :
 	    fts_safe_changedir(sp, cur->fts_parent, -1, ".."))) {
 		cur->fts_info = FTS_ERR;
 		SET(FTS_STOP);
 		return (NULL);
 	}
 
 	/* If didn't find anything, return NULL. */
 	if (!nitems) {
 		if (type == BREAD)
 			cur->fts_info = FTS_DP;
 		return (NULL);
 	}
 
 	/* Sort the entries. */
 	if (sp->fts_compar && nitems > 1)
 		head = fts_sort(sp, head, nitems);
 	return (head);
 }
 
 static u_short
 fts_stat(FTS *sp, FTSENT *p, int follow)
 {
 	FTSENT *t;
-	dev_t dev;
-	ino_t ino;
-	struct stat *sbp, sb;
+	uint32_t dev;
+	uint32_t ino;
+	struct freebsd11_stat *sbp, sb;
 	int saved_errno;
 
 	/* If user needs stat info, stat buffer already allocated. */
 	sbp = ISSET(FTS_NOSTAT) ? &sb : p->fts_statp;
 
 #ifdef FTS_WHITEOUT
 	/* Check for whiteout. */
 	if (p->fts_flags & FTS_ISW) {
 		if (sbp != &sb) {
 			memset(sbp, '\0', sizeof(*sbp));
 			sbp->st_mode = S_IFWHT;
 		}
 		return (FTS_W);
 	}
 #endif
 
 	/*
 	 * If doing a logical walk, or application requested FTS_FOLLOW, do
 	 * a stat(2).  If that fails, check for a non-existent symlink.  If
 	 * fail, set the errno from the stat call.
 	 */
 	if (ISSET(FTS_LOGICAL) || follow) {
-		if (stat(p->fts_accpath, sbp)) {
+		if (freebsd11_stat(p->fts_accpath, sbp)) {
 			saved_errno = errno;
-			if (!lstat(p->fts_accpath, sbp)) {
+			if (!freebsd11_lstat(p->fts_accpath, sbp)) {
 				errno = 0;
 				return (FTS_SLNONE);
 			}
 			p->fts_errno = saved_errno;
 			goto err;
 		}
-	} else if (lstat(p->fts_accpath, sbp)) {
+	} else if (freebsd11_lstat(p->fts_accpath, sbp)) {
 		p->fts_errno = errno;
-err:		memset(sbp, 0, sizeof(struct stat));
+err:		memset(sbp, 0, sizeof(*sbp));	/* freebsd11_stat, not stat */
 		return (FTS_NS);
 	}
 
 	if (S_ISDIR(sbp->st_mode)) {
 		/*
 		 * Set the device/inode.  Used to find cycles and check for
 		 * crossing mount points.  Also remember the link count, used
 		 * in fts_build to limit the number of stat calls.  It is
 		 * understood that these fields are only referenced if fts_info
 		 * is set to FTS_D.
 		 */
 		dev = p->fts_dev = sbp->st_dev;
 		ino = p->fts_ino = sbp->st_ino;
 		p->fts_nlink = sbp->st_nlink;
 
 		if (ISDOT(p->fts_name))
 			return (FTS_DOT);
 
 		/*
 		 * Cycle detection is done by brute force when the directory
 		 * is first encountered.  If the tree gets deep enough or the
 		 * number of symbolic links to directories is high enough,
 		 * something faster might be worthwhile.
 		 */
 		for (t = p->fts_parent;
 		    t->fts_level >= FTS_ROOTLEVEL; t = t->fts_parent)
 			if (ino == t->fts_ino && dev == t->fts_dev) {
 				p->fts_cycle = t;
 				return (FTS_DC);
 			}
 		return (FTS_D);
 	}
 	if (S_ISLNK(sbp->st_mode))
 		return (FTS_SL);
 	if (S_ISREG(sbp->st_mode))
 		return (FTS_F);
 	return (FTS_DEFAULT);
 }
 
 /*
  * The comparison function takes pointers to pointers to FTSENT structures.
  * Qsort wants a comparison function that takes pointers to void.
  * (Both with appropriate levels of const-poisoning, of course!)
  * Use a trampoline function to deal with the difference.
  */
 static int
 fts_compar(const void *a, const void *b)
 {
 	FTS *parent;
 
 	parent = (*(const FTSENT * const *)a)->fts_fts;
 	return (*parent->fts_compar)(a, b);
 }
 
 static FTSENT *
 fts_sort(FTS *sp, FTSENT *head, int nitems)
 {
 	FTSENT **ap, *p;
 
 	/*
 	 * Construct an array of pointers to the structures and call qsort(3).
 	 * Reassemble the array in the order returned by qsort.  If unable to
 	 * sort for memory reasons, return the directory entries in their
 	 * current order.  Allocate enough space for the current needs plus
 	 * 40 so don't realloc one entry at a time.
 	 */
 	if (nitems > sp->fts_nitems) {
 		sp->fts_nitems = nitems + 40;
 		if ((sp->fts_array = reallocf(sp->fts_array,
 		    sp->fts_nitems * sizeof(FTSENT *))) == NULL) {
 			sp->fts_nitems = 0;
 			return (head);
 		}
 	}
 	for (ap = sp->fts_array, p = head; p; p = p->fts_link)
 		*ap++ = p;
 	qsort(sp->fts_array, nitems, sizeof(FTSENT *), fts_compar);
 	for (head = *(ap = sp->fts_array); --nitems; ++ap)
 		ap[0]->fts_link = ap[1];
 	ap[0]->fts_link = NULL;
 	return (head);
 }
 
 static FTSENT *
 fts_alloc(FTS *sp, char *name, int namelen)
 {
 	FTSENT *p;
 	size_t len;
 
 	struct ftsent_withstat {
 		FTSENT	ent;
-		struct	stat statbuf;
+		struct	freebsd11_stat statbuf;
 	};
 
 	/*
 	 * The file name is a variable length array and no stat structure is
 	 * necessary if the user has set the nostat bit.  Allocate the FTSENT
 	 * structure, the file name and the stat structure in one chunk, but
 	 * be careful that the stat structure is reasonably aligned.
 	 */
 	if (ISSET(FTS_NOSTAT))
 		len = sizeof(FTSENT) + namelen + 1;
 	else
 		len = sizeof(struct ftsent_withstat) + namelen + 1;
 
 	if ((p = malloc(len)) == NULL)
 		return (NULL);
 
 	if (ISSET(FTS_NOSTAT)) {
 		p->fts_name = (char *)(p + 1);
 		p->fts_statp = NULL;
 	} else {
 		p->fts_name = (char *)((struct ftsent_withstat *)p + 1);
 		p->fts_statp = &((struct ftsent_withstat *)p)->statbuf;
 	}
 
 	/* Copy the name and guarantee NUL termination. */
 	memcpy(p->fts_name, name, namelen);
 	p->fts_name[namelen] = '\0';
 	p->fts_namelen = namelen;
 	p->fts_path = sp->fts_path;
 	p->fts_errno = 0;
 	p->fts_flags = 0;
 	p->fts_instr = FTS_NOINSTR;
 	p->fts_number = 0;
 	p->fts_pointer = NULL;
 	p->fts_fts = sp;
 	return (p);
 }
 
 static void
 fts_lfree(FTSENT *head)
 {
 	FTSENT *p;
 
 	/* Free a linked list of structures. */
 	while ((p = head)) {
 		head = head->fts_link;
 		free(p);
 	}
 }
 
 /*
  * Allow essentially unlimited paths; find, rm, ls should all work on any tree.
  * Most systems will allow creation of paths much longer than MAXPATHLEN, even
  * though the kernel won't resolve them.  Add the size (not just what's needed)
  * plus 256 bytes so don't realloc the path 2 bytes at a time.
  */
 static int
 fts_palloc(FTS *sp, size_t more)
 {
 
 	sp->fts_pathlen += more + 256;
 	/*
 	 * Check for possible wraparound.  In an FTS, fts_pathlen is
 	 * a signed int but in an FTSENT it is an unsigned short.
 	 * We limit fts_pathlen to USHRT_MAX to be safe in both cases.
 	 */
 	if (sp->fts_pathlen < 0 || sp->fts_pathlen >= USHRT_MAX) {
 		if (sp->fts_path)
 			free(sp->fts_path);
 		sp->fts_path = NULL;
 		errno = ENAMETOOLONG;
 		return (1);
 	}
 	sp->fts_path = reallocf(sp->fts_path, sp->fts_pathlen);
 	return (sp->fts_path == NULL);
 }
 
 /*
  * When the path is realloc'd, have to fix all of the pointers in structures
  * already returned.
  */
 static void
 fts_padjust(FTS *sp, FTSENT *head)
 {
 	FTSENT *p;
 	char *addr = sp->fts_path;
 
 #define	ADJUST(p) do {							\
 	if ((p)->fts_accpath != (p)->fts_name) {			\
 		(p)->fts_accpath =					\
 		    (char *)addr + ((p)->fts_accpath - (p)->fts_path);	\
 	}								\
 	(p)->fts_path = addr;						\
 } while (0)
 	/* Adjust the current set of children. */
 	for (p = sp->fts_child; p; p = p->fts_link)
 		ADJUST(p);
 
 	/* Adjust the rest of the tree, including the current level. */
 	for (p = head; p->fts_level >= FTS_ROOTLEVEL;) {
 		ADJUST(p);
 		p = p->fts_link ? p->fts_link : p->fts_parent;
 	}
 }
 
 static size_t
 fts_maxarglen(char * const *argv)
 {
 	size_t len, max;
 
 	for (max = 0; *argv; ++argv)
 		if ((len = strlen(*argv)) > max)
 			max = len;
 	return (max + 1);
 }
 
 /*
  * Change to dir specified by fd or p->fts_accpath without getting
  * tricked by someone changing the world out from underneath us.
  * Assumes p->fts_dev and p->fts_ino are filled in.
  */
 static int
 fts_safe_changedir(FTS *sp, FTSENT *p, int fd, char *path)
 {
 	int ret, oerrno, newfd;
-	struct stat sb;
+	struct freebsd11_stat sb;
 
 	newfd = fd;
 	if (ISSET(FTS_NOCHDIR))
 		return (0);
 	if (fd < 0 && (newfd = _open(path, O_RDONLY | O_CLOEXEC, 0)) < 0)
 		return (-1);
-	if (_fstat(newfd, &sb)) {
+	if (freebsd11_fstat(newfd, &sb)) {
 		ret = -1;
 		goto bail;
 	}
 	if (p->fts_dev != sb.st_dev || p->fts_ino != sb.st_ino) {
 		errno = ENOENT;		/* disinformation */
 		ret = -1;
 		goto bail;
 	}
 	ret = fchdir(newfd);
 bail:
 	oerrno = errno;
 	if (fd < 0)
 		(void)_close(newfd);
 	errno = oerrno;
 	return (ret);
 }
 
 /*
  * Check if the filesystem for "ent" has UFS-style links.
  */
 static int
 fts_ufslinks(FTS *sp, const FTSENT *ent)
 {
 	struct _fts_private *priv;
 	const char **cpp;
 
 	priv = (struct _fts_private *)sp;
 	/*
 	 * If this node's device is different from the previous, grab
 	 * the filesystem information, and decide on the reliability
 	 * of the link information from this filesystem for stat(2)
 	 * avoidance.
 	 */
 	if (priv->ftsp_dev != ent->fts_dev) {
-		if (statfs(ent->fts_path, &priv->ftsp_statfs) != -1) {
+		if (freebsd11_statfs(ent->fts_path, &priv->ftsp_statfs) != -1) {
 			priv->ftsp_dev = ent->fts_dev;
 			priv->ftsp_linksreliable = 0;
 			for (cpp = ufslike_filesystems; *cpp; cpp++) {
 				if (strcmp(priv->ftsp_statfs.f_fstypename,
 				    *cpp) == 0) {
 					priv->ftsp_linksreliable = 1;
 					break;
 				}
 			}
 		} else {
 			priv->ftsp_linksreliable = 0;
 		}
 	}
 	return (priv->ftsp_linksreliable);
 }
 
 __sym_compat(fts_open, __fts_open_44bsd, FBSD_1.0);
 __sym_compat(fts_close, __fts_close_44bsd, FBSD_1.0);
 __sym_compat(fts_read, __fts_read_44bsd, FBSD_1.0);
 __sym_compat(fts_set, __fts_set_44bsd, FBSD_1.0);
 __sym_compat(fts_children, __fts_children_44bsd, FBSD_1.0);
 __sym_compat(fts_get_clientptr, __fts_get_clientptr_44bsd, FBSD_1.0);
 __sym_compat(fts_get_stream, __fts_get_stream_44bsd, FBSD_1.0);
 __sym_compat(fts_set_clientptr, __fts_set_clientptr_44bsd, FBSD_1.0);
Index: head/lib/libc/gen/fts-compat.h
===================================================================
--- head/lib/libc/gen/fts-compat.h	(revision 318735)
+++ head/lib/libc/gen/fts-compat.h	(revision 318736)
@@ -1,128 +1,128 @@
 /*
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)fts.h	8.3 (Berkeley) 8/14/94
  * $FreeBSD$
  */
 
 #ifndef	_FTS_H_
 #define	_FTS_H_
 
 typedef struct {
 	struct _ftsent *fts_cur;	/* current node */
 	struct _ftsent *fts_child;	/* linked list of children */
 	struct _ftsent **fts_array;	/* sort array */
-	dev_t fts_dev;			/* starting device # */
+	uint32_t fts_dev;		/* starting device # */
 	char *fts_path;			/* path for this descent */
 	int fts_rfd;			/* fd for root */
 	int fts_pathlen;		/* sizeof(path) */
 	int fts_nitems;			/* elements in the sort array */
 	int (*fts_compar)		/* compare function */
 	    (const struct _ftsent * const *, const struct _ftsent * const *);
 
 #define	FTS_COMFOLLOW	0x001		/* follow command line symlinks */
 #define	FTS_LOGICAL	0x002		/* logical walk */
 #define	FTS_NOCHDIR	0x004		/* don't change directories */
 #define	FTS_NOSTAT	0x008		/* don't get stat info */
 #define	FTS_PHYSICAL	0x010		/* physical walk */
 #define	FTS_SEEDOT	0x020		/* return dot and dot-dot */
 #define	FTS_XDEV	0x040		/* don't cross devices */
 #define	FTS_WHITEOUT	0x080		/* return whiteout information */
 #define	FTS_OPTIONMASK	0x0ff		/* valid user option mask */
 
 #define	FTS_NAMEONLY	0x100		/* (private) child names only */
 #define	FTS_STOP	0x200		/* (private) unrecoverable error */
 	int fts_options;		/* fts_open options, global flags */
 	void *fts_clientptr;		/* thunk for sort function */
 } FTS;
 
 typedef struct _ftsent {
 	struct _ftsent *fts_cycle;	/* cycle node */
 	struct _ftsent *fts_parent;	/* parent directory */
 	struct _ftsent *fts_link;	/* next file in directory */
 	union {
 		struct {
 			long __fts_number;	/* local numeric value */
 			void *__fts_pointer;	/* local address value */
 		} __struct_ftsent;
 		int64_t __fts_bignum;
 	} __union_ftsent;
 #define	fts_number	__union_ftsent.__struct_ftsent.__fts_number
 #define	fts_pointer	__union_ftsent.__struct_ftsent.__fts_pointer
 #define	fts_bignum	__union_ftsent.__fts_bignum
 	char *fts_accpath;		/* access path */
 	char *fts_path;			/* root path */
 	int fts_errno;			/* errno for this node */
 	int fts_symfd;			/* fd for symlink */
 	u_short fts_pathlen;		/* strlen(fts_path) */
 	u_short fts_namelen;		/* strlen(fts_name) */
 
-	ino_t fts_ino;			/* inode */
-	dev_t fts_dev;			/* device */
-	nlink_t fts_nlink;		/* link count */
+	uint32_t fts_ino;		/* inode */
+	uint32_t fts_dev;		/* device */
+	uint16_t fts_nlink;		/* link count */
 
 #define	FTS_ROOTPARENTLEVEL	-1
 #define	FTS_ROOTLEVEL		 0
 	short fts_level;		/* depth (-1 to N) */
 
 #define	FTS_D		 1		/* preorder directory */
 #define	FTS_DC		 2		/* directory that causes cycles */
 #define	FTS_DEFAULT	 3		/* none of the above */
 #define	FTS_DNR		 4		/* unreadable directory */
 #define	FTS_DOT		 5		/* dot or dot-dot */
 #define	FTS_DP		 6		/* postorder directory */
 #define	FTS_ERR		 7		/* error; errno is set */
 #define	FTS_F		 8		/* regular file */
 #define	FTS_INIT	 9		/* initialized only */
 #define	FTS_NS		10		/* stat(2) failed */
 #define	FTS_NSOK	11		/* no stat(2) requested */
 #define	FTS_SL		12		/* symbolic link */
 #define	FTS_SLNONE	13		/* symbolic link without target */
 #define	FTS_W		14		/* whiteout object */
 	u_short fts_info;		/* user flags for FTSENT structure */
 
 #define	FTS_DONTCHDIR	 0x01		/* don't chdir .. to the parent */
 #define	FTS_SYMFOLLOW	 0x02		/* followed a symlink to get here */
 #define	FTS_ISW		 0x04		/* this is a whiteout object */
 	u_short fts_flags;		/* private flags for FTSENT structure */
 
 #define	FTS_AGAIN	 1		/* read node again */
 #define	FTS_FOLLOW	 2		/* follow symbolic link */
 #define	FTS_NOINSTR	 3		/* no instructions */
 #define	FTS_SKIP	 4		/* discard node */
 	u_short fts_instr;		/* fts_set() instructions */
 
-	struct stat *fts_statp;		/* stat(2) information */
+	struct freebsd11_stat *fts_statp; /* stat(2) information */
 	char *fts_name;			/* file name */
 	FTS *fts_fts;			/* back pointer to main FTS */
 } FTSENT;
 
 #define	 fts_get_clientptr(fts)	((fts)->fts_clientptr)
 #define	 fts_get_stream(ftsent)	((ftsent)->fts_fts)
 
 #endif /* !_FTS_H_ */
Index: head/lib/libc/gen/fts-compat11.c
===================================================================
--- head/lib/libc/gen/fts-compat11.c	(nonexistent)
+++ head/lib/libc/gen/fts-compat11.c	(revision 318736)
@@ -0,0 +1,1199 @@
+/*-
+ * Copyright (c) 1990, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: $OpenBSD: fts.c,v 1.22 1999/10/03 19:22:22 millert Exp $
+ */
+
+#if 0
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)fts.c	8.6 (Berkeley) 8/14/94";
+#endif /* LIBC_SCCS and not lint */
+#endif
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
+#include <sys/param.h>
+#define	_WANT_FREEBSD11_STATFS
+#include <sys/mount.h>
+#define	_WANT_FREEBSD11_STAT
+#include <sys/stat.h>
+
+#define	_WANT_FREEBSD11_DIRENT
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <fts.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "gen-compat.h"
+#include "fts-compat11.h"
+#include "un-namespace.h"
+
+#include "gen-private.h"
+
+static FTSENT11	*fts_alloc(FTS11 *, char *, size_t);
+static FTSENT11	*fts_build(FTS11 *, int);
+static void	 fts_lfree(FTSENT11 *);
+static void	 fts_load(FTS11 *, FTSENT11 *);
+static size_t	 fts_maxarglen(char * const *);
+static void	 fts_padjust(FTS11 *, FTSENT11 *);
+static int	 fts_palloc(FTS11 *, size_t);
+static FTSENT11	*fts_sort(FTS11 *, FTSENT11 *, size_t);
+static int	 fts_stat(FTS11 *, FTSENT11 *, int, int);
+static int	 fts_safe_changedir(FTS11 *, FTSENT11 *, int, char *);
+static int	 fts_ufslinks(FTS11 *, const FTSENT11 *);
+
+#define	ISDOT(a)	(a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2])))
+
+#define	CLR(opt)	(sp->fts_options &= ~(opt))
+#define	ISSET(opt)	(sp->fts_options & (opt))
+#define	SET(opt)	(sp->fts_options |= (opt))
+
+#define	FCHDIR(sp, fd)	(!ISSET(FTS_NOCHDIR) && fchdir(fd))
+
+/* fts_build flags */
+#define	BCHILD		1		/* fts_children */
+#define	BNAMES		2		/* fts_children, names only */
+#define	BREAD		3		/* fts_read */
+
+/*
+ * Internal representation of an FTS, including extra implementation
+ * details.  The FTS returned from fts_open points to this structure's
+ * ftsp_fts member (and can be cast to an _fts_private11 as required)
+ */
+struct _fts_private11 {
+	FTS11		ftsp_fts;
+	struct freebsd11_statfs	ftsp_statfs;
+	uint32_t	ftsp_dev;
+	int		ftsp_linksreliable;
+};
+
+/*
+ * The "FTS_NOSTAT" option can avoid a lot of calls to stat(2) if it
+ * knows that a directory could not possibly have subdirectories.  This
+ * is decided by looking at the link count: a subdirectory would
+ * increment its parent's link count by virtue of its own ".." entry.
+ * This assumption only holds for UFS-like filesystems that implement
+ * links and directories this way, so we must punt for others.
+ */
+
+static const char *ufslike_filesystems[] = {
+	"ufs",
+	"zfs",
+	"nfs",
+	"ext2fs",
+	0
+};
+
+FTS11 *
+freebsd11_fts_open(char * const *argv, int options,
+    int (*compar)(const FTSENT11 * const *, const FTSENT11 * const *))
+{
+	struct _fts_private11 *priv;
+	FTS11 *sp;
+	FTSENT11 *p, *root;
+	FTSENT11 *parent, *tmp;
+	size_t len, nitems;
+
+	/* Options check. */
+	if (options & ~FTS_OPTIONMASK) {
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	/* fts_open() requires at least one path */
+	if (*argv == NULL) {
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	/* Allocate/initialize the stream. */
+	if ((priv = calloc(1, sizeof(*priv))) == NULL)
+		return (NULL);
+	sp = &priv->ftsp_fts;
+	sp->fts_compar = compar;
+	sp->fts_options = options;
+
+	/* Shush, GCC. */
+	tmp = NULL;
+
+	/* Logical walks turn on NOCHDIR; symbolic links are too hard. */
+	if (ISSET(FTS_LOGICAL))
+		SET(FTS_NOCHDIR);
+
+	/*
+	 * Start out with 1K of path space, and enough, in any case,
+	 * to hold the user's paths.
+	 */
+	if (fts_palloc(sp, MAX(fts_maxarglen(argv), MAXPATHLEN)))
+		goto mem1;
+
+	/* Allocate/initialize root's parent. */
+	if ((parent = fts_alloc(sp, "", 0)) == NULL)
+		goto mem2;
+	parent->fts_level = FTS_ROOTPARENTLEVEL;
+
+	/* Allocate/initialize root(s). */
+	for (root = NULL, nitems = 0; *argv != NULL; ++argv, ++nitems) {
+		len = strlen(*argv);
+		if ((p = fts_alloc(sp, *argv, len)) == NULL)
+			goto mem3;
+		p->fts_level = FTS_ROOTLEVEL;
+		p->fts_parent = parent;
+		p->fts_accpath = p->fts_name;
+		p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW), -1);
+
+		/* Command-line "." and ".." are real directories. */
+		if (p->fts_info == FTS_DOT)
+			p->fts_info = FTS_D;
+
+		/*
+		 * If comparison routine supplied, traverse in sorted
+		 * order; otherwise traverse in the order specified.
+		 */
+		if (compar) {
+			p->fts_link = root;
+			root = p;
+		} else {
+			p->fts_link = NULL;
+			if (root == NULL)
+				tmp = root = p;
+			else {
+				tmp->fts_link = p;
+				tmp = p;
+			}
+		}
+	}
+	if (compar && nitems > 1)
+		root = fts_sort(sp, root, nitems);
+
+	/*
+	 * Allocate a dummy pointer and make fts_read think that we've just
+	 * finished the node before the root(s); set p->fts_info to FTS_INIT
+	 * so that everything about the "current" node is ignored.
+	 */
+	if ((sp->fts_cur = fts_alloc(sp, "", 0)) == NULL)
+		goto mem3;
+	sp->fts_cur->fts_link = root;
+	sp->fts_cur->fts_info = FTS_INIT;
+
+	/*
+	 * If using chdir(2), grab a file descriptor pointing to dot to ensure
+	 * that we can get back here; this could be avoided for some paths,
+	 * but almost certainly not worth the effort.  Slashes, symbolic links,
+	 * and ".." are all fairly nasty problems.  Note, if we can't get the
+	 * descriptor we run anyway, just more slowly.
+	 */
+	if (!ISSET(FTS_NOCHDIR) &&
+	    (sp->fts_rfd = _open(".", O_RDONLY | O_CLOEXEC, 0)) < 0)
+		SET(FTS_NOCHDIR);
+
+	return (sp);
+
+mem3:	fts_lfree(root);
+	free(parent);
+mem2:	free(sp->fts_path);
+mem1:	free(sp);
+	return (NULL);
+}
+
+static void
+fts_load(FTS11 *sp, FTSENT11 *p)
+{
+	size_t len;
+	char *cp;
+
+	/*
+	 * Load the stream structure for the next traversal.  Since we don't
+	 * actually enter the directory until after the preorder visit, set
+	 * the fts_accpath field specially so the chdir gets done to the right
+	 * place and the user can access the first node.  From fts_open it's
+	 * known that the path will fit.
+	 */
+	len = p->fts_pathlen = p->fts_namelen;
+	memmove(sp->fts_path, p->fts_name, len + 1);
+	if ((cp = strrchr(p->fts_name, '/')) && (cp != p->fts_name || cp[1])) {
+		len = strlen(++cp);
+		memmove(p->fts_name, cp, len + 1);
+		p->fts_namelen = len;
+	}
+	p->fts_accpath = p->fts_path = sp->fts_path;
+	sp->fts_dev = p->fts_dev;
+}
+
+int
+freebsd11_fts_close(FTS11 *sp)
+{
+	FTSENT11 *node, *next;
+	int error;
+
+	/*
+	 * Release the current node and everything reachable from it, moving
+	 * along sibling links first and parent links otherwise.  This also
+	 * works before the first read: the dummy node links to the root list,
+	 * whose last entry has a valid parent pointer.
+	 */
+	if (sp->fts_cur != NULL) {
+		node = sp->fts_cur;
+		while (node->fts_level >= FTS_ROOTLEVEL) {
+			next = node->fts_link;
+			if (next == NULL)
+				next = node->fts_parent;
+			free(node);
+			node = next;
+		}
+		free(node);
+	}
+
+	/* Release the pending child list, the sort array and the path. */
+	if (sp->fts_child != NULL)
+		fts_lfree(sp->fts_child);
+	free(sp->fts_array);
+	free(sp->fts_path);
+
+	/* Go back to the starting directory; remember a failure's errno. */
+	error = 0;
+	if (!ISSET(FTS_NOCHDIR)) {
+		if (fchdir(sp->fts_rfd) != 0)
+			error = errno;
+		(void)_close(sp->fts_rfd);
+	}
+
+	/* Release the stream itself, then report the fchdir outcome. */
+	free(sp);
+	if (error == 0)
+		return (0);
+	errno = error;
+	return (-1);
+}
+
+/*
+ * Offset at which a child name is appended to p's path: the path length, less
+ * one when the path already ends in "/", so names don't become "....//foo".
+ */
+#define	NAPPEND(p)							\
+	((p)->fts_path[(p)->fts_pathlen - 1] == '/'			\
+	    ? (p)->fts_pathlen - 1 : (p)->fts_pathlen)
+
+FTSENT11 *
+freebsd11_fts_read(FTS11 *sp)
+{
+	FTSENT11 *p, *tmp;
+	int instr;		/* instruction fts_set left on this node */
+	char *t;
+	int saved_errno;
+
+	/* If finished or unrecoverable error, return NULL. */
+	if (sp->fts_cur == NULL || ISSET(FTS_STOP))
+		return (NULL);
+
+	/* Set current node pointer. */
+	p = sp->fts_cur;
+
+	/* Save and zero out user instructions. */
+	instr = p->fts_instr;
+	p->fts_instr = FTS_NOINSTR;
+
+	/* Any type of file may be re-visited; re-stat and re-turn. */
+	if (instr == FTS_AGAIN) {
+		p->fts_info = fts_stat(sp, p, 0, -1);
+		return (p);
+	}
+
+	/*
+	 * Following a symlink -- SLNONE test allows application to see
+	 * SLNONE and recover.  If indirecting through a symlink, we have
+	 * to keep a descriptor for the current location so we can return.
+	 * If unable to get that descriptor, the follow fails.
+	 */
+	if (instr == FTS_FOLLOW &&
+	    (p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE)) {
+		p->fts_info = fts_stat(sp, p, 1, -1);
+		if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) {
+			if ((p->fts_symfd = _open(".", O_RDONLY | O_CLOEXEC,
+			    0)) < 0) {
+				p->fts_errno = errno;
+				p->fts_info = FTS_ERR;
+			} else
+				p->fts_flags |= FTS_SYMFOLLOW;
+		}
+		return (p);
+	}
+
+	/* Directory in pre-order. */
+	if (p->fts_info == FTS_D) {
+		/* If skipped or crossed mount point, do post-order visit. */
+		if (instr == FTS_SKIP ||
+		    (ISSET(FTS_XDEV) && p->fts_dev != sp->fts_dev)) {
+			if (p->fts_flags & FTS_SYMFOLLOW)
+				(void)_close(p->fts_symfd);
+			if (sp->fts_child) {
+				fts_lfree(sp->fts_child);
+				sp->fts_child = NULL;
+			}
+			p->fts_info = FTS_DP;
+			return (p);
+		}
+
+		/* Rebuild if only read the names and now traversing. */
+		if (sp->fts_child != NULL && ISSET(FTS_NAMEONLY)) {
+			CLR(FTS_NAMEONLY);
+			fts_lfree(sp->fts_child);
+			sp->fts_child = NULL;
+		}
+
+		/*
+		 * Cd to the subdirectory.
+		 *
+		 * If have already read and now fail to chdir, whack the list
+		 * to make the names come out right, and set the parent errno
+		 * so the application will eventually get an error condition.
+		 * Set the FTS_DONTCHDIR flag so that when we logically change
+		 * directories back to the parent we don't do a chdir.
+		 *
+		 * If haven't read do so.  If the read fails, fts_build sets
+		 * FTS_STOP or the fts_info field of the node.
+		 */
+		if (sp->fts_child != NULL) {
+			if (fts_safe_changedir(sp, p, -1, p->fts_accpath)) {
+				p->fts_errno = errno;
+				p->fts_flags |= FTS_DONTCHDIR;
+				for (p = sp->fts_child; p != NULL;
+				    p = p->fts_link)
+					p->fts_accpath =
+					    p->fts_parent->fts_accpath;
+			}
+		} else if ((sp->fts_child = fts_build(sp, BREAD)) == NULL) {
+			if (ISSET(FTS_STOP))
+				return (NULL);
+			return (p);
+		}
+		p = sp->fts_child;
+		sp->fts_child = NULL;
+		goto name;
+	}
+
+	/* Move to the next node on this level. */
+next:	tmp = p;
+	if ((p = p->fts_link) != NULL) {
+		/*
+		 * If reached the top, return to the original directory (or
+		 * the root of the tree), and load the paths for the next root.
+		 */
+		if (p->fts_level == FTS_ROOTLEVEL) {
+			if (FCHDIR(sp, sp->fts_rfd)) {
+				SET(FTS_STOP);
+				return (NULL);
+			}
+			free(tmp);
+			fts_load(sp, p);
+			return (sp->fts_cur = p);
+		}
+
+		/*
+		 * User may have called fts_set on the node.  If skipped,
+		 * ignore.  If followed, get a file descriptor so we can
+		 * get back if necessary.
+		 */
+		if (p->fts_instr == FTS_SKIP) {
+			free(tmp);
+			goto next;
+		}
+		if (p->fts_instr == FTS_FOLLOW) {
+			p->fts_info = fts_stat(sp, p, 1, -1);
+			if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) {
+				if ((p->fts_symfd =
+				    _open(".", O_RDONLY | O_CLOEXEC, 0)) < 0) {
+					p->fts_errno = errno;
+					p->fts_info = FTS_ERR;
+				} else
+					p->fts_flags |= FTS_SYMFOLLOW;
+			}
+			p->fts_instr = FTS_NOINSTR;
+		}
+
+		free(tmp);
+
+name:		t = sp->fts_path + NAPPEND(p->fts_parent);
+		*t++ = '/';
+		memmove(t, p->fts_name, p->fts_namelen + 1);
+		return (sp->fts_cur = p);
+	}
+
+	/* Move up to the parent node. */
+	p = tmp->fts_parent;
+
+	if (p->fts_level == FTS_ROOTPARENTLEVEL) {
+		/*
+		 * Done; free everything up and set errno to 0 so the user
+		 * can distinguish between error and EOF.
+		 */
+		free(tmp);
+		free(p);
+		errno = 0;
+		return (sp->fts_cur = NULL);
+	}
+
+	/* NUL terminate the pathname. */
+	sp->fts_path[p->fts_pathlen] = '\0';
+
+	/*
+	 * Return to the parent directory.  If at a root node or came through
+	 * a symlink, go back through the file descriptor.  Otherwise, cd up
+	 * one directory.
+	 */
+	if (p->fts_level == FTS_ROOTLEVEL) {
+		if (FCHDIR(sp, sp->fts_rfd)) {
+			SET(FTS_STOP);
+			return (NULL);
+		}
+	} else if (p->fts_flags & FTS_SYMFOLLOW) {
+		if (FCHDIR(sp, p->fts_symfd)) {
+			saved_errno = errno;
+			(void)_close(p->fts_symfd);
+			errno = saved_errno;
+			SET(FTS_STOP);
+			return (NULL);
+		}
+		(void)_close(p->fts_symfd);
+	} else if (!(p->fts_flags & FTS_DONTCHDIR) &&
+	    fts_safe_changedir(sp, p->fts_parent, -1, "..")) {
+		SET(FTS_STOP);
+		return (NULL);
+	}
+	free(tmp);
+	p->fts_info = p->fts_errno ? FTS_ERR : FTS_DP;
+	return (sp->fts_cur = p);
+}
+
+/*
+ * Fts_set takes the stream as an argument although it's not used in this
+ * implementation; it would be necessary if anyone wanted to add global
+ * semantics to fts using fts_set.  An error return is allowed for similar
+ * reasons.
+ */
+/* ARGSUSED */
+int
+freebsd11_fts_set(FTS11 *sp, FTSENT11 *p, int instr)
+{
+	if (instr == 0 || instr == FTS_AGAIN || instr == FTS_FOLLOW ||
+	    instr == FTS_NOINSTR || instr == FTS_SKIP) {
+		p->fts_instr = instr;	/* recognized: record it on the node */
+		return (0);
+	}
+	errno = EINVAL;
+	return (1);
+}
+
+FTSENT11 *
+freebsd11_fts_children(FTS11 *sp, int instr)
+{
+	FTSENT11 *p;
+	int fd, rc, serrno;
+
+	if (instr != 0 && instr != FTS_NAMEONLY) {
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	/* Set current node pointer. */
+	p = sp->fts_cur;
+
+	/*
+	 * Errno set to 0 so user can distinguish empty directory from
+	 * an error.
+	 */
+	errno = 0;
+
+	/* Fatal errors stop here. */
+	if (ISSET(FTS_STOP))
+		return (NULL);
+
+	/* Return logical hierarchy of user's arguments. */
+	if (p->fts_info == FTS_INIT)
+		return (p->fts_link);
+
+	/*
+	 * If not a directory being visited in pre-order, stop here.  Could
+	 * allow FTS_DNR, assuming the user has fixed the problem, but the
+	 * same effect is available with FTS_AGAIN.
+	 */
+	if (p->fts_info != FTS_D /* && p->fts_info != FTS_DNR */)
+		return (NULL);
+
+	/* Free any previous child list; an early return must not leave it. */
+	fts_lfree(sp->fts_child);
+	sp->fts_child = NULL;
+
+	if (instr == FTS_NAMEONLY) {
+		SET(FTS_NAMEONLY);
+		instr = BNAMES;
+	} else
+		instr = BCHILD;
+
+	/*
+	 * If using chdir on a relative path and called BEFORE fts_read does
+	 * its chdir to the root of a traversal, we can lose -- we need to
+	 * chdir into the subdirectory, and we don't know where the current
+	 * directory is, so we can't get back so that the upcoming chdir by
+	 * fts_read will work.
+	 */
+	if (p->fts_level != FTS_ROOTLEVEL || p->fts_accpath[0] == '/' ||
+	    ISSET(FTS_NOCHDIR))
+		return (sp->fts_child = fts_build(sp, instr));
+
+	if ((fd = _open(".", O_RDONLY | O_CLOEXEC, 0)) < 0)
+		return (NULL);
+	sp->fts_child = fts_build(sp, instr);
+	serrno = (sp->fts_child == NULL) ? errno : 0;
+	rc = fchdir(fd);
+	if (rc < 0 && serrno == 0)
+		serrno = errno;
+	(void)_close(fd);
+	errno = serrno;
+	if (rc < 0)
+		return (NULL);
+	return (sp->fts_child);
+}
+
+#ifndef freebsd11_fts_get_clientptr
+#error "freebsd11_fts_get_clientptr not defined"
+#endif
+
+void *
+(freebsd11_fts_get_clientptr)(FTS11 *sp)
+{
+	/* (name) above is not macro-expanded; the call below is. */
+	return (freebsd11_fts_get_clientptr(sp));
+}
+
+#ifndef freebsd11_fts_get_stream
+#error "freebsd11_fts_get_stream not defined"
+#endif
+
+FTS11 *
+(freebsd11_fts_get_stream)(FTSENT11 *p)
+{
+	return (freebsd11_fts_get_stream(p));	/* expands to the macro */
+}
+
+void
+freebsd11_fts_set_clientptr(FTS11 *sp, void *clientptr)
+{
+	/* Store the caller's pointer in the stream; it is not inspected. */
+	sp->fts_clientptr = clientptr;
+}
+
+/*
+ * This is the tricky part -- do not casually change *anything* in here.  The
+ * idea is to build the linked list of entries that are used by fts_children
+ * and fts_read.  There are lots of special cases.
+ *
+ * The real slowdown in walking the tree is the stat calls.  If FTS_NOSTAT is
+ * set and it's a physical walk (so that symbolic links can't be directories),
+ * we can do things quickly.  First, if it's a 4.4BSD file system, the type
+ * of the file is in the directory entry.  Otherwise, we assume that the number
+ * of subdirectories in a node is equal to the number of links to the parent.
+ * The former skips all stat calls.  The latter skips stat calls in any leaf
+ * directories and for any files after the subdirectories in the directory have
+ * been found, cutting the stat calls by about 2/3.
+ */
+static FTSENT11 *
+fts_build(FTS11 *sp, int type)
+{
+	struct freebsd11_dirent *dp;
+	FTSENT11 *p, *head;
+	FTSENT11 *cur, *tail;
+	DIR *dirp;
+	void *oldaddr;
+	char *cp;
+	int cderrno, descend, oflag, saved_errno, nostat, doadjust;
+	long level;
+	long nlinks;	/* has to be signed because -1 is a magic value */
+	size_t dnamlen, len, maxlen, nitems;
+
+	/* Set current node pointer. */
+	cur = sp->fts_cur;
+
+	/*
+	 * Open the directory for reading.  If this fails, we're done.
+	 * If being called from fts_read, set the fts_info field.
+	 */
+#ifdef FTS_WHITEOUT
+	if (ISSET(FTS_WHITEOUT))
+		oflag = DTF_NODUP | DTF_REWIND;
+	else
+		oflag = DTF_HIDEW | DTF_NODUP | DTF_REWIND;
+#else
+#define __opendir2(path, flag) opendir(path)
+#endif
+	if ((dirp = __opendir2(cur->fts_accpath, oflag)) == NULL) {
+		if (type == BREAD) {
+			cur->fts_info = FTS_DNR;
+			cur->fts_errno = errno;
+		}
+		return (NULL);
+	}
+
+	/*
+	 * Nlinks is the number of possible entries of type directory in the
+	 * directory if we're cheating on stat calls, 0 if we're not doing
+	 * any stat calls at all, -1 if we're doing stats on everything.
+	 */
+	if (type == BNAMES) {
+		nlinks = 0;
+		/* nostat isn't read when nlinks is 0; set it to quiet GCC. */
+		nostat = 0;
+	} else if (ISSET(FTS_NOSTAT) && ISSET(FTS_PHYSICAL)) {
+		if (fts_ufslinks(sp, cur))
+			nlinks = cur->fts_nlink - (ISSET(FTS_SEEDOT) ? 0 : 2);
+		else
+			nlinks = -1;
+		nostat = 1;
+	} else {
+		nlinks = -1;
+		nostat = 0;
+	}
+
+#ifdef notdef
+	(void)printf("nlinks == %d (cur: %d)\n", nlinks, cur->fts_nlink);
+	(void)printf("NOSTAT %d PHYSICAL %d SEEDOT %d\n",
+	    ISSET(FTS_NOSTAT), ISSET(FTS_PHYSICAL), ISSET(FTS_SEEDOT));
+#endif
+	/*
+	 * If we're going to need to stat anything or we want to descend
+	 * and stay in the directory, chdir.  If this fails we keep going,
+	 * but set a flag so we don't chdir after the post-order visit.
+	 * We won't be able to stat anything, but we can still return the
+	 * names themselves.  Note that since fts_read won't be able to
+	 * chdir into the directory, it will have to return different path
+	 * names than before, i.e. "a/b" instead of "b".  Since the node
+	 * has already been visited in pre-order, have to wait until the
+	 * post-order visit to return the error.  There is a special case
+	 * here, if there was nothing to stat then it's not an error to
+	 * not be able to stat.  This is all fairly nasty.  If a program
+	 * needed sorted entries or stat information, they had better be
+	 * checking FTS_NS on the returned nodes.
+	 */
+	cderrno = 0;
+	if (nlinks || type == BREAD) {
+		if (fts_safe_changedir(sp, cur, _dirfd(dirp), NULL)) {
+			if (nlinks && type == BREAD)
+				cur->fts_errno = errno;
+			cur->fts_flags |= FTS_DONTCHDIR;
+			descend = 0;
+			cderrno = errno;
+		} else
+			descend = 1;
+	} else
+		descend = 0;
+
+	/*
+	 * Figure out the max file name length that can be stored in the
+	 * current path -- the inner loop allocates more path as necessary.
+	 * We really wouldn't have to do the maxlen calculations here, we
+	 * could do them in fts_read before returning the path, but it's a
+	 * lot easier here since the length is part of the dirent structure.
+	 *
+	 * If not changing directories set a pointer so that can just append
+	 * each new name into the path.
+	 */
+	len = NAPPEND(cur);
+	if (ISSET(FTS_NOCHDIR)) {
+		cp = sp->fts_path + len;
+		*cp++ = '/';
+	} else {
+		/* cp is only used under FTS_NOCHDIR; set it to quiet GCC. */
+		cp = NULL;
+	}
+	len++;
+	maxlen = sp->fts_pathlen - len;
+
+	level = cur->fts_level + 1;
+
+	/* Read the directory, attaching each entry to the `link' pointer. */
+	doadjust = 0;
+	for (head = tail = NULL, nitems = 0;
+	    dirp && (dp = freebsd11_readdir(dirp));) {
+		dnamlen = dp->d_namlen;
+		if (!ISSET(FTS_SEEDOT) && ISDOT(dp->d_name))
+			continue;
+
+		if ((p = fts_alloc(sp, dp->d_name, dnamlen)) == NULL)
+			goto mem1;
+		if (dnamlen >= maxlen) {	/* include space for NUL */
+			oldaddr = sp->fts_path;
+			if (fts_palloc(sp, dnamlen + len + 1)) {
+				/*
+				 * No more memory for path or structures.  Save
+				 * errno, free up the current structure and the
+				 * structures already allocated.
+				 */
+mem1:				saved_errno = errno;
+				if (p)
+					free(p);
+				fts_lfree(head);
+				(void)closedir(dirp);
+				cur->fts_info = FTS_ERR;
+				SET(FTS_STOP);
+				errno = saved_errno;
+				return (NULL);
+			}
+			/* Did realloc() change the pointer? */
+			if (oldaddr != sp->fts_path) {
+				doadjust = 1;
+				if (ISSET(FTS_NOCHDIR))
+					cp = sp->fts_path + len;
+			}
+			maxlen = sp->fts_pathlen - len;
+		}
+
+		p->fts_level = level;
+		p->fts_parent = sp->fts_cur;
+		p->fts_pathlen = len + dnamlen;
+
+#ifdef FTS_WHITEOUT
+		if (dp->d_type == DT_WHT)
+			p->fts_flags |= FTS_ISW;
+#endif
+
+		if (cderrno) {
+			if (nlinks) {
+				p->fts_info = FTS_NS;
+				p->fts_errno = cderrno;
+			} else
+				p->fts_info = FTS_NSOK;
+			p->fts_accpath = cur->fts_accpath;
+		} else if (nlinks == 0
+#ifdef DT_DIR
+		    || (nostat &&
+		    dp->d_type != DT_DIR && dp->d_type != DT_UNKNOWN)
+#endif
+		    ) {
+			p->fts_accpath =
+			    ISSET(FTS_NOCHDIR) ? p->fts_path : p->fts_name;
+			p->fts_info = FTS_NSOK;
+		} else {
+			/* Build a file name for fts_stat to stat. */
+			if (ISSET(FTS_NOCHDIR)) {
+				p->fts_accpath = p->fts_path;
+				memmove(cp, p->fts_name, p->fts_namelen + 1);
+				p->fts_info = fts_stat(sp, p, 0, _dirfd(dirp));
+			} else {
+				p->fts_accpath = p->fts_name;
+				p->fts_info = fts_stat(sp, p, 0, -1);
+			}
+
+			/* Decrement link count if applicable. */
+			if (nlinks > 0 && (p->fts_info == FTS_D ||
+			    p->fts_info == FTS_DC || p->fts_info == FTS_DOT))
+				--nlinks;
+		}
+
+		/* We walk in directory order so "ls -f" doesn't get upset. */
+		p->fts_link = NULL;
+		if (head == NULL)
+			head = tail = p;
+		else {
+			tail->fts_link = p;
+			tail = p;
+		}
+		++nitems;
+	}
+	if (dirp)
+		(void)closedir(dirp);
+
+	/*
+	 * If realloc() changed the address of the path, adjust the
+	 * addresses for the rest of the tree and the dir list.
+	 */
+	if (doadjust)
+		fts_padjust(sp, head);
+
+	/*
+	 * If not changing directories, reset the path back to original
+	 * state.
+	 */
+	if (ISSET(FTS_NOCHDIR))
+		sp->fts_path[cur->fts_pathlen] = '\0';
+
+	/*
+	 * If descended after called from fts_children or after called from
+	 * fts_read and nothing found, get back.  At the root level we use
+	 * the saved fd; if one of fts_open()'s arguments is a relative path
+	 * to an empty directory, we wind up here with no other way back.  If
+	 * can't get back, we're done.
+	 */
+	if (descend && (type == BCHILD || !nitems) &&
+	    (cur->fts_level == FTS_ROOTLEVEL ?
+	    FCHDIR(sp, sp->fts_rfd) :
+	    fts_safe_changedir(sp, cur->fts_parent, -1, ".."))) {
+		fts_lfree(head);
+		cur->fts_info = FTS_ERR;
+		SET(FTS_STOP);
+		return (NULL);
+	}
+
+	/* If didn't find anything, return NULL. */
+	if (!nitems) {
+		if (type == BREAD)
+			cur->fts_info = FTS_DP;
+		return (NULL);
+	}
+
+	/* Sort the entries. */
+	if (sp->fts_compar && nitems > 1)
+		head = fts_sort(sp, head, nitems);
+	return (head);
+}
+
+static int
+fts_stat(FTS11 *sp, FTSENT11 *p, int follow, int dfd)
+{
+	FTSENT11 *t;
+	uint32_t dev;
+	uint32_t ino;
+	struct freebsd11_stat *sbp, sb;
+	int saved_errno;
+	const char *path;
+
+	if (dfd == -1)		/* no directory fd: use fts_accpath from cwd */
+		path = p->fts_accpath, dfd = AT_FDCWD;
+	else
+		path = p->fts_name;
+
+	/* With FTS_NOSTAT use the local buffer, else the node's own one. */
+	sbp = ISSET(FTS_NOSTAT) ? &sb : p->fts_statp;
+
+#ifdef FTS_WHITEOUT
+	/* Check for whiteout. */
+	if (p->fts_flags & FTS_ISW) {
+		if (sbp != &sb) {
+			memset(sbp, '\0', sizeof(*sbp));
+			sbp->st_mode = S_IFWHT;
+		}
+		return (FTS_W);
+	}
+#endif
+
+	/*
+	 * If doing a logical walk, or application requested FTS_FOLLOW, do
+	 * a stat(2).  If that fails, check for a non-existent symlink.  If
+	 * fail, set the errno from the stat call.
+	 */
+	if (ISSET(FTS_LOGICAL) || follow) {
+		if (freebsd11_fstatat(dfd, path, sbp, 0)) {
+			saved_errno = errno;
+			if (freebsd11_fstatat(dfd, path, sbp,
+			    AT_SYMLINK_NOFOLLOW)) {
+				p->fts_errno = saved_errno;
+				goto err;
+			}
+			errno = 0;
+			if (S_ISLNK(sbp->st_mode))
+				return (FTS_SLNONE);
+		}
+	} else if (freebsd11_fstatat(dfd, path, sbp, AT_SYMLINK_NOFOLLOW)) {
+		p->fts_errno = errno;
+err:		memset(sbp, 0, sizeof(*sbp));
+		return (FTS_NS);
+	}
+
+	if (S_ISDIR(sbp->st_mode)) {
+		/*
+		 * Set the device/inode.  Used to find cycles and check for
+		 * crossing mount points.  Also remember the link count, used
+		 * in fts_build to limit the number of stat calls.  It is
+		 * understood that these fields are only referenced if fts_info
+		 * is set to FTS_D.
+		 */
+		dev = p->fts_dev = sbp->st_dev;
+		ino = p->fts_ino = sbp->st_ino;
+		p->fts_nlink = sbp->st_nlink;
+
+		if (ISDOT(p->fts_name))
+			return (FTS_DOT);
+
+		/*
+		 * Cycle detection is done by brute force when the directory
+		 * is first encountered.  If the tree gets deep enough or the
+		 * number of symbolic links to directories is high enough,
+		 * something faster might be worthwhile.
+		 */
+		for (t = p->fts_parent;
+		    t->fts_level >= FTS_ROOTLEVEL; t = t->fts_parent)
+			if (ino == t->fts_ino && dev == t->fts_dev) {
+				p->fts_cycle = t;
+				return (FTS_DC);
+			}
+		return (FTS_D);
+	}
+	if (S_ISLNK(sbp->st_mode))
+		return (FTS_SL);
+	if (S_ISREG(sbp->st_mode))
+		return (FTS_F);
+	return (FTS_DEFAULT);
+}
+
+/*
+ * The comparison function takes pointers to pointers to FTSENT structures.
+ * Qsort wants a comparison function that takes pointers to void.
+ * (Both with appropriate levels of const-poisoning, of course!)
+ * Use a trampoline function to deal with the difference.
+ */
+static int
+fts_compar(const void *a, const void *b)
+{
+	const FTSENT11 * const *first = a;
+
+	/* Every entry points back at its stream, which holds the callback. */
+	return ((*first)->fts_fts->fts_compar(a, b));
+}
+
+static FTSENT11 *
+fts_sort(FTS11 *sp, FTSENT11 *head, size_t nitems)
+{
+	FTSENT11 **slot, *ent;
+	size_t i;
+
+	/*
+	 * Copy the list into the stream's pointer array, qsort(3) the array
+	 * and relink the entries in array order.  The array grows with 40
+	 * spare slots; if it cannot be grown the list is returned unsorted.
+	 */
+	if (sp->fts_nitems < nitems) {
+		sp->fts_nitems = nitems + 40;
+		sp->fts_array = reallocf(sp->fts_array,
+		    sp->fts_nitems * sizeof(FTSENT11 *));
+		if (sp->fts_array == NULL) {
+			sp->fts_nitems = 0;
+			return (head);
+		}
+	}
+	for (slot = sp->fts_array, ent = head; ent; ent = ent->fts_link)
+		*slot++ = ent;
+	qsort(sp->fts_array, nitems, sizeof(FTSENT11 *), fts_compar);
+	for (i = 0; i + 1 < nitems; i++)
+		sp->fts_array[i]->fts_link = sp->fts_array[i + 1];
+	sp->fts_array[nitems - 1]->fts_link = NULL;
+	return (sp->fts_array[0]);
+}
+
+static FTSENT11 *
+fts_alloc(FTS11 *sp, char *name, size_t namelen)
+{
+	struct ftsent11_withstat {
+		FTSENT11	ent;
+		struct	freebsd11_stat statbuf;
+	} *full;
+	FTSENT11 *p;
+	size_t hdrlen;
+
+	/*
+	 * A node is a single allocation: the FTSENT11 header, the stat
+	 * buffer unless the stream has FTS_NOSTAT set, and finally the
+	 * NUL-terminated name.  Header and stat buffer are described by one
+	 * struct so that the stat buffer ends up reasonably aligned.
+	 */
+	if (ISSET(FTS_NOSTAT))
+		hdrlen = sizeof(FTSENT11);
+	else
+		hdrlen = sizeof(*full);
+	p = malloc(hdrlen + namelen + 1);
+	if (p == NULL)
+		return (NULL);
+
+	/* The name starts right after the fixed-size part. */
+	p->fts_name = (char *)p + hdrlen;
+	if (ISSET(FTS_NOSTAT))
+		p->fts_statp = NULL;
+	else {
+		full = (struct ftsent11_withstat *)p;
+		p->fts_statp = &full->statbuf;
+	}
+
+	/* Copy the name, terminate it and set the fields initialized here. */
+	memcpy(p->fts_name, name, namelen);
+	p->fts_name[namelen] = '\0';
+	p->fts_namelen = namelen;
+	p->fts_path = sp->fts_path;
+	p->fts_errno = 0;
+	p->fts_flags = 0;
+	p->fts_instr = FTS_NOINSTR;
+	p->fts_number = 0;
+	p->fts_pointer = NULL;
+	p->fts_fts = sp;
+	return (p);
+}
+
+static void
+fts_lfree(FTSENT11 *head)
+{
+	FTSENT11 *next;
+
+	/* Walk the fts_link chain, releasing each node once we've left it. */
+	for (; head != NULL; head = next) {
+		next = head->fts_link;
+		free(head);
+	}
+}
+
+/*
+ * Allow essentially unlimited paths; find, rm, ls should all work on any tree.
+ * Most systems will allow creation of paths much longer than MAXPATHLEN, even
+ * though the kernel won't resolve them.  Add the size (not just what's needed)
+ * plus 256 bytes so don't realloc the path 2 bytes at a time.
+ */
+static int
+fts_palloc(FTS11 *sp, size_t more)
+{
+	/* Record the enlarged size first; the buffer is then grown to it. */
+	sp->fts_pathlen = sp->fts_pathlen + more + 256;
+	sp->fts_path = reallocf(sp->fts_path, sp->fts_pathlen);
+	return (sp->fts_path != NULL ? 0 : 1);
+}
+
+/*
+ * When the path is realloc'd, have to fix all of the pointers in structures
+ * already returned.
+ */
+static void
+fts_padjust(FTS11 *sp, FTSENT11 *head)
+{
+	FTSENT11 *p;
+	char *addr = sp->fts_path;	/* current address of the path buffer */
+
+#define	ADJUST(p) do {							\
+	if ((p)->fts_accpath != (p)->fts_name) {			\
+		(p)->fts_accpath =					\
+		    (char *)addr + ((p)->fts_accpath - (p)->fts_path);	\
+	}								\
+	(p)->fts_path = addr;						\
+} while (0)
+	/* Rebase the child list held by the stream (sp->fts_child). */
+	for (p = sp->fts_child; p; p = p->fts_link)
+		ADJUST(p);
+
+	/* Rebase from head on: follow links, else parents, down to root. */
+	for (p = head; p->fts_level >= FTS_ROOTLEVEL;) {
+		ADJUST(p);
+		p = p->fts_link ? p->fts_link : p->fts_parent;
+	}
+}
+
+static size_t
+fts_maxarglen(char * const *argv)
+{
+	size_t i, n, longest = 0;
+
+	/* Longest argument length, plus one for the terminating NUL. */
+	for (i = 0; argv[i] != NULL; i++)
+		longest = (n = strlen(argv[i])) > longest ? n : longest;
+	return (longest + 1);
+}
+
+/*
+ * Change to dir specified by fd or p->fts_accpath without getting
+ * tricked by someone changing the world out from underneath us.
+ * Assumes p->fts_dev and p->fts_ino are filled in.
+ */
+static int
+fts_safe_changedir(FTS11 *sp, FTSENT11 *p, int fd, char *path)
+{
+	struct freebsd11_stat sb;
+	int cfd, rv, serrno;
+
+	if (ISSET(FTS_NOCHDIR))
+		return (0);
+	/* Use the caller's descriptor when given, else open path here. */
+	cfd = fd;
+	if (fd < 0) {
+		cfd = _open(path, O_RDONLY | O_DIRECTORY | O_CLOEXEC, 0);
+		if (cfd < 0)
+			return (-1);
+	}
+	/* Enter the directory only if it is still the one p describes. */
+	rv = -1;
+	if (freebsd11_fstat(cfd, &sb) == 0) {
+		if (p->fts_dev == sb.st_dev && p->fts_ino == sb.st_ino)
+			rv = fchdir(cfd);
+		else
+			errno = ENOENT;		/* disinformation */
+	}
+	serrno = errno;
+	if (fd < 0)
+		(void)_close(cfd);
+	errno = serrno;
+	return (rv);
+}
+
+/*
+ * Check if the filesystem for "ent" has UFS-style links.
+ */
+static int
+fts_ufslinks(FTS11 *sp, const FTSENT11 *ent)
+{
+	struct _fts_private11 *priv = (struct _fts_private11 *)sp;
+	const char **fsname;
+
+	/* Still on the device examined last time: reuse the cached answer. */
+	if (priv->ftsp_dev == ent->fts_dev)
+		return (priv->ftsp_linksreliable);
+
+	/*
+	 * Different device: look up its filesystem type.  If that fails the
+	 * link counts are not trusted, and the cached device is left alone so
+	 * the lookup is retried next time.
+	 */
+	if (freebsd11_statfs(ent->fts_path, &priv->ftsp_statfs) == -1) {
+		priv->ftsp_linksreliable = 0;
+		return (priv->ftsp_linksreliable);
+	}
+	/* Link counts are trusted only on the listed UFS-like filesystems. */
+	priv->ftsp_dev = ent->fts_dev;
+	priv->ftsp_linksreliable = 0;
+	for (fsname = ufslike_filesystems; *fsname != NULL; fsname++) {
+		if (strcmp(priv->ftsp_statfs.f_fstypename, *fsname) != 0)
+			continue;
+		priv->ftsp_linksreliable = 1;
+		break;
+	}
+	return (priv->ftsp_linksreliable);
+}
+
+__sym_compat(fts_open, freebsd11_fts_open, FBSD_1.1);
+__sym_compat(fts_close, freebsd11_fts_close, FBSD_1.1);
+__sym_compat(fts_read, freebsd11_fts_read, FBSD_1.1);
+__sym_compat(fts_set, freebsd11_fts_set, FBSD_1.1);
+__sym_compat(fts_children, freebsd11_fts_children, FBSD_1.1);
+__sym_compat(fts_get_clientptr, freebsd11_fts_get_clientptr, FBSD_1.1);
+__sym_compat(fts_get_stream, freebsd11_fts_get_stream, FBSD_1.1);
+__sym_compat(fts_set_clientptr, freebsd11_fts_set_clientptr, FBSD_1.1);

Property changes on: head/lib/libc/gen/fts-compat11.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: head/lib/libc/gen/fts-compat11.h
===================================================================
--- head/lib/libc/gen/fts-compat11.h	(nonexistent)
+++ head/lib/libc/gen/fts-compat11.h	(revision 318736)
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fts.h	8.3 (Berkeley) 8/14/94
+ * $FreeBSD$
+ */
+
+#ifndef	_FTS_COMPAT11_H_
+#define	_FTS_COMPAT11_H_
+
+typedef struct {
+	struct _ftsent11 *fts_cur;	/* current node */
+	struct _ftsent11 *fts_child;	/* linked list of children */
+	struct _ftsent11 **fts_array;	/* sort array */
+	uint32_t fts_dev;		/* starting device # */
+	char *fts_path;			/* path for this descent */
+	int fts_rfd;			/* fd for root */
+	__size_t fts_pathlen;		/* sizeof(path) */
+	__size_t fts_nitems;		/* elements in the sort array */
+	int (*fts_compar)		/* compare function */
+	    (const struct _ftsent11 * const *,
+	    const struct _ftsent11 * const *);
+	int fts_options;		/* fts_open options, global flags */
+	void *fts_clientptr;		/* thunk for sort function */
+} FTS11;
+
+typedef struct _ftsent11 {
+	struct _ftsent11 *fts_cycle;	/* cycle node */
+	struct _ftsent11 *fts_parent;	/* parent directory */
+	struct _ftsent11 *fts_link;	/* next file in directory */
+	long long fts_number;		/* local numeric value */
+	void *fts_pointer;		/* local address value */
+	char *fts_accpath;		/* access path */
+	char *fts_path;			/* root path */
+	int fts_errno;			/* errno for this node */
+	int fts_symfd;			/* fd for symlink */
+	__size_t fts_pathlen;		/* strlen(fts_path) */
+	__size_t fts_namelen;		/* strlen(fts_name) */
+
+	uint32_t fts_ino;		/* inode */
+	uint32_t fts_dev;		/* device */
+	uint16_t fts_nlink;		/* link count */
+
+	long fts_level;			/* depth (-1 to N) */
+
+	int fts_info;			/* user status for FTSENT structure */
+
+	unsigned fts_flags;		/* private flags for FTSENT structure */
+
+	int fts_instr;			/* fts_set() instructions */
+
+	struct freebsd11_stat *fts_statp; /* stat(2) information */
+	char *fts_name;			/* file name */
+	FTS11 *fts_fts;			/* back pointer to main FTS */
+} FTSENT11;
+
+FTSENT11	*freebsd11_fts_children(FTS11 *, int);
+int		 freebsd11_fts_close(FTS11 *);
+void		*freebsd11_fts_get_clientptr(FTS11 *);
+#define	freebsd11_fts_get_clientptr(fts)	((fts)->fts_clientptr)
+FTS11		*freebsd11_fts_get_stream(FTSENT11 *);
+#define	freebsd11_fts_get_stream(ftsent)	((ftsent)->fts_fts)
+FTS11		*freebsd11_fts_open(char * const *, int,
+		    int (*)(const FTSENT11 * const *,
+		    const FTSENT11 * const *));
+FTSENT11	*freebsd11_fts_read(FTS11 *);
+int		 freebsd11_fts_set(FTS11 *, FTSENT11 *, int);
+void		 freebsd11_fts_set_clientptr(FTS11 *, void *);
+
+#endif /* !_FTS_COMPAT11_H_ */

Property changes on: head/lib/libc/gen/fts-compat11.h
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: head/lib/libc/gen/ftw-compat11.c
===================================================================
--- head/lib/libc/gen/ftw-compat11.c	(nonexistent)
+++ head/lib/libc/gen/ftw-compat11.c	(revision 318736)
@@ -0,0 +1,98 @@
+/*	$OpenBSD: ftw.c,v 1.5 2005/08/08 08:05:34 espie Exp $	*/
+
+/*
+ * Copyright (c) 2003, 2004 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F39502-99-1-0512.
+ *
+ * from: $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fts.h>
+#include <ftw.h>
+
+#include "fts-compat11.h"
+
+int
+freebsd11_ftw(const char *path,
+    int (*fn)(const char *, const struct freebsd11_stat *, int), int nfds)
+{	/* ftw() on the FreeBSD 11 stat layout: walk path, call fn per entry. */
+	char * const paths[2] = { (char *)path, NULL };
+	FTSENT11 *cur;
+	FTS11 *ftsp;
+	int error = 0, fnflag, sverrno;
+
+	/* XXX - nfds is only range-checked; it is otherwise unused */
+	if (nfds < 1) {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	ftsp = freebsd11_fts_open(paths,
+	    FTS_LOGICAL | FTS_COMFOLLOW | FTS_NOCHDIR, NULL);
+	if (ftsp == NULL)
+		return (-1);
+	while ((cur = freebsd11_fts_read(ftsp)) != NULL) {
+		switch (cur->fts_info) {	/* map FTS_* info to an FTW_* flag */
+		case FTS_D:
+			fnflag = FTW_D;
+			break;
+		case FTS_DNR:
+			fnflag = FTW_DNR;
+			break;
+		case FTS_DP:
+			/* we only visit in preorder; skip the second visit */
+			continue;
+		case FTS_F:
+		case FTS_DEFAULT:
+			fnflag = FTW_F;
+			break;
+		case FTS_NS:
+		case FTS_NSOK:
+		case FTS_SLNONE:
+			fnflag = FTW_NS;
+			break;
+		case FTS_SL:
+			fnflag = FTW_SL;
+			break;
+		case FTS_DC:
+			errno = ELOOP;	/* reported to the caller with -1 */
+			/* FALLTHROUGH */
+		default:
+			error = -1;
+			goto done;
+		}
+		error = fn(cur->fts_path, cur->fts_statp, fnflag);
+		if (error != 0)	/* nonzero from fn stops the walk, is returned */
+			break;
+	}
+done:
+	sverrno = errno;	/* keep the walk's errno across the close */
+	if (freebsd11_fts_close(ftsp) != 0 && error == 0)
+		error = -1;	/* close failed: its errno is left in place */
+	else
+		errno = sverrno;
+	return (error);
+}
+
+__sym_compat(ftw, freebsd11_ftw, FBSD_1.0);

Property changes on: head/lib/libc/gen/ftw-compat11.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: head/lib/libc/gen/gen-compat.h
===================================================================
--- head/lib/libc/gen/gen-compat.h	(nonexistent)
+++ head/lib/libc/gen/gen-compat.h	(revision 318736)
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 2012 Gleb Kurtsou <gleb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef	_GEN_COMPAT_H_
+#define	_GEN_COMPAT_H_
+
+#include <dirent.h>
+
+#define FREEBSD11_DIRSIZ(dp)	/* fixed part + name and NUL, padded to 4 */ \
+	(sizeof(struct freebsd11_dirent) - sizeof((dp)->d_name) +	\
+	    (((dp)->d_namlen + 1 + 3) &~ 3))
+
+struct freebsd11_dirent;
+struct freebsd11_stat;
+struct freebsd11_statfs;
+
+struct freebsd11_dirent *freebsd11_readdir(DIR *);
+int	freebsd11_readdir_r(DIR *, struct freebsd11_dirent *,
+	    struct freebsd11_dirent **);
+int	freebsd11_stat(const char *, struct freebsd11_stat *);
+int	freebsd11_lstat(const char *, struct freebsd11_stat *);
+int	freebsd11_fstat(int, struct freebsd11_stat *);
+int	freebsd11_fstatat(int, const char *, struct freebsd11_stat *, int);
+
+int	freebsd11_statfs(const char *, struct freebsd11_statfs *);
+int	freebsd11_getfsstat(struct freebsd11_statfs *, long, int);
+int	freebsd11_getmntinfo(struct freebsd11_statfs **, int);
+
+char	*freebsd11_devname(__uint32_t dev, __mode_t type);
+char	*freebsd11_devname_r(__uint32_t dev, __mode_t type, char *buf, int len);
+
+#endif /* _GEN_COMPAT_H_ */

Property changes on: head/lib/libc/gen/gen-compat.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/lib/libc/gen/gen-private.h
===================================================================
--- head/lib/libc/gen/gen-private.h	(revision 318735)
+++ head/lib/libc/gen/gen-private.h	(revision 318736)
@@ -1,58 +1,62 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _GEN_PRIVATE_H_
 #define	_GEN_PRIVATE_H_
 
 struct _telldir;		/* see telldir.h */
 struct pthread_mutex;
 
 /*
  * Structure describing an open directory.
  *
  * NOTE. Change structure layout with care, at least dd_fd field has to
  * remain unchanged to guarantee backward compatibility.
  */
 struct _dirdesc {
 	int	dd_fd;		/* file descriptor associated with directory */
 	long	dd_loc;		/* offset in current buffer */
 	long	dd_size;	/* amount of data returned by getdirentries */
 	char	*dd_buf;	/* data buffer */
 	int	dd_len;		/* size of data buffer */
-	long	dd_seek;	/* magic cookie returned by getdirentries */
+	off_t	dd_seek;	/* magic cookie returned by getdirentries */
 	int	dd_flags;	/* flags for readdir */
 	struct pthread_mutex	*dd_lock;	/* lock */
 	struct _telldir *dd_td;	/* telldir position recording */
+	void	*dd_compat_de;	/* compat dirent */
 };
 
 #define	_dirfd(dirp)	((dirp)->dd_fd)
+
+struct dirent;
+int __readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result);
 
 #endif /* !_GEN_PRIVATE_H_ */
Index: head/lib/libc/gen/getmntinfo-compat11.c
===================================================================
--- head/lib/libc/gen/getmntinfo-compat11.c	(nonexistent)
+++ head/lib/libc/gen/getmntinfo-compat11.c	(revision 318736)
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)getmntinfo.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/ucred.h>
+#define	_WANT_FREEBSD11_STATFS
+#include <sys/mount.h>
+#include <stdlib.h>
+#include "gen-compat.h"
+
+/*
+ * Return information about mounted filesystems (FreeBSD 11 statfs layout).
+ */
+int
+freebsd11_getmntinfo(struct freebsd11_statfs **mntbufp, int flags)
+{
+	static struct freebsd11_statfs *mntbuf;	/* reused across calls */
+	static int mntsize;			/* last getfsstat result */
+	static long bufsize;			/* bytes at mntbuf, 0 if none */
+
+	if (mntsize <= 0 &&
+	    (mntsize = freebsd11_getfsstat(0, 0, MNT_NOWAIT)) < 0)
+		return (0);
+	if (bufsize > 0 &&
+	    (mntsize = freebsd11_getfsstat(mntbuf, bufsize, flags)) < 0)
+		return (0);
+	while (bufsize <= mntsize * sizeof(struct freebsd11_statfs)) {
+		free(mntbuf);
+		mntbuf = malloc((mntsize + 1) * sizeof(*mntbuf));
+		bufsize = mntbuf != NULL ? (mntsize + 1) * sizeof(*mntbuf) : 0;
+		if (mntbuf == NULL)	/* bufsize is 0: next call reallocates */
+			return (0);
+		if ((mntsize = freebsd11_getfsstat(mntbuf, bufsize, flags)) < 0)
+			return (0);
+	}
+	*mntbufp = mntbuf;	/* static storage: the next call overwrites it */
+	return (mntsize);	/* entry count; the 0 returns above mean failure */
+}
+
+__sym_compat(getmntinfo, freebsd11_getmntinfo, FBSD_1.0);

Property changes on: head/lib/libc/gen/getmntinfo-compat11.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: head/lib/libc/gen/glob-compat11.c
===================================================================
--- head/lib/libc/gen/glob-compat11.c	(nonexistent)
+++ head/lib/libc/gen/glob-compat11.c	(revision 318736)
@@ -0,0 +1,1093 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Guido van Rossum.
+ *
+ * Copyright (c) 2011 The FreeBSD Foundation
+ * All rights reserved.
+ * Portions of this software were developed by David Chisnall
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: $FreeBSD$
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)glob.c	8.3 (Berkeley) 10/13/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#define	_WANT_FREEBSD11_STAT
+#include <sys/stat.h>
+
+#include <ctype.h>
+#define	_WANT_FREEBSD11_DIRENT
+#include <dirent.h>
+#include <errno.h>
+#include <glob.h>
+#include <limits.h>
+#include <pwd.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+
+#include "collate.h"
+#include "gen-compat.h"
+#include "glob-compat11.h"
+
+/*
+ * glob(3) expansion limits. Stop the expansion if any of these limits
+ * is reached. This caps the runtime in the face of DoS attacks. See
+ * also CVE-2010-2632
+ */
+#define	GLOB_LIMIT_BRACE	128	/* number of brace calls */
+#define	GLOB_LIMIT_PATH		65536	/* number of path elements */
+#define	GLOB_LIMIT_READDIR	16384	/* number of readdirs */
+#define	GLOB_LIMIT_STAT		1024	/* number of stat system calls */
+#define	GLOB_LIMIT_STRING	ARG_MAX	/* maximum total size for paths */
+
+struct glob_limit {
+	size_t	l_brace_cnt;	/* brace expansion steps taken so far */
+	size_t	l_path_lim;	/* cap on matched paths under GLOB_LIMIT */
+	size_t	l_readdir_cnt;	/* directory entries read so far */
+	size_t	l_stat_cnt;	/* successful end-of-pattern lstat calls */
+	size_t	l_string_cnt;	/* presumably total path bytes -- TODO confirm */
+};
+
+#define	DOT		L'.'
+#define	EOS		L'\0'
+#define	LBRACKET	L'['
+#define	NOT		L'!'
+#define	QUESTION	L'?'
+#define	QUOTE		L'\\'
+#define	RANGE		L'-'
+#define	RBRACKET	L']'
+#define	SEP		L'/'
+#define	STAR		L'*'
+#define	TILDE		L'~'
+#define	LBRACE		L'{'
+#define	RBRACE		L'}'
+#define	COMMA		L','
+
+#define	M_QUOTE		0x8000000000ULL
+#define	M_PROTECT	0x4000000000ULL
+#define	M_MASK		0xffffffffffULL
+#define	M_CHAR		0x00ffffffffULL
+
+typedef uint_fast64_t Char;
+
+#define	CHAR(c)		((Char)((c)&M_CHAR))
+#define	META(c)		((Char)((c)|M_QUOTE))
+#define	UNPROT(c)	((c) & ~M_PROTECT)
+#define	M_ALL		META(L'*')
+#define	M_END		META(L']')
+#define	M_NOT		META(L'!')
+#define	M_ONE		META(L'?')
+#define	M_RNG		META(L'-')
+#define	M_SET		META(L'[')
+#define	ismeta(c)	(((c)&M_QUOTE) != 0)
+#ifdef DEBUG
+#define	isprot(c)	(((c)&M_PROTECT) != 0)
+#endif
+
+static int	 compare(const void *, const void *);
+static int	 g_Ctoc(const Char *, char *, size_t);
+static int	 g_lstat(Char *, struct freebsd11_stat *, glob11_t *);
+static DIR	*g_opendir(Char *, glob11_t *);
+static const Char *g_strchr(const Char *, wchar_t);
+#ifdef notdef
+static Char	*g_strcat(Char *, const Char *);
+#endif
+static int	 g_stat(Char *, struct freebsd11_stat *, glob11_t *);
+static int	 glob0(const Char *, glob11_t *, struct glob_limit *,
+    const char *);
+static int	 glob1(Char *, glob11_t *, struct glob_limit *);
+static int	 glob2(Char *, Char *, Char *, Char *, glob11_t *,
+    struct glob_limit *);
+static int	 glob3(Char *, Char *, Char *, Char *, Char *, glob11_t *,
+    struct glob_limit *);
+static int	 globextend(const Char *, glob11_t *, struct glob_limit *,
+    const char *);
+static const Char *
+		 globtilde(const Char *, Char *, size_t, glob11_t *);
+static int	 globexp0(const Char *, glob11_t *, struct glob_limit *,
+    const char *);
+static int	 globexp1(const Char *, glob11_t *, struct glob_limit *);
+static int	 globexp2(const Char *, const Char *, glob11_t *,
+    struct glob_limit *);
+static int	 globfinal(glob11_t *, struct glob_limit *, size_t,
+    const char *);
+static int	 match(Char *, Char *, Char *);
+static int	 err_nomatch(glob11_t *, struct glob_limit *, const char *);
+static int	 err_aborted(glob11_t *, int, char *);
+#ifdef DEBUG
+static void	 qprintf(const char *, Char *);
+#endif
+
+int
+freebsd11_glob(const char * __restrict pattern, int flags,
+	 int (*errfunc)(const char *, int), glob11_t * __restrict pglob)
+{	/* Entry point: widen pattern into Char form, then expand into pglob. */
+	struct glob_limit limit = { 0, 0, 0, 0, 0 };
+	const char *patnext;
+	Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
+	mbstate_t mbs;
+	wchar_t wc;
+	size_t clen;
+	int too_long;
+
+	patnext = pattern;
+	if (!(flags & GLOB_APPEND)) {	/* not appending: start empty */
+		pglob->gl_pathc = 0;
+		pglob->gl_pathv = NULL;
+		if (!(flags & GLOB_DOOFFS))
+			pglob->gl_offs = 0;
+	}
+	if (flags & GLOB_LIMIT) {
+		limit.l_path_lim = pglob->gl_matchc;	/* caller's cap, if any */
+		if (limit.l_path_lim == 0)
+			limit.l_path_lim = GLOB_LIMIT_PATH;
+	}
+	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
+	pglob->gl_errfunc = errfunc;
+	pglob->gl_matchc = 0;
+
+	bufnext = patbuf;
+	bufend = bufnext + MAXPATHLEN - 1;
+	too_long = 1;	/* cleared only once the pattern's NUL is reached */
+	if (flags & GLOB_NOESCAPE) {
+		memset(&mbs, 0, sizeof(mbs));
+		while (bufnext <= bufend) {
+			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
+			if (clen == (size_t)-1 || clen == (size_t)-2)
+				return (err_nomatch(pglob, &limit, pattern));
+			else if (clen == 0) {
+				too_long = 0;
+				break;
+			}
+			*bufnext++ = wc;
+			patnext += clen;
+		}
+	} else {
+		/* Protect the quoted characters. */
+		memset(&mbs, 0, sizeof(mbs));
+		while (bufnext <= bufend) {
+			if (*patnext == '\\') {
+				if (*++patnext == '\0') {
+					*bufnext++ = QUOTE;	/* lone trailing \ */
+					continue;
+				}
+				prot = M_PROTECT;
+			} else
+				prot = 0;
+			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
+			if (clen == (size_t)-1 || clen == (size_t)-2)
+				return (err_nomatch(pglob, &limit, pattern));
+			else if (clen == 0) {
+				too_long = 0;
+				break;
+			}
+			*bufnext++ = wc | prot;
+			patnext += clen;
+		}
+	}
+	if (too_long)	/* pattern did not fit in patbuf */
+		return (err_nomatch(pglob, &limit, pattern));
+	*bufnext = EOS;
+
+	if (flags & GLOB_BRACE)
+	    return (globexp0(patbuf, pglob, &limit, pattern));
+	else
+	    return (glob0(patbuf, pglob, &limit, pattern));
+}
+
+static int
+globexp0(const Char *pattern, glob11_t *pglob, struct glob_limit *limit,
+    const char *origpat) {	/* GLOB_BRACE entry: expand braces, finalize */
+	int rv;
+	size_t oldpathc;
+
+	/* Protect a single {}, for find(1), like csh */
+	if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) {
+		if ((pglob->gl_flags & GLOB_LIMIT) &&
+		    limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) {
+			errno = E2BIG;
+			return (GLOB_NOSPACE);
+		}
+		return (glob0(pattern, pglob, limit, origpat));
+	}
+
+	oldpathc = pglob->gl_pathc;	/* baseline for globfinal's match check */
+
+	if ((rv = globexp1(pattern, pglob, limit)) != 0)
+		return rv;
+
+	return (globfinal(pglob, limit, oldpathc, origpat));
+}
+
+/*
+ * Expand recursively a glob {} pattern. When there is no more expansion
+ * invoke the standard globbing routine to glob the rest of the magic
+ * characters.  Under GLOB_LIMIT, too many brace steps yield GLOB_NOSPACE.
+ */
+static int
+globexp1(const Char *pattern, glob11_t *pglob, struct glob_limit *limit)
+{
+	const Char* ptr;
+
+	if ((ptr = g_strchr(pattern, LBRACE)) != NULL) {
+		if ((pglob->gl_flags & GLOB_LIMIT) &&
+		    limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) {
+			errno = E2BIG;
+			return (GLOB_NOSPACE);
+		}
+		return (globexp2(ptr, pattern, pglob, limit));
+	}
+
+	return (glob0(pattern, pglob, limit, NULL));	/* NULL: no globfinal */
+}
+
+
+/*
+ * Recursive brace globbing helper. Tries to expand a single brace.
+ * If it succeeds then it invokes globexp1 with the new pattern.
+ * If it fails then it tries to glob the rest of the pattern and returns.
+ */
+static int
+globexp2(const Char *ptr, const Char *pattern, glob11_t *pglob,
+    struct glob_limit *limit)
+{
+	int     i, rv;
+	Char   *lm, *ls;
+	const Char *pe, *pm, *pm1, *pl;
+	Char    patbuf[MAXPATHLEN];
+
+	/* copy part up to the brace; ls marks where alternatives are placed */
+	for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
+		continue;
+	*lm = EOS;
+	ls = lm;
+
+	/* Find the balanced brace; i counts nested open braces */
+	for (i = 0, pe = ++ptr; *pe != EOS; pe++)
+		if (*pe == LBRACKET) {
+			/* Ignore everything between [] */
+			for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
+				continue;
+			if (*pe == EOS) {
+				/*
+				 * We could not find a matching RBRACKET.
+				 * Ignore and just look for RBRACE
+				 */
+				pe = pm;
+			}
+		}
+		else if (*pe == LBRACE)
+			i++;
+		else if (*pe == RBRACE) {
+			if (i == 0)
+				break;
+			i--;
+		}
+
+	/* Non matching braces; just glob the pattern */
+	if (i != 0 || *pe == EOS)
+		return (glob0(pattern, pglob, limit, NULL));
+
+	for (i = 0, pl = pm = ptr; pm <= pe; pm++)	/* pl: alternative start */
+		switch (*pm) {
+		case LBRACKET:
+			/* Ignore everything between [] */
+			for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
+				continue;
+			if (*pm == EOS) {
+				/*
+				 * We could not find a matching RBRACKET.
+				 * Ignore and just look for RBRACE
+				 */
+				pm = pm1;
+			}
+			break;
+
+		case LBRACE:
+			i++;
+			break;
+
+		case RBRACE:
+			if (i) {
+			    i--;
+			    break;
+			}
+			/* FALLTHROUGH */
+		case COMMA:
+			if (i && *pm == COMMA)	/* comma inside a nested brace */
+				break;
+			else {
+				/* Append the current string */
+				for (lm = ls; (pl < pm); *lm++ = *pl++)
+					continue;
+				/*
+				 * Append the rest of the pattern after the
+				 * closing brace
+				 */
+				for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
+					continue;
+
+				/* Expand the current pattern */
+#ifdef DEBUG
+				qprintf("globexp2:", patbuf);
+#endif
+				rv = globexp1(patbuf, pglob, limit);
+				if (rv)
+					return (rv);
+
+				/* move after the comma, to the next string */
+				pl = pm + 1;
+			}
+			break;
+
+		default:
+			break;
+		}
+	return (0);
+}
+
+
+
+/*
+ * Expand a leading ~ or ~user when GLOB_TILDE is set; NULL means failure.
+ */
+static const Char *
+globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob11_t *pglob)
+{
+	struct passwd *pwd;
+	char *h, *sc;
+	const Char *p;
+	Char *b, *eb;
+	wchar_t wc;
+	wchar_t wbuf[MAXPATHLEN];
+	wchar_t *wbufend, *dc;
+	size_t clen;
+	mbstate_t mbs;
+	int too_long;
+
+	if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
+		return (pattern);	/* nothing to expand */
+
+	/*
+	 * Copy up to the end of the string or /
+	 */
+	eb = &patbuf[patbuf_len - 1];
+	for (p = pattern + 1, b = patbuf;
+	    b < eb && *p != EOS && UNPROT(*p) != SEP; *b++ = *p++)
+		continue;
+
+	if (*p != EOS && UNPROT(*p) != SEP)
+		return (NULL);	/* the name did not fit in patbuf */
+
+	*b = EOS;
+	h = NULL;
+
+	if (patbuf[0] == EOS) {
+		/*
+		 * handle a plain ~ or ~/ by expanding $HOME first (iff
+		 * we're not running setuid or setgid) and then trying
+		 * the password file
+		 */
+		if (issetugid() != 0 ||
+		    (h = getenv("HOME")) == NULL) {
+			if (((h = getlogin()) != NULL &&
+			     (pwd = getpwnam(h)) != NULL) ||
+			    (pwd = getpwuid(getuid())) != NULL)
+				h = pwd->pw_dir;
+			else
+				return (pattern);	/* no home: keep the ~ */
+		}
+	}
+	else {
+		/*
+		 * Expand a ~user
+		 */
+		if (g_Ctoc(patbuf, (char *)wbuf, sizeof(wbuf)))
+			return (NULL);
+		if ((pwd = getpwnam((char *)wbuf)) == NULL)
+			return (pattern);	/* unknown user: keep ~user */
+		else
+			h = pwd->pw_dir;
+	}
+
+	/* Copy the home directory */
+	dc = wbuf;
+	sc = h;
+	wbufend = wbuf + MAXPATHLEN - 1;
+	too_long = 1;
+	memset(&mbs, 0, sizeof(mbs));
+	while (dc <= wbufend) {
+		clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
+		if (clen == (size_t)-1 || clen == (size_t)-2) {
+			/* XXX Invalid sequence: use the raw byte. */
+			wc = (unsigned char)*sc;
+			clen = 1;
+			memset(&mbs, 0, sizeof(mbs));
+		}
+		if ((*dc++ = wc) == EOS) {
+			too_long = 0;
+			break;
+		}
+		sc += clen;
+	}
+	if (too_long)
+		return (NULL);
+
+	dc = wbuf;	/* every home-directory character is stored protected */
+	for (b = patbuf; b < eb && *dc != EOS; *b++ = *dc++ | M_PROTECT)
+		continue;
+	if (*dc != EOS)
+		return (NULL);
+
+	/* Append the rest of the pattern */
+	if (*p != EOS) {
+		too_long = 1;
+		while (b <= eb) {
+			if ((*b++ = *p++) == EOS) {
+				too_long = 0;
+				break;
+			}
+		}
+		if (too_long)
+			return (NULL);
+	} else
+		*b = EOS;
+
+	return (patbuf);	/* expanded pattern now lives in patbuf */
+}
+
+
+/*
+ * The main glob() routine: compiles the pattern (optionally processing
+ * quotes), calls glob1() to do the real pattern matching, and finally
+ * runs globfinal() (no-match check, sorting) when origpat is non-NULL.
+ * Returns 0 if things went well, nonzero if errors occurred; a failed
+ * tilde expansion yields GLOB_NOSPACE with errno set to E2BIG.
+ */
+static int
+glob0(const Char *pattern, glob11_t *pglob, struct glob_limit *limit,
+    const char *origpat) {
+	const Char *qpatnext;
+	int err;
+	size_t oldpathc;
+	Char *bufnext, c, patbuf[MAXPATHLEN];
+
+	qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
+	if (qpatnext == NULL) {
+		errno = E2BIG;
+		return (GLOB_NOSPACE);
+	}
+	oldpathc = pglob->gl_pathc;
+	bufnext = patbuf;
+
+	/* We don't need to check for buffer overflow any more. */
+	while ((c = *qpatnext++) != EOS) {
+		switch (c) {
+		case LBRACKET:
+			c = *qpatnext;
+			if (c == NOT)
+				++qpatnext;
+			if (*qpatnext == EOS ||
+			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
+				*bufnext++ = LBRACKET;	/* unclosed [: literal */
+				if (c == NOT)
+					--qpatnext;
+				break;
+			}
+			*bufnext++ = M_SET;
+			if (c == NOT)
+				*bufnext++ = M_NOT;
+			c = *qpatnext++;
+			do {
+				*bufnext++ = CHAR(c);
+				if (*qpatnext == RANGE &&
+				    (c = qpatnext[1]) != RBRACKET) {
+					*bufnext++ = M_RNG;
+					*bufnext++ = CHAR(c);
+					qpatnext += 2;
+				}
+			} while ((c = *qpatnext++) != RBRACKET);
+			pglob->gl_flags |= GLOB_MAGCHAR;
+			*bufnext++ = M_END;
+			break;
+		case QUESTION:
+			pglob->gl_flags |= GLOB_MAGCHAR;
+			*bufnext++ = M_ONE;
+			break;
+		case STAR:
+			pglob->gl_flags |= GLOB_MAGCHAR;
+			/* collapse adjacent stars to one,
+			 * to avoid exponential behavior
+			 */
+			if (bufnext == patbuf || bufnext[-1] != M_ALL)
+			    *bufnext++ = M_ALL;
+			break;
+		default:
+			*bufnext++ = CHAR(c);
+			break;
+		}
+	}
+	*bufnext = EOS;
+#ifdef DEBUG
+	qprintf("glob0:", patbuf);
+#endif
+
+	if ((err = glob1(patbuf, pglob, limit)) != 0)
+		return(err);
+
+	if (origpat != NULL)
+		return (globfinal(pglob, limit, oldpathc, origpat));
+
+	return (0);
+}
+
+static int
+globfinal(glob11_t *pglob, struct glob_limit *limit, size_t oldpathc,
+    const char *origpat) {	/* finish one glob run: no-match check, sort */
+	if (pglob->gl_pathc == oldpathc)	/* nothing was added */
+		return (err_nomatch(pglob, limit, origpat));
+
+	if (!(pglob->gl_flags & GLOB_NOSORT))	/* sort only the new entries */
+		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
+		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
+
+	return (0);
+}
+
+static int
+compare(const void *p, const void *q)
+{	/* qsort callback: order two char * elements with strcoll() */
+	return (strcoll(*(char **)p, *(char **)q));
+}
+
+static int
+glob1(Char *pattern, glob11_t *pglob, struct glob_limit *limit)
+{	/* Start matching a compiled pattern with an empty path buffer. */
+	Char pathbuf[MAXPATHLEN];
+
+	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
+	if (*pattern == EOS)
+		return (0);
+	return (glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
+	    pattern, pglob, limit));
+}
+
+/*
+ * The functions glob2 and glob3 are mutually recursive; there is one level
+ * of recursion for each segment in the pattern that contains one or more
+ * meta characters.  glob2 copies literal segments and records a match.
+ */
+static int
+glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
+      glob11_t *pglob, struct glob_limit *limit)
+{
+	struct freebsd11_stat sb;
+	Char *p, *q;
+	int anymeta;
+
+	/*
+	 * Loop over pattern segments until end of pattern or until
+	 * segment with meta character found.
+	 */
+	for (anymeta = 0;;) {
+		if (*pattern == EOS) {		/* End of pattern? */
+			*pathend = EOS;
+			if (g_lstat(pathbuf, &sb, pglob))
+				return (0);	/* lstat failed: not a match */
+
+			if ((pglob->gl_flags & GLOB_LIMIT) &&
+			    limit->l_stat_cnt++ >= GLOB_LIMIT_STAT) {
+				errno = E2BIG;
+				return (GLOB_NOSPACE);
+			}
+			if ((pglob->gl_flags & GLOB_MARK) &&
+			    UNPROT(pathend[-1]) != SEP &&
+			    (S_ISDIR(sb.st_mode) ||
+			    (S_ISLNK(sb.st_mode) &&
+			    g_stat(pathbuf, &sb, pglob) == 0 &&
+			    S_ISDIR(sb.st_mode)))) {
+				if (pathend + 1 > pathend_last) {
+					errno = E2BIG;
+					return (GLOB_NOSPACE);
+				}
+				*pathend++ = SEP;	/* mark the directory */
+				*pathend = EOS;
+			}
+			++pglob->gl_matchc;
+			return (globextend(pathbuf, pglob, limit, NULL));
+		}
+
+		/* Find end of next segment, copy tentatively to pathend. */
+		q = pathend;
+		p = pattern;
+		while (*p != EOS && UNPROT(*p) != SEP) {
+			if (ismeta(*p))
+				anymeta = 1;
+			if (q + 1 > pathend_last) {
+				errno = E2BIG;
+				return (GLOB_NOSPACE);
+			}
+			*q++ = *p++;
+		}
+
+		if (!anymeta) {		/* No expansion, do next segment. */
+			pathend = q;
+			pattern = p;
+			while (UNPROT(*pattern) == SEP) {
+				if (pathend + 1 > pathend_last) {
+					errno = E2BIG;
+					return (GLOB_NOSPACE);
+				}
+				*pathend++ = *pattern++;
+			}
+		} else			/* Need expansion, recurse. */
+			return (glob3(pathbuf, pathend, pathend_last, pattern,
+			    p, pglob, limit));
+	}
+	/* NOTREACHED */
+}
+
+static int
+glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
+      Char *pattern, Char *restpattern,
+      glob11_t *pglob, struct glob_limit *limit)
+{	/* Match one meta segment against the entries of directory pathbuf. */
+	struct freebsd11_dirent *dp;
+	DIR *dirp;
+	int err, too_long, saverrno, saverrno2;
+	char buf[MAXPATHLEN + MB_LEN_MAX - 1];
+
+	struct freebsd11_dirent *(*readdirfunc)(DIR *);
+
+	if (pathend > pathend_last) {
+		errno = E2BIG;
+		return (GLOB_NOSPACE);
+	}
+	*pathend = EOS;
+	if (pglob->gl_errfunc != NULL &&
+	    g_Ctoc(pathbuf, buf, sizeof(buf))) {
+		errno = E2BIG;
+		return (GLOB_NOSPACE);
+	}
+
+	saverrno = errno;	/* put back on exit if errno is still 0 */
+	errno = 0;
+	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
+		if (errno == ENOENT || errno == ENOTDIR)
+			return (0);	/* nothing to scan: no match */
+		err = err_aborted(pglob, errno, buf);
+		if (errno == 0)
+			errno = saverrno;
+		return (err);
+	}
+
+	err = 0;
+
+	/* pglob->gl_readdir takes a void *, fix this manually */
+	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
+		readdirfunc =
+		    (struct freebsd11_dirent *(*)(DIR *))pglob->gl_readdir;
+	else
+		readdirfunc = freebsd11_readdir;
+
+	errno = 0;
+	/* Search directory for matching names. */
+	while ((dp = (*readdirfunc)(dirp)) != NULL) {
+		char *sc;
+		Char *dc;
+		wchar_t wc;
+		size_t clen;
+		mbstate_t mbs;
+
+		if ((pglob->gl_flags & GLOB_LIMIT) &&
+		    limit->l_readdir_cnt++ >= GLOB_LIMIT_READDIR) {
+			errno = E2BIG;
+			err = GLOB_NOSPACE;
+			break;
+		}
+
+		/* Initial DOT must be matched literally. */
+		if (dp->d_name[0] == '.' && UNPROT(*pattern) != DOT) {
+			errno = 0;
+			continue;
+		}
+		memset(&mbs, 0, sizeof(mbs));
+		dc = pathend;
+		sc = dp->d_name;
+		too_long = 1;
+		while (dc <= pathend_last) {
+			clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
+			if (clen == (size_t)-1 || clen == (size_t)-2) {
+				/* XXX Invalid sequence: use the raw byte. */
+				wc = (unsigned char)*sc;
+				clen = 1;
+				memset(&mbs, 0, sizeof(mbs));
+			}
+			if ((*dc++ = wc) == EOS) {
+				too_long = 0;
+				break;
+			}
+			sc += clen;
+		}
+		if (too_long && (err = err_aborted(pglob, ENAMETOOLONG,
+		    buf))) {
+			errno = ENAMETOOLONG;
+			break;
+		}
+		if (too_long || !match(pathend, pattern, restpattern)) {
+			*pathend = EOS;	/* drop the tentatively copied name */
+			errno = 0;
+			continue;
+		}
+		if (errno == 0)
+			errno = saverrno;
+		err = glob2(pathbuf, --dc, pathend_last, restpattern,
+		    pglob, limit);	/* --dc: back onto the stored EOS */
+		if (err)
+			break;
+		errno = 0;
+	}
+
+	saverrno2 = errno;	/* keep the loop's errno across closedir */
+	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
+		(*pglob->gl_closedir)(dirp);
+	else
+		closedir(dirp);
+	errno = saverrno2;
+
+	if (err)
+		return (err);
+
+	if (dp == NULL && errno != 0 &&
+	    (err = err_aborted(pglob, errno, buf)))
+		return (err);	/* reading the directory ended with an error */
+
+	if (errno == 0)
+		errno = saverrno;
+	return (0);
+}
+
+
+/*
+ * Extend the gl_pathv member of a glob11_t structure to accommodate a new item,
+ * add the new item, and update gl_pathc.
+ *
+ * This assumes the BSD realloc, which only copies the block when its size
+ * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
+ * behavior.
+ *
+ * Return 0 if new item added, error code if memory couldn't be allocated.
+ *
+ * Invariant of the glob11_t structure:
+ *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
+ *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
+ */
+static int
+globextend(const Char *path, glob11_t *pglob, struct glob_limit *limit,
+    const char *origpat)
+{
+	char **pathv;
+	size_t i, newn, len;
+	char *copy;
+	const Char *p;
+
+	if ((pglob->gl_flags & GLOB_LIMIT) &&
+	    pglob->gl_matchc > limit->l_path_lim) {
+		errno = E2BIG;
+		return (GLOB_NOSPACE);
+	}
+
+	newn = 2 + pglob->gl_pathc + pglob->gl_offs;	/* +item, +NULL */
+	/* reallocarray(NULL, newn, size) is equivalent to malloc(newn*size). */
+	pathv = reallocarray(pglob->gl_pathv, newn, sizeof(*pathv));
+	if (pathv == NULL)
+		return (GLOB_NOSPACE);	/* gl_pathv is left untouched */
+
+	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
+		/* first time around -- clear initial gl_offs items */
+		pathv += pglob->gl_offs;
+		for (i = pglob->gl_offs + 1; --i > 0; )
+			*--pathv = NULL;	/* ends back at the base */
+	}
+	pglob->gl_pathv = pathv;
+
+	if (origpat != NULL)
+		copy = strdup(origpat);
+	else {
+		for (p = path; *p++ != EOS;)
+			continue;
+		len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */
+		if ((copy = malloc(len)) != NULL) {
+			if (g_Ctoc(path, copy, len)) {
+				free(copy);
+				errno = E2BIG;
+				return (GLOB_NOSPACE);
+			}
+		}
+	}
+	if (copy != NULL) {
+		limit->l_string_cnt += strlen(copy) + 1;
+		if ((pglob->gl_flags & GLOB_LIMIT) &&
+		    limit->l_string_cnt >= GLOB_LIMIT_STRING) {
+			free(copy);
+			errno = E2BIG;
+			return (GLOB_NOSPACE);
+		}
+		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
+	}
+	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;	/* terminator */
+	return (copy == NULL ? GLOB_NOSPACE : 0);
+}
+
+/*
+ * Match name against the pattern [pat, patend): returns 1 on match, else 0.
+ */
+static int
+match(Char *name, Char *pat, Char *patend)
+{
+	int ok, negate_range;
+	Char c, k, *nextp, *nextn;
+	struct xlocale_collate *table =
+		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
+
+	nextn = NULL;
+	nextp = NULL;
+
+	while (1) {
+		while (pat < patend) {
+			c = *pat++;
+			switch (c & M_MASK) {
+			case M_ALL:
+				if (pat == patend)
+					return (1);	/* M_ALL is last */
+				if (*name == EOS)
+					return (0);
+				nextn = name + 1;	/* backtrack point */
+				nextp = pat - 1;	/* re-run this M_ALL */
+				break;
+			case M_ONE:
+				if (*name++ == EOS)
+					goto fail;
+				break;
+			case M_SET:
+				ok = 0;
+				if ((k = *name++) == EOS)
+					goto fail;
+				negate_range = ((*pat & M_MASK) == M_NOT);
+				if (negate_range != 0)
+					++pat;
+				while (((c = *pat++) & M_MASK) != M_END)
+					if ((*pat & M_MASK) == M_RNG) {
+						if (table->__collate_load_error ?
+						    CHAR(c) <= CHAR(k) &&
+						    CHAR(k) <= CHAR(pat[1]) :
+						    __wcollate_range_cmp(CHAR(c),
+						    CHAR(k)) <= 0 &&
+						    __wcollate_range_cmp(CHAR(k),
+						    CHAR(pat[1])) <= 0)
+							ok = 1;
+						pat += 2;
+					} else if (c == k)
+						ok = 1;
+				if (ok == negate_range)
+					goto fail;
+				break;
+			default:
+				if (*name++ != c)
+					goto fail;
+				break;
+			}
+		}
+		if (*name == EOS)
+			return (1);
+
+	fail:
+		if (nextn == NULL)	/* no M_ALL to retry */
+			break;
+		pat = nextp;
+		name = nextn;
+	}
+	return (0);
+}
+
+/* Release the path strings and the gl_pathv vector of a glob11_t. */
+void
+freebsd11_globfree(glob11_t *pglob)
+{
+	char **slot;
+	size_t n;
+
+	if (pglob->gl_pathv == NULL)
+		return;
+	/* Skip the gl_offs reserved leading slots; gl_pathc strings follow. */
+	slot = pglob->gl_pathv + pglob->gl_offs;
+	for (n = 0; n < pglob->gl_pathc; n++)
+		free(slot[n]);		/* free(NULL) is a no-op */
+	free(pglob->gl_pathv);
+	pglob->gl_pathv = NULL;
+}
+
+/* Open the directory named by the Char string str; "" stands for ".". */
+static DIR *
+g_opendir(Char *str, glob11_t *pglob)
+{
+	char path[MAXPATHLEN + MB_LEN_MAX - 1];
+
+	if (*str == EOS) {
+		strcpy(path, ".");
+	} else if (g_Ctoc(str, path, sizeof(path)) != 0) {
+		errno = ENAMETOOLONG;
+		return (NULL);
+	}
+
+	/* Use the caller-supplied opendir when GLOB_ALTDIRFUNC is set. */
+	if ((pglob->gl_flags & GLOB_ALTDIRFUNC) != 0)
+		return ((*pglob->gl_opendir)(path));
+
+	return (opendir(path));
+}
+
+/* lstat the Char path fn, via pglob->gl_lstat if GLOB_ALTDIRFUNC is set. */
+static int
+g_lstat(Char *fn, struct freebsd11_stat *sb, glob11_t *pglob)
+{
+	char path[MAXPATHLEN + MB_LEN_MAX - 1];
+
+	if (g_Ctoc(fn, path, sizeof(path)) != 0) {
+		errno = ENAMETOOLONG;
+		return (-1);
+	}
+	return ((pglob->gl_flags & GLOB_ALTDIRFUNC) != 0 ?
+	    (*pglob->gl_lstat)(path, sb) : freebsd11_lstat(path, sb));
+}
+
+/* stat the Char path fn, via pglob->gl_stat if GLOB_ALTDIRFUNC is set. */
+static int
+g_stat(Char *fn, struct freebsd11_stat *sb, glob11_t *pglob)
+{
+	char path[MAXPATHLEN + MB_LEN_MAX - 1];
+
+	if (g_Ctoc(fn, path, sizeof(path)) != 0) {
+		errno = ENAMETOOLONG;
+		return (-1);
+	}
+	return ((pglob->gl_flags & GLOB_ALTDIRFUNC) != 0 ?
+	    (*pglob->gl_stat)(path, sb) : freebsd11_stat(path, sb));
+}
+
+/* strchr(3) analogue for Char strings; returns NULL if ch is absent. */
+static const Char *
+g_strchr(const Char *str, wchar_t ch)
+{
+
+	for (; *str != ch; str++)
+		if (*str == 0)
+			return (NULL);
+	return (str);
+}
+
+static int
+g_Ctoc(const Char *str, char *buf, size_t len)
+{
+	mbstate_t mbs;
+	size_t clen;
+
+	memset(&mbs, 0, sizeof(mbs));
+	while (len >= MB_CUR_MAX) {	/* room for any one character */
+		clen = wcrtomb(buf, CHAR(*str), &mbs);
+		if (clen == (size_t)-1) {
+			/* XXX See initial comment #2. */
+			*buf = (char)CHAR(*str);
+			clen = 1;
+			memset(&mbs, 0, sizeof(mbs));
+		}
+		if (CHAR(*str) == EOS)
+			return (0);	/* EOS converted: done */
+		str++;
+		buf += clen;
+		len -= clen;
+	}
+	return (1);			/* buf too small for str */
+}
+
+static int
+err_nomatch(glob11_t *pglob, struct glob_limit *limit, const char *origpat)
+{
+	/*
+	 * With no match, the original pattern is appended when GLOB_NOCHECK
+	 * is set, or when GLOB_NOMAGIC is set and the pattern contained no
+	 * magic characters (GLOB_NOMAGIC exists for csh compatibility).
+	 */
+	if ((pglob->gl_flags & GLOB_NOCHECK) == 0 &&
+	    ((pglob->gl_flags & GLOB_NOMAGIC) == 0 ||
+	    (pglob->gl_flags & GLOB_MAGCHAR) != 0))
+		return (GLOB_NOMATCH);
+	return (globextend(NULL, pglob, limit, origpat));
+}
+
+static int
+err_aborted(glob11_t *pglob, int err, char *buf) {
+	/* Abort when the error callback returns nonzero or GLOB_ERR is set. */
+	if (pglob->gl_errfunc != NULL && pglob->gl_errfunc(buf, err) != 0)
+		return (GLOB_ABORTED);
+	return ((pglob->gl_flags & GLOB_ERR) != 0 ? GLOB_ABORTED : 0);
+}
+
+#ifdef DEBUG
+static void
+qprintf(const char *str, Char *s)
+{
+	Char *p;
+
+	(void)printf("%s\n", str);	/* label line */
+	if (s != NULL) {
+		for (p = s; *p != EOS; p++)	/* row 1: the characters */
+			(void)printf("%c", (char)CHAR(*p));
+		(void)printf("\n");
+		for (p = s; *p != EOS; p++)	/* row 2: '\' where isprot() */
+			(void)printf("%c", (isprot(*p) ? '\\' : ' '));
+		(void)printf("\n");
+		for (p = s; *p != EOS; p++)	/* row 3: '_' where ismeta() */
+			(void)printf("%c", (ismeta(*p) ? '_' : ' '));
+		(void)printf("\n");
+	}
+}
+#endif
+
+__sym_compat(glob, freebsd11_glob, FBSD_1.0);
+__sym_compat(globfree, freebsd11_globfree, FBSD_1.0);

Property changes on: head/lib/libc/gen/glob-compat11.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: head/lib/libc/gen/glob-compat11.h
===================================================================
--- head/lib/libc/gen/glob-compat11.h	(nonexistent)
+++ head/lib/libc/gen/glob-compat11.h	(revision 318736)
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Guido van Rossum.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)glob.h	8.1 (Berkeley) 6/2/93
+ * from: $FreeBSD$
+ * $FreeBSD$
+ */
+
+#ifndef _GLOB_COMPAT11_H_
+#define	_GLOB_COMPAT11_H_
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <glob.h>
+
+struct freebsd11_stat;	/* forward decl: named in parameter lists below */
+typedef struct {
+	size_t gl_pathc;	/* Count of total paths so far. */
+	size_t gl_matchc;	/* Count of paths matching pattern. */
+	size_t gl_offs;		/* Reserved at beginning of gl_pathv. */
+	int gl_flags;		/* Copy of flags parameter to glob. */
+	char **gl_pathv;	/* List of paths matching pattern. */
+				/* Copy of errfunc parameter to glob. */
+	int (*gl_errfunc)(const char *, int);
+
+	/*
+	 * Alternate filesystem access methods for glob; replacement
+	 * versions of closedir(3), readdir(3), opendir(3), stat(2)
+	 * and lstat(2).
+	 */
+	void (*gl_closedir)(void *);
+	struct freebsd11_dirent *(*gl_readdir)(void *);
+	void *(*gl_opendir)(const char *);
+	int (*gl_lstat)(const char *, struct freebsd11_stat *);
+	int (*gl_stat)(const char *, struct freebsd11_stat *);
+} glob11_t;
+
+__BEGIN_DECLS
+int	freebsd11_glob(const char * __restrict, int,
+	int (*)(const char *, int), glob11_t * __restrict);
+void	freebsd11_globfree(glob11_t *);
+__END_DECLS
+
+#endif /* !_GLOB_COMPAT11_H_ */

Property changes on: head/lib/libc/gen/glob-compat11.h
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: head/lib/libc/gen/nftw-compat11.c
===================================================================
--- head/lib/libc/gen/nftw-compat11.c	(nonexistent)
+++ head/lib/libc/gen/nftw-compat11.c	(revision 318736)
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2003, 2004 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F39502-99-1-0512.
+ *
+ * from: $OpenBSD: nftw.c,v 1.7 2006/03/31 19:41:44 millert Exp $
+ * from: $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fts.h>
+#include <ftw.h>
+
+#include "fts-compat11.h"
+
+int
+freebsd11_nftw(const char *path,
+    int (*fn)(const char *, const struct freebsd11_stat *, int, struct FTW *),
+    int nfds, int ftwflags)
+{
+	char * const paths[2] = { (char *)path, NULL };
+	struct FTW ftw;
+	FTSENT11 *cur;
+	FTS11 *ftsp;
+	int error = 0, ftsflags, fnflag, postorder, sverrno;
+
+	/* XXX - nfds is only range-checked, otherwise unused */
+	if (nfds < 1) {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	ftsflags = FTS_COMFOLLOW;
+	if (!(ftwflags & FTW_CHDIR))
+		ftsflags |= FTS_NOCHDIR;
+	if (ftwflags & FTW_MOUNT)
+		ftsflags |= FTS_XDEV;
+	if (ftwflags & FTW_PHYS)
+		ftsflags |= FTS_PHYSICAL;
+	else
+		ftsflags |= FTS_LOGICAL;
+	postorder = (ftwflags & FTW_DEPTH) != 0;
+	ftsp = freebsd11_fts_open(paths, ftsflags, NULL);
+	if (ftsp == NULL)
+		return (-1);
+	while ((cur = freebsd11_fts_read(ftsp)) != NULL) {
+		switch (cur->fts_info) {
+		case FTS_D:
+			if (postorder)
+				continue;	/* reported at FTS_DP instead */
+			fnflag = FTW_D;
+			break;
+		case FTS_DC:
+			continue;
+		case FTS_DNR:
+			fnflag = FTW_DNR;
+			break;
+		case FTS_DP:
+			if (!postorder)
+				continue;	/* already reported at FTS_D */
+			fnflag = FTW_DP;
+			break;
+		case FTS_F:
+		case FTS_DEFAULT:
+			fnflag = FTW_F;
+			break;
+		case FTS_NS:
+		case FTS_NSOK:
+			fnflag = FTW_NS;
+			break;
+		case FTS_SL:
+			fnflag = FTW_SL;
+			break;
+		case FTS_SLNONE:
+			fnflag = FTW_SLN;
+			break;
+		default:
+			error = -1;
+			goto done;
+		}
+		ftw.base = cur->fts_pathlen - cur->fts_namelen;
+		ftw.level = cur->fts_level;
+		error = fn(cur->fts_path, cur->fts_statp, fnflag, &ftw);
+		if (error != 0)
+			break;	/* nonzero from fn ends the walk */
+	}
+done:
+	sverrno = errno;	/* restored unless the close itself fails */
+	if (freebsd11_fts_close(ftsp) != 0 && error == 0)
+		error = -1;
+	else
+		errno = sverrno;
+	return (error);
+}
+
+__sym_compat(nftw, freebsd11_nftw, FBSD_1.0);

Property changes on: head/lib/libc/gen/nftw-compat11.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: head/lib/libc/gen/opendir.c
===================================================================
--- head/lib/libc/gen/opendir.c	(revision 318735)
+++ head/lib/libc/gen/opendir.c	(revision 318736)
@@ -1,360 +1,361 @@
 /*-
  * Copyright (c) 1983, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if defined(LIBC_SCCS) && !defined(lint)
 static char sccsid[] = "@(#)opendir.c	8.8 (Berkeley) 5/1/95";
 #endif /* LIBC_SCCS and not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "namespace.h"
 #include <sys/param.h>
 #include <sys/mount.h>
 #include <sys/stat.h>
 
 #include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include "un-namespace.h"
 
 #include "gen-private.h"
 #include "telldir.h"
 
 static DIR * __opendir_common(int, int, bool);
 
 /*
  * Open a directory.
  */
 DIR *
 opendir(const char *name)
 {
 
 	return (__opendir2(name, DTF_HIDEW|DTF_NODUP));
 }
 
 /*
  * Open a directory with existing file descriptor.
  */
 DIR *
 fdopendir(int fd)
 {
 
 	if (_fcntl(fd, F_SETFD, FD_CLOEXEC) == -1)
 		return (NULL);
 	return (__opendir_common(fd, DTF_HIDEW|DTF_NODUP, true));
 }
 
 DIR *
 __opendir2(const char *name, int flags)
 {
 	int fd;
 	DIR *dir;
 	int saved_errno;
 
 	if ((flags & (__DTF_READALL | __DTF_SKIPREAD)) != 0)
 		return (NULL);
 	if ((fd = _open(name,
 	    O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_CLOEXEC)) == -1)
 		return (NULL);
 
 	dir = __opendir_common(fd, flags, false);
 	if (dir == NULL) {
 		saved_errno = errno;
 		_close(fd);
 		errno = saved_errno;
 	}
 	return (dir);
 }
 
 static int
 opendir_compar(const void *p1, const void *p2)
 {
 
 	return (strcmp((*(const struct dirent **)p1)->d_name,
 	    (*(const struct dirent **)p2)->d_name));
 }
 
 /*
  * For a directory at the top of a unionfs stack, the entire directory's
  * contents are read and cached locally until the next call to rewinddir().
  * For the fdopendir() case, the initial seek position must be preserved.
  * For rewinddir(), the full directory should always be re-read from the
  * beginning.
  *
  * If an error occurs, the existing buffer and state of 'dirp' is left
  * unchanged.
  */
 bool
 _filldir(DIR *dirp, bool use_current_pos)
 {
 	struct dirent **dpv;
 	char *buf, *ddptr, *ddeptr;
 	off_t pos;
 	int fd2, incr, len, n, saved_errno, space;
 	
 	len = 0;
 	space = 0;
 	buf = NULL;
 	ddptr = NULL;
 
 	/*
 	 * Use the system page size if that is a multiple of DIRBLKSIZ.
 	 * Hopefully this can be a big win someday by allowing page
 	 * trades to user space to be done by _getdirentries().
 	 */
 	incr = getpagesize();
 	if ((incr % DIRBLKSIZ) != 0) 
 		incr = DIRBLKSIZ;
 
 	/*
 	 * The strategy here is to read all the directory
 	 * entries into a buffer, sort the buffer, and
 	 * remove duplicate entries by setting the inode
 	 * number to zero.
 	 *
 	 * We reopen the directory because _getdirentries()
 	 * on a MNT_UNION mount modifies the open directory,
 	 * making it refer to the lower directory after the
 	 * upper directory's entries are exhausted.
 	 * This would otherwise break software that uses
 	 * the directory descriptor for fchdir or *at
 	 * functions, such as fts.c.
 	 */
 	if ((fd2 = _openat(dirp->dd_fd, ".", O_RDONLY | O_CLOEXEC)) == -1)
 		return (false);
 
 	if (use_current_pos) {
 		pos = lseek(dirp->dd_fd, 0, SEEK_CUR);
 		if (pos == -1 || lseek(fd2, pos, SEEK_SET) == -1) {
 			saved_errno = errno;
 			_close(fd2);
 			errno = saved_errno;
 			return (false);
 		}
 	}
 
 	do {
 		/*
 		 * Always make at least DIRBLKSIZ bytes
 		 * available to _getdirentries
 		 */
 		if (space < DIRBLKSIZ) {
 			space += incr;
 			len += incr;
 			buf = reallocf(buf, len);
 			if (buf == NULL) {
 				saved_errno = errno;
 				_close(fd2);
 				errno = saved_errno;
 				return (false);
 			}
 			ddptr = buf + (len - space);
 		}
 
 		n = _getdirentries(fd2, ddptr, space, &dirp->dd_seek);
 		if (n > 0) {
 			ddptr += n;
 			space -= n;
 		}
 		if (n < 0) {
 			saved_errno = errno;
 			_close(fd2);
 			errno = saved_errno;
 			return (false);
 		}
 	} while (n > 0);
 	_close(fd2);
 
 	ddeptr = ddptr;
 
 	/*
 	 * There is now a buffer full of (possibly) duplicate
 	 * names.
 	 */
 	dirp->dd_buf = buf;
 
 	/*
 	 * Go round this loop twice...
 	 *
 	 * Scan through the buffer, counting entries.
 	 * On the second pass, save pointers to each one.
 	 * Then sort the pointers and remove duplicate names.
 	 */
 	for (dpv = NULL;;) {
 		n = 0;
 		ddptr = buf;
 		while (ddptr < ddeptr) {
 			struct dirent *dp;
 
 			dp = (struct dirent *) ddptr;
 			if ((long)dp & 03L)
 				break;
 			if ((dp->d_reclen <= 0) ||
 			    (dp->d_reclen > (ddeptr + 1 - ddptr)))
 				break;
 			ddptr += dp->d_reclen;
 			if (dp->d_fileno) {
 				if (dpv)
 					dpv[n] = dp;
 				n++;
 			}
 		}
 
 		if (dpv) {
 			struct dirent *xp;
 
 			/*
 			 * This sort must be stable.
 			 */
 			mergesort(dpv, n, sizeof(*dpv), opendir_compar);
 
 			dpv[n] = NULL;
 			xp = NULL;
 
 			/*
 			 * Scan through the buffer in sort order,
 			 * zapping the inode number of any
 			 * duplicate names.
 			 */
 			for (n = 0; dpv[n]; n++) {
 				struct dirent *dp = dpv[n];
 
 				if ((xp == NULL) ||
 				    strcmp(dp->d_name, xp->d_name)) {
 					xp = dp;
 				} else {
 					dp->d_fileno = 0;
 				}
 				if (dp->d_type == DT_WHT &&
 				    (dirp->dd_flags & DTF_HIDEW))
 					dp->d_fileno = 0;
 			}
 
 			free(dpv);
 			break;
 		} else {
 			dpv = malloc((n+1) * sizeof(struct dirent *));
 			if (dpv == NULL)
 				break;
 		}
 	}
 
 	dirp->dd_len = len;
 	dirp->dd_size = ddptr - dirp->dd_buf;
 	return (true);
 }
 
 
 /*
  * Common routine for opendir(3), __opendir2(3) and fdopendir(3).
  */
 static DIR *
 __opendir_common(int fd, int flags, bool use_current_pos)
 {
 	DIR *dirp;
 	int incr;
 	int saved_errno;
 	int unionstack;
 
 	if ((dirp = malloc(sizeof(DIR) + sizeof(struct _telldir))) == NULL)
 		return (NULL);
 
 	dirp->dd_buf = NULL;
 	dirp->dd_fd = fd;
 	dirp->dd_flags = flags;
 	dirp->dd_loc = 0;
 	dirp->dd_lock = NULL;
 	dirp->dd_td = (struct _telldir *)((char *)dirp + sizeof(DIR));
 	LIST_INIT(&dirp->dd_td->td_locq);
 	dirp->dd_td->td_loccnt = 0;
+	dirp->dd_compat_de = NULL;
 
 	/*
 	 * Use the system page size if that is a multiple of DIRBLKSIZ.
 	 * Hopefully this can be a big win someday by allowing page
 	 * trades to user space to be done by _getdirentries().
 	 */
 	incr = getpagesize();
 	if ((incr % DIRBLKSIZ) != 0) 
 		incr = DIRBLKSIZ;
 
 	/*
 	 * Determine whether this directory is the top of a union stack.
 	 */
 	if (flags & DTF_NODUP) {
 		struct statfs sfb;
 
 		if (_fstatfs(fd, &sfb) < 0)
 			goto fail;
 		unionstack = !strcmp(sfb.f_fstypename, "unionfs")
 		    || (sfb.f_flags & MNT_UNION);
 	} else {
 		unionstack = 0;
 	}
 
 	if (unionstack) {
 		if (!_filldir(dirp, use_current_pos))
 			goto fail;
 		dirp->dd_flags |= __DTF_READALL;
 	} else {
 		dirp->dd_len = incr;
 		dirp->dd_buf = malloc(dirp->dd_len);
 		if (dirp->dd_buf == NULL)
 			goto fail;
 		if (use_current_pos) {
 			/*
 			 * Read the first batch of directory entries
 			 * to prime dd_seek.  This also checks if the
 			 * fd passed to fdopendir() is a directory.
 			 */
 			dirp->dd_size = _getdirentries(dirp->dd_fd,
 			    dirp->dd_buf, dirp->dd_len, &dirp->dd_seek);
 			if (dirp->dd_size < 0) {
 				if (errno == EINVAL)
 					errno = ENOTDIR;
 				goto fail;
 			}
 			dirp->dd_flags |= __DTF_SKIPREAD;
 		} else {
 			dirp->dd_size = 0;
 			dirp->dd_seek = 0;
 		}
 	}
 
 	return (dirp);
 
 fail:
 	saved_errno = errno;
 	free(dirp->dd_buf);
 	free(dirp);
 	errno = saved_errno;
 	return (NULL);
 }
Index: head/lib/libc/gen/readdir-compat11.c
===================================================================
--- head/lib/libc/gen/readdir-compat11.c	(nonexistent)
+++ head/lib/libc/gen/readdir-compat11.c	(revision 318736)
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 1983, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from:
+ * $FreeBSD$
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)readdir.c	8.3 (Berkeley) 9/29/94";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
+#include <sys/param.h>
+#define	_WANT_FREEBSD11_DIRENT
+#include <dirent.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include "un-namespace.h"
+
+#include "libc_private.h"
+#include "gen-private.h"
+#include "telldir.h"
+
+#include "gen-compat.h"
+
+static bool
+freebsd11_cvtdirent(struct freebsd11_dirent *dstdp, struct dirent *srcdp)
+{
+	/* Convert *srcdp into *dstdp; fail if the name does not fit d_name. */
+	if (srcdp->d_namlen >= sizeof(dstdp->d_name))
+		return (false);
+	dstdp->d_type = srcdp->d_type;
+	dstdp->d_namlen = srcdp->d_namlen;
+	dstdp->d_fileno = srcdp->d_fileno;		/* truncate */
+	dstdp->d_reclen = FREEBSD11_DIRSIZ(dstdp);
+	bcopy(srcdp->d_name, dstdp->d_name, dstdp->d_namlen);
+	bzero(dstdp->d_name + dstdp->d_namlen,	/* zero the record's tail */
+	    dstdp->d_reclen - offsetof(struct freebsd11_dirent, d_name) -
+	    dstdp->d_namlen);
+	return (true);
+}
+
+/*
+ * FreeBSD 11 ABI readdir(3): converts the entry into dirp->dd_compat_de.
+ * Returns NULL if no entry is read, malloc fails, or the name is too long.
+ */
+struct freebsd11_dirent *
+freebsd11_readdir(DIR *dirp)
+{
+	struct freebsd11_dirent *dstdp;
+	struct dirent *dp;
+
+	dstdp = NULL;
+	if (__isthreaded)
+		_pthread_mutex_lock(&dirp->dd_lock);
+	dp = _readdir_unlocked(dirp, RDU_SKIP);
+	/* The conversion buffer is allocated lazily; malloc() may fail. */
+	if (dp != NULL && dirp->dd_compat_de == NULL)
+		dirp->dd_compat_de = malloc(sizeof(struct freebsd11_dirent));
+	if (dp != NULL && dirp->dd_compat_de != NULL &&
+	    freebsd11_cvtdirent(dirp->dd_compat_de, dp))
+		dstdp = dirp->dd_compat_de;
+	if (__isthreaded)
+		_pthread_mutex_unlock(&dirp->dd_lock);
+	return (dstdp);
+}
+
+/* readdir_r(3) for the FreeBSD 11 ABI, layered on __readdir_r(). */
+int
+freebsd11_readdir_r(DIR *dirp, struct freebsd11_dirent *entry,
+    struct freebsd11_dirent **result)
+{
+	struct dirent cur, *curp;
+	int rc;
+
+	rc = __readdir_r(dirp, &cur, &curp);
+	if (rc != 0)
+		return (rc);
+
+	/* A failed conversion should not happen due to RDU_SHORT. */
+	if (curp != NULL && freebsd11_cvtdirent(entry, &cur))
+		*result = entry;
+	else
+		*result = NULL;
+	return (0);
+}
+
+__sym_compat(readdir, freebsd11_readdir, FBSD_1.0);
+__sym_compat(readdir_r, freebsd11_readdir_r, FBSD_1.0);

Property changes on: head/lib/libc/gen/readdir-compat11.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: head/lib/libc/gen/readdir.c
===================================================================
--- head/lib/libc/gen/readdir.c	(revision 318735)
+++ head/lib/libc/gen/readdir.c	(revision 318736)
@@ -1,135 +1,137 @@
 /*
  * Copyright (c) 1983, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if defined(LIBC_SCCS) && !defined(lint)
 static char sccsid[] = "@(#)readdir.c	8.3 (Berkeley) 9/29/94";
 #endif /* LIBC_SCCS and not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "namespace.h"
 #include <sys/param.h>
 #include <dirent.h>
 #include <errno.h>
 #include <string.h>
 #include <pthread.h>
 #include "un-namespace.h"
 
 #include "libc_private.h"
 #include "gen-private.h"
 #include "telldir.h"
 
 /*
  * get next entry in a directory.
  */
 struct dirent *
-_readdir_unlocked(DIR *dirp, int skip)
+_readdir_unlocked(DIR *dirp, int flags)
 {
 	struct dirent *dp;
 	long initial_seek;
 	long initial_loc = 0;
 
 	for (;;) {
 		if (dirp->dd_loc >= dirp->dd_size) {
 			if (dirp->dd_flags & __DTF_READALL)
 				return (NULL);
 			initial_loc = dirp->dd_loc;
 			dirp->dd_flags &= ~__DTF_SKIPREAD;
 			dirp->dd_loc = 0;
 		}
 		if (dirp->dd_loc == 0 &&
 		    !(dirp->dd_flags & (__DTF_READALL | __DTF_SKIPREAD))) {
 			initial_seek = dirp->dd_seek;
 			dirp->dd_size = _getdirentries(dirp->dd_fd,
 			    dirp->dd_buf, dirp->dd_len, &dirp->dd_seek);
 			if (dirp->dd_size <= 0)
 				return (NULL);
 			_fixtelldir(dirp, initial_seek, initial_loc);
 		}
 		dirp->dd_flags &= ~__DTF_SKIPREAD;
 		dp = (struct dirent *)(dirp->dd_buf + dirp->dd_loc);
 		if ((long)dp & 03L)	/* bogus pointer check */
 			return (NULL);
 		if (dp->d_reclen <= 0 ||
 		    dp->d_reclen > dirp->dd_len + 1 - dirp->dd_loc)
 			return (NULL);
 		dirp->dd_loc += dp->d_reclen;
-		if (dp->d_ino == 0 && skip)
+		if (dp->d_ino == 0 && (flags & RDU_SKIP) != 0)
 			continue;
 		if (dp->d_type == DT_WHT && (dirp->dd_flags & DTF_HIDEW))
 			continue;
+		if (dp->d_namlen >= sizeof(dp->d_name) &&
+		    (flags & RDU_SHORT) != 0)
+			continue;
 		return (dp);
 	}
 }
 
 struct dirent *
 readdir(DIR *dirp)
 {
-	struct dirent	*dp;
+	struct dirent *dp;
 
-	if (__isthreaded) {
+	if (__isthreaded)
 		_pthread_mutex_lock(&dirp->dd_lock);
-		dp = _readdir_unlocked(dirp, 1);
+	dp = _readdir_unlocked(dirp, RDU_SKIP);
+	if (__isthreaded)
 		_pthread_mutex_unlock(&dirp->dd_lock);
-	}
-	else
-		dp = _readdir_unlocked(dirp, 1);
 	return (dp);
 }
 
 int
-readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result)
+__readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result)
 {
 	struct dirent *dp;
 	int saved_errno;
 
 	saved_errno = errno;
 	errno = 0;
-	if (__isthreaded) {
+	if (__isthreaded)
 		_pthread_mutex_lock(&dirp->dd_lock);
-		if ((dp = _readdir_unlocked(dirp, 1)) != NULL)
-			memcpy(entry, dp, _GENERIC_DIRSIZ(dp));
-		_pthread_mutex_unlock(&dirp->dd_lock);
-	}
-	else if ((dp = _readdir_unlocked(dirp, 1)) != NULL)
+	dp = _readdir_unlocked(dirp, RDU_SKIP | RDU_SHORT);
+	if (dp != NULL)
 		memcpy(entry, dp, _GENERIC_DIRSIZ(dp));
+	if (__isthreaded)
+		_pthread_mutex_unlock(&dirp->dd_lock);
 
 	if (errno != 0) {
 		if (dp == NULL)
 			return (errno);
 	} else
 		errno = saved_errno;
 
 	if (dp != NULL)
 		*result = entry;
 	else
 		*result = NULL;
 
 	return (0);
 }
+
+__strong_reference(__readdir_r, readdir_r);
Index: head/lib/libc/gen/scandir-compat11.c
===================================================================
--- head/lib/libc/gen/scandir-compat11.c	(nonexistent)
+++ head/lib/libc/gen/scandir-compat11.c	(revision 318736)
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 1983, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from:
+ * $FreeBSD$
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)scandir.c	8.3 (Berkeley) 1/2/94";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Scan the directory dirname, calling select to build a list of selected
+ * directory entries, then sort it using qsort and the compare routine dcomp.
+ * Returns the number of entries and a pointer to a list of pointers to
+ * struct freebsd11_dirent (through namelist). Returns -1 on any error.
+ */
+
+#include "namespace.h"
+#define	_WANT_FREEBSD11_DIRENT
+#include <dirent.h>
+#include <stdlib.h>
+#include <string.h>
+#include "un-namespace.h"
+
+#include "gen-compat.h"
+
+#ifdef	I_AM_SCANDIR_B
+#include "block_abi.h"
+#define	SELECT(x)	CALL_BLOCK(select, x)
+#ifndef __BLOCKS__
+void
+qsort_b(void *, size_t, size_t, void*);
+#endif
+#else
+#define	SELECT(x)	select(x)
+#endif
+
+static int freebsd11_alphasort_thunk(void *thunk, const void *p1,
+    const void *p2);
+
+int
+#ifdef I_AM_SCANDIR_B
+freebsd11_scandir_b(const char *dirname, struct freebsd11_dirent ***namelist,
+    DECLARE_BLOCK(int, select, const struct freebsd11_dirent *),
+    DECLARE_BLOCK(int, dcomp, const struct freebsd11_dirent **,
+    const struct freebsd11_dirent **))
+#else
+freebsd11_scandir(const char *dirname, struct freebsd11_dirent ***namelist,
+    int (*select)(const struct freebsd11_dirent *),
+    int (*dcomp)(const struct freebsd11_dirent **,
+	const struct freebsd11_dirent **))
+#endif
+{
+	struct freebsd11_dirent *d, *p, **names = NULL;
+	size_t arraysz, numitems;
+	DIR *dirp;
+
+	if ((dirp = opendir(dirname)) == NULL)
+		return(-1);
+
+	numitems = 0;
+	arraysz = 32;	/* initial estimate of the array size */
+	names = (struct freebsd11_dirent **)malloc(
+	    arraysz * sizeof(struct freebsd11_dirent *));
+	if (names == NULL)
+		goto fail;
+
+	while ((d = freebsd11_readdir(dirp)) != NULL) {
+		if (select != NULL && !SELECT(d))
+			continue;	/* just selected names */
+		/*
+		 * Make a minimum size copy of the data
+		 */
+		p = (struct freebsd11_dirent *)malloc(FREEBSD11_DIRSIZ(d));
+		if (p == NULL)
+			goto fail;
+		p->d_fileno = d->d_fileno;
+		p->d_type = d->d_type;
+		p->d_reclen = d->d_reclen;
+		p->d_namlen = d->d_namlen;
+		bcopy(d->d_name, p->d_name, p->d_namlen + 1);
+		/*
+		 * Make sure the array has space left; when it is full,
+		 * double its capacity.
+		 */
+		if (numitems >= arraysz) {
+			struct freebsd11_dirent **names2;
+
+			names2 = reallocarray(names, arraysz,
+			    2 * sizeof(struct freebsd11_dirent *));
+			if (names2 == NULL) {
+				free(p);
+				goto fail;
+			}
+			names = names2;
+			arraysz *= 2;
+		}
+		names[numitems++] = p;
+	}
+	closedir(dirp);
+	if (numitems && dcomp != NULL)
+#ifdef I_AM_SCANDIR_B
+		qsort_b(names, numitems, sizeof(struct freebsd11_dirent *),
+		    (void*)dcomp);
+#else
+		qsort_r(names, numitems, sizeof(struct freebsd11_dirent *),
+		    &dcomp, freebsd11_alphasort_thunk);
+#endif
+	*namelist = names;
+	return (numitems);
+
+fail:
+	while (numitems > 0)
+		free(names[--numitems]);
+	free(names);
+	closedir(dirp);
+	return (-1);
+}
+
+/*
+ * Alphabetic order comparison routine for those who want it.
+ * POSIX 2008 requires that alphasort() uses strcoll().
+ */
+int
+freebsd11_alphasort(const struct freebsd11_dirent **d1,
+    const struct freebsd11_dirent **d2)
+{
+
+	return (strcoll((*d1)->d_name, (*d2)->d_name));
+}
+
+static int
+freebsd11_alphasort_thunk(void *thunk, const void *p1, const void *p2)
+{
+	int (*dc)(const struct freebsd11_dirent **, const struct
+	    freebsd11_dirent **);
+
+	dc = *(int (**)(const struct freebsd11_dirent **,
+	    const struct freebsd11_dirent **))thunk;
+	return (dc((const struct freebsd11_dirent **)p1,
+	    (const struct freebsd11_dirent **)p2));
+}
+
+__sym_compat(alphasort, freebsd11_alphasort, FBSD_1.0);
+__sym_compat(scandir, freebsd11_scandir, FBSD_1.0);
+__sym_compat(scandir_b, freebsd11_scandir_b, FBSD_1.4);

Property changes on: head/lib/libc/gen/scandir-compat11.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: head/lib/libc/gen/scandir.c
===================================================================
--- head/lib/libc/gen/scandir.c	(revision 318735)
+++ head/lib/libc/gen/scandir.c	(revision 318736)
@@ -1,166 +1,155 @@
 /*
  * Copyright (c) 1983, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if defined(LIBC_SCCS) && !defined(lint)
 static char sccsid[] = "@(#)scandir.c	8.3 (Berkeley) 1/2/94";
 #endif /* LIBC_SCCS and not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Scan the directory dirname calling select to make a list of selected
  * directory entries then sort using qsort and compare routine dcomp.
  * Returns the number of entries and a pointer to a list of pointers to
  * struct dirent (through namelist). Returns -1 if there were any errors.
  */
 
 #include "namespace.h"
 #include <dirent.h>
 #include <stdlib.h>
 #include <string.h>
 #include "un-namespace.h"
 
 #ifdef	I_AM_SCANDIR_B
 #include "block_abi.h"
 #define	SELECT(x)	CALL_BLOCK(select, x)
 #ifndef __BLOCKS__
 void
 qsort_b(void *, size_t, size_t, void*);
 #endif
 #else
 #define	SELECT(x)	select(x)
 #endif
 
 static int alphasort_thunk(void *thunk, const void *p1, const void *p2);
 
-/*
- * The DIRSIZ macro is the minimum record length which will hold the directory
- * entry.  This requires the amount of space in struct dirent without the
- * d_name field, plus enough space for the name and a terminating nul byte
- * (dp->d_namlen + 1), rounded up to a 4 byte boundary.
- */
-#undef DIRSIZ
-#define DIRSIZ(dp)							\
-	((sizeof(struct dirent) - sizeof(dp)->d_name) +			\
-	    (((dp)->d_namlen + 1 + 3) &~ 3))
-
 int
 #ifdef I_AM_SCANDIR_B
 scandir_b(const char *dirname, struct dirent ***namelist,
     DECLARE_BLOCK(int, select, const struct dirent *),
     DECLARE_BLOCK(int, dcomp, const struct dirent **, const struct dirent **))
 #else
 scandir(const char *dirname, struct dirent ***namelist,
     int (*select)(const struct dirent *), int (*dcomp)(const struct dirent **,
 	const struct dirent **))
 #endif
 {
 	struct dirent *d, *p, **names = NULL;
 	size_t arraysz, numitems;
 	DIR *dirp;
 
 	if ((dirp = opendir(dirname)) == NULL)
 		return(-1);
 
 	numitems = 0;
 	arraysz = 32;	/* initial estimate of the array size */
 	names = (struct dirent **)malloc(arraysz * sizeof(struct dirent *));
 	if (names == NULL)
 		goto fail;
 
 	while ((d = readdir(dirp)) != NULL) {
 		if (select != NULL && !SELECT(d))
 			continue;	/* just selected names */
 		/*
 		 * Make a minimum size copy of the data
 		 */
-		p = (struct dirent *)malloc(DIRSIZ(d));
+		p = (struct dirent *)malloc(_GENERIC_DIRSIZ(d));
 		if (p == NULL)
 			goto fail;
 		p->d_fileno = d->d_fileno;
 		p->d_type = d->d_type;
 		p->d_reclen = d->d_reclen;
 		p->d_namlen = d->d_namlen;
 		bcopy(d->d_name, p->d_name, p->d_namlen + 1);
 		/*
 		 * Check to make sure the array has space left and
 		 * realloc the maximum size.
 		 */
 		if (numitems >= arraysz) {
 			struct dirent **names2;
 
 			names2 = reallocarray(names, arraysz,
 			    2 * sizeof(struct dirent *));
 			if (names2 == NULL) {
 				free(p);
 				goto fail;
 			}
 			names = names2;
 			arraysz *= 2;
 		}
 		names[numitems++] = p;
 	}
 	closedir(dirp);
 	if (numitems && dcomp != NULL)
 #ifdef I_AM_SCANDIR_B
 		qsort_b(names, numitems, sizeof(struct dirent *), (void*)dcomp);
 #else
 		qsort_r(names, numitems, sizeof(struct dirent *),
 		    &dcomp, alphasort_thunk);
 #endif
 	*namelist = names;
 	return (numitems);
 
 fail:
 	while (numitems > 0)
 		free(names[--numitems]);
 	free(names);
 	closedir(dirp);
 	return (-1);
 }
 
 /*
  * Alphabetic order comparison routine for those who want it.
  * POSIX 2008 requires that alphasort() uses strcoll().
  */
 int
 alphasort(const struct dirent **d1, const struct dirent **d2)
 {
 
 	return (strcoll((*d1)->d_name, (*d2)->d_name));
 }
 
 static int
 alphasort_thunk(void *thunk, const void *p1, const void *p2)
 {
 	int (*dc)(const struct dirent **, const struct dirent **);
 
 	dc = *(int (**)(const struct dirent **, const struct dirent **))thunk;
 	return (dc((const struct dirent **)p1, (const struct dirent **)p2));
 }
Index: head/lib/libc/gen/telldir.h
===================================================================
--- head/lib/libc/gen/telldir.h	(revision 318735)
+++ head/lib/libc/gen/telldir.h	(revision 318736)
@@ -1,69 +1,72 @@
 /*
  * Copyright (c) 1983, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Copyright (c) 2000
  * 	Daniel Eischen.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _TELLDIR_H_
 #define	_TELLDIR_H_
 
 #include <sys/queue.h>
 #include <stdbool.h>
 
 /*
  * One of these structures is malloced to describe the current directory
  * position each time telldir is called. It records the current magic
  * cookie returned by getdirentries and the offset within the buffer
  * associated with that return value.
  */
 struct ddloc {
 	LIST_ENTRY(ddloc) loc_lqe; /* entry in list */
 	long	loc_index;	/* key associated with structure */
-	long	loc_seek;	/* magic cookie returned by getdirentries */
+	off_t	loc_seek;	/* magic cookie returned by getdirentries */
 	long	loc_loc;	/* offset of entry in buffer */
 };
 
 /*
  * One of these structures is malloced for each DIR to record telldir
  * positions.
  */
 struct _telldir {
 	LIST_HEAD(, ddloc) td_locq; /* list of locations */
 	long	td_loccnt;	/* index of entry for sequential readdir's */
 };
 
 bool		_filldir(DIR *, bool);
 struct dirent	*_readdir_unlocked(DIR *, int);
 void 		_reclaim_telldir(DIR *);
 void 		_seekdir(DIR *, long);
 void		_fixtelldir(DIR *dirp, long oldseek, long oldloc);
+
+#define	RDU_SKIP	0x0001	/* skip entries whose d_ino is 0 */
+#define	RDU_SHORT	0x0002	/* skip names too long for d_name */
 
 #endif
Index: head/lib/libc/include/compat.h
===================================================================
--- head/lib/libc/include/compat.h	(revision 318735)
+++ head/lib/libc/include/compat.h	(revision 318736)
@@ -1,57 +1,78 @@
 /*-
  * Copyright (c) 2009 Hudson River Trading LLC
  * Written by: John H. Baldwin <jhb@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * This file defines compatibility symbol versions for old system calls.  It
  * is included in all generated system call files.
  */
 
 #ifndef __LIBC_COMPAT_H__
 #define	__LIBC_COMPAT_H__
 
 #define	__sym_compat(sym,impl,verid)	\
 	.symver impl, sym@verid
 
 #ifndef NO_COMPAT7
 __sym_compat(__semctl, freebsd7___semctl, FBSD_1.0);
 __sym_compat(msgctl, freebsd7_msgctl, FBSD_1.0);
 __sym_compat(shmctl, freebsd7_shmctl, FBSD_1.0);
 #endif
 
+__sym_compat(nfstat, freebsd11_nfstat, FBSD_1.0);
+__sym_compat(nlstat, freebsd11_nlstat, FBSD_1.0);
+__sym_compat(nstat, freebsd11_nstat, FBSD_1.0);
+
+__sym_compat(fhstat, freebsd11_fhstat, FBSD_1.0);
+__sym_compat(fstat, freebsd11_fstat, FBSD_1.0);
+__sym_compat(fstatat, freebsd11_fstatat, FBSD_1.1);
+__sym_compat(lstat, freebsd11_lstat, FBSD_1.0);
+__sym_compat(stat, freebsd11_stat, FBSD_1.0);
+
+__sym_compat(getdents, freebsd11_getdents, FBSD_1.0);
+__sym_compat(getdirentries, freebsd11_getdirentries, FBSD_1.0);
+
+__sym_compat(getfsstat, freebsd11_getfsstat, FBSD_1.0);
+__sym_compat(fhstatfs, freebsd11_fhstatfs, FBSD_1.0);
+__sym_compat(fstatfs, freebsd11_fstatfs, FBSD_1.0);
+__sym_compat(statfs, freebsd11_statfs, FBSD_1.0);
+
+__sym_compat(mknod, freebsd11_mknod, FBSD_1.0);
+__sym_compat(mknodat, freebsd11_mknodat, FBSD_1.1);
+
 #undef __sym_compat
 
 #define	__weak_reference(sym,alias)	\
 	.weak	alias;.equ	alias,sym
 
 __weak_reference(__sys_fcntl,__fcntl_compat)
 
 #undef __weak_reference
 
 #endif	/* __LIBC_COMPAT_H__ */
 
Index: head/lib/libc/include/libc_private.h
===================================================================
--- head/lib/libc/include/libc_private.h	(revision 318735)
+++ head/lib/libc/include/libc_private.h	(revision 318736)
@@ -1,409 +1,412 @@
 /*
  * Copyright (c) 1998 John Birrell <jb@cimlogic.com.au>.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  *
  * Private definitions for libc, libc_r and libpthread.
  *
  */
 
 #ifndef _LIBC_PRIVATE_H_
 #define _LIBC_PRIVATE_H_
 #include <sys/_types.h>
 #include <sys/_pthreadtypes.h>
 
 /*
  * This global flag is non-zero when a process has created one
  * or more threads. It is used to avoid calling locking functions
  * when they are not required.
  */
 extern int	__isthreaded;
 
 /*
  * Elf_Auxinfo *__elf_aux_vector, the pointer to the ELF aux vector
  * provided by kernel. Either set for us by rtld, or found at runtime
  * on stack for static binaries.
  *
  * Type is void to avoid polluting whole libc with ELF types.
  */
 extern void	*__elf_aux_vector;
 
 /*
  * libc should use libc_dlopen internally, which respects a global
  * flag where loading of new shared objects can be restricted.
  */
 void *libc_dlopen(const char *, int);
 
 /*
  * For dynamic linker.
  */
 void _rtld_error(const char *fmt, ...);
 
 /*
  * File lock contention is difficult to diagnose without knowing
  * where locks were set. Allow a debug library to be built which
  * records the source file and line number of each lock call.
  */
 #ifdef	_FLOCK_DEBUG
 #define _FLOCKFILE(x)	_flockfile_debug(x, __FILE__, __LINE__)
 #else
 #define _FLOCKFILE(x)	_flockfile(x)
 #endif
 
 /*
  * Macros for locking and unlocking FILEs. These test if the
  * process is threaded to avoid locking when not required.
  */
 #define	FLOCKFILE(fp)		if (__isthreaded) _FLOCKFILE(fp)
 #define	FUNLOCKFILE(fp)		if (__isthreaded) _funlockfile(fp)
 
 struct _spinlock;
 extern struct _spinlock __stdio_thread_lock __hidden;
 #define STDIO_THREAD_LOCK()				\
 do {							\
 	if (__isthreaded)				\
 		_SPINLOCK(&__stdio_thread_lock);	\
 } while (0)
 #define STDIO_THREAD_UNLOCK()				\
 do {							\
 	if (__isthreaded)				\
 		_SPINUNLOCK(&__stdio_thread_lock);	\
 } while (0)
 
 void		__libc_spinlock_stub(struct _spinlock *);
 void		__libc_spinunlock_stub(struct _spinlock *);
 
 /*
  * Indexes into the pthread jump table.
  *
  * Warning! If you change this type, you must also change the threads
  * libraries that reference it (libc_r, libpthread).
  */
 typedef enum {
 	PJT_ATFORK,
 	PJT_ATTR_DESTROY,
 	PJT_ATTR_GETDETACHSTATE,
 	PJT_ATTR_GETGUARDSIZE,
 	PJT_ATTR_GETINHERITSCHED,
 	PJT_ATTR_GETSCHEDPARAM,
 	PJT_ATTR_GETSCHEDPOLICY,
 	PJT_ATTR_GETSCOPE,
 	PJT_ATTR_GETSTACKADDR,
 	PJT_ATTR_GETSTACKSIZE,
 	PJT_ATTR_INIT,
 	PJT_ATTR_SETDETACHSTATE,
 	PJT_ATTR_SETGUARDSIZE,
 	PJT_ATTR_SETINHERITSCHED,
 	PJT_ATTR_SETSCHEDPARAM,
 	PJT_ATTR_SETSCHEDPOLICY,
 	PJT_ATTR_SETSCOPE,
 	PJT_ATTR_SETSTACKADDR,
 	PJT_ATTR_SETSTACKSIZE,
 	PJT_CANCEL,
 	PJT_CLEANUP_POP,
 	PJT_CLEANUP_PUSH,
 	PJT_COND_BROADCAST,
 	PJT_COND_DESTROY,
 	PJT_COND_INIT,
 	PJT_COND_SIGNAL,
 	PJT_COND_TIMEDWAIT,
 	PJT_COND_WAIT,
 	PJT_DETACH,
 	PJT_EQUAL,
 	PJT_EXIT,
 	PJT_GETSPECIFIC,
 	PJT_JOIN,
 	PJT_KEY_CREATE,
 	PJT_KEY_DELETE,
 	PJT_KILL,
 	PJT_MAIN_NP,
 	PJT_MUTEXATTR_DESTROY,
 	PJT_MUTEXATTR_INIT,
 	PJT_MUTEXATTR_SETTYPE,
 	PJT_MUTEX_DESTROY,
 	PJT_MUTEX_INIT,
 	PJT_MUTEX_LOCK,
 	PJT_MUTEX_TRYLOCK,
 	PJT_MUTEX_UNLOCK,
 	PJT_ONCE,
 	PJT_RWLOCK_DESTROY,
 	PJT_RWLOCK_INIT,
 	PJT_RWLOCK_RDLOCK,
 	PJT_RWLOCK_TRYRDLOCK,
 	PJT_RWLOCK_TRYWRLOCK,
 	PJT_RWLOCK_UNLOCK,
 	PJT_RWLOCK_WRLOCK,
 	PJT_SELF,
 	PJT_SETCANCELSTATE,
 	PJT_SETCANCELTYPE,
 	PJT_SETSPECIFIC,
 	PJT_SIGMASK,
 	PJT_TESTCANCEL,
 	PJT_CLEANUP_POP_IMP,
 	PJT_CLEANUP_PUSH_IMP,
 	PJT_CANCEL_ENTER,
 	PJT_CANCEL_LEAVE,
 	PJT_MUTEX_CONSISTENT,
 	PJT_MUTEXATTR_GETROBUST,
 	PJT_MUTEXATTR_SETROBUST,
 	PJT_MAX
 } pjt_index_t;
 
 typedef int (*pthread_func_t)(void);
 typedef pthread_func_t pthread_func_entry_t[2];
 
 extern pthread_func_entry_t __thr_jtable[];
 
 void	__set_error_selector(int *(*arg)(void));
 int	_pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
 	    void *(calloc_cb)(__size_t, __size_t));
 
 typedef int (*interpos_func_t)(void);
 interpos_func_t *__libc_interposing_slot(int interposno);
 extern interpos_func_t __libc_interposing[] __hidden;
 
 enum {
 	INTERPOS_accept,
 	INTERPOS_accept4,
 	INTERPOS_aio_suspend,
 	INTERPOS_close,
 	INTERPOS_connect,
 	INTERPOS_fcntl,
 	INTERPOS_fsync,
 	INTERPOS_fork,
 	INTERPOS_msync,
 	INTERPOS_nanosleep,
 	INTERPOS_openat,
 	INTERPOS_poll,
 	INTERPOS_pselect,
 	INTERPOS_recvfrom,
 	INTERPOS_recvmsg,
 	INTERPOS_select,
 	INTERPOS_sendmsg,
 	INTERPOS_sendto,
 	INTERPOS_setcontext,
 	INTERPOS_sigaction,
 	INTERPOS_sigprocmask,
 	INTERPOS_sigsuspend,
 	INTERPOS_sigwait,
 	INTERPOS_sigtimedwait,
 	INTERPOS_sigwaitinfo,
 	INTERPOS_swapcontext,
 	INTERPOS_system,
 	INTERPOS_tcdrain,
 	INTERPOS_read,
 	INTERPOS_readv,
 	INTERPOS_wait4,
 	INTERPOS_write,
 	INTERPOS_writev,
 	INTERPOS__pthread_mutex_init_calloc_cb,
 	INTERPOS_spinlock,
 	INTERPOS_spinunlock,
 	INTERPOS_kevent,
 	INTERPOS_wait6,
 	INTERPOS_ppoll,
 	INTERPOS_map_stacks_exec,
 	INTERPOS_fdatasync,
 	INTERPOS_clock_nanosleep,
 	INTERPOS_MAX
 };
 
 /*
  * yplib internal interfaces
  */
 #ifdef YP
 int _yp_check(char **);
 #endif
 
 /*
  * Initialise TLS for static programs
  */
 void _init_tls(void);
 
 /*
  * Provides pthread_once()-like functionality for both single-threaded
  * and multi-threaded applications.
  */
 int _once(pthread_once_t *, void (*)(void));
 
 /*
  * Set the TLS thread pointer
  */
 void _set_tp(void *tp);
 
 /*
  * This is a pointer in the C run-time startup code. It is used
  * by getprogname() and setprogname().
  */
 extern const char *__progname;
 
 /*
  * This function is used by the threading libraries to notify malloc that a
  * thread is exiting.
  */
 void _malloc_thread_cleanup(void);
 
 /*
  * This function is used by the threading libraries to notify libc that a
  * thread is exiting, so its thread-local dtors should be called.
  */
 void __cxa_thread_call_dtors(void);
 int __cxa_thread_atexit_hidden(void (*dtor_func)(void *), void *obj,
     void *dso_symbol) __hidden;
 
 /*
  * These functions are used by the threading libraries in order to protect
  * malloc across fork().
  */
 void _malloc_prefork(void);
 void _malloc_postfork(void);
 
 void _malloc_first_thread(void);
 
 /*
  * Function to clean up streams, called from abort() and exit().
  */
 extern void (*__cleanup)(void) __hidden;
 
 /*
  * Get kern.osreldate to detect ABI revisions.  Explicitly
  * ignores value of $OSVERSION and caches result.
  */
 int __getosreldate(void);
 #include <sys/_types.h>
 #include <sys/_sigset.h>
 
 struct aiocb;
 struct fd_set;
 struct iovec;
 struct kevent;
 struct msghdr;
 struct pollfd;
 struct rusage;
 struct sigaction;
 struct sockaddr;
+struct stat;
 struct timespec;
 struct timeval;
 struct timezone;
 struct __siginfo;
 struct __ucontext;
 struct __wrusage;
 enum idtype;
 int		__sys_aio_suspend(const struct aiocb * const[], int,
 		    const struct timespec *);
 int		__sys_accept(int, struct sockaddr *, __socklen_t *);
 int		__sys_accept4(int, struct sockaddr *, __socklen_t *, int);
 int		__sys_clock_gettime(__clockid_t, struct timespec *ts);
 int		__sys_clock_nanosleep(__clockid_t, int,
 		    const struct timespec *, struct timespec *);
 int		__sys_close(int);
 int		__sys_connect(int, const struct sockaddr *, __socklen_t);
+__ssize_t	__sys_getdirentries(int, char *, __size_t, __off_t *);
 int		__sys_fcntl(int, int, ...);
 int		__sys_fdatasync(int);
+int		__sys_fstatat(int, const char *, struct stat *, int);
 int		__sys_fsync(int);
 __pid_t		__sys_fork(void);
 int		__sys_ftruncate(int, __off_t);
 int		__sys_gettimeofday(struct timeval *, struct timezone *);
 int		__sys_kevent(int, const struct kevent *, int, struct kevent *,
 		    int, const struct timespec *);
 __off_t		__sys_lseek(int, __off_t, int);
 void	       *__sys_mmap(void *, __size_t, int, int, int, __off_t);
 int		__sys_msync(void *, __size_t, int);
 int		__sys_nanosleep(const struct timespec *, struct timespec *);
 int		__sys_open(const char *, int, ...);
 int		__sys_openat(int, const char *, int, ...);
 int		__sys_pselect(int, struct fd_set *, struct fd_set *,
 		    struct fd_set *, const struct timespec *,
 		    const __sigset_t *);
 int		__sys_ptrace(int, __pid_t, char *, int);
 int		__sys_poll(struct pollfd *, unsigned, int);
 int		__sys_ppoll(struct pollfd *, unsigned, const struct timespec *,
 		    const __sigset_t *);
 __ssize_t	__sys_pread(int, void *, __size_t, __off_t);
 __ssize_t	__sys_pwrite(int, const void *, __size_t, __off_t);
 __ssize_t	__sys_read(int, void *, __size_t);
 __ssize_t	__sys_readv(int, const struct iovec *, int);
 __ssize_t	__sys_recv(int, void *, __size_t, int);
 __ssize_t	__sys_recvfrom(int, void *, __size_t, int, struct sockaddr *,
 		    __socklen_t *);
 __ssize_t	__sys_recvmsg(int, struct msghdr *, int);
 int		__sys_select(int, struct fd_set *, struct fd_set *,
 		    struct fd_set *, struct timeval *);
 __ssize_t	__sys_sendmsg(int, const struct msghdr *, int);
 __ssize_t	__sys_sendto(int, const void *, __size_t, int,
 		    const struct sockaddr *, __socklen_t);
 int		__sys_setcontext(const struct __ucontext *);
 int		__sys_sigaction(int, const struct sigaction *,
 		    struct sigaction *);
 int		__sys_sigprocmask(int, const __sigset_t *, __sigset_t *);
 int		__sys_sigsuspend(const __sigset_t *);
 int		__sys_sigtimedwait(const __sigset_t *, struct __siginfo *,
 		    const struct timespec *);
 int		__sys_sigwait(const __sigset_t *, int *);
 int		__sys_sigwaitinfo(const __sigset_t *, struct __siginfo *);
 int		__sys_swapcontext(struct __ucontext *,
 		    const struct __ucontext *);
 int		__sys_thr_kill(long, int);
 int		__sys_thr_self(long *);
 int		__sys_truncate(const char *, __off_t);
 __pid_t		__sys_wait4(__pid_t, int *, int, struct rusage *);
 __pid_t		__sys_wait6(enum idtype, __id_t, int *, int,
 		    struct __wrusage *, struct __siginfo *);
 __ssize_t	__sys_write(int, const void *, __size_t);
 __ssize_t	__sys_writev(int, const struct iovec *, int);
 
 int		__libc_sigaction(int, const struct sigaction *,
 		    struct sigaction *) __hidden;
 int		__libc_sigprocmask(int, const __sigset_t *, __sigset_t *)
 		    __hidden;
 int		__libc_sigsuspend(const __sigset_t *) __hidden;
 int		__libc_sigwait(const __sigset_t * __restrict,
 		    int * restrict sig);
 int		__libc_system(const char *);
 int		__libc_tcdrain(int);
 int		__fcntl_compat(int fd, int cmd, ...);
 
 int		__sys_futimens(int fd, const struct timespec *times) __hidden;
 int		__sys_utimensat(int fd, const char *path,
 		    const struct timespec *times, int flag) __hidden;
 
 /* execve() with PATH processing to implement posix_spawnp() */
 int _execvpe(const char *, char * const *, char * const *);
 
 int _elf_aux_info(int aux, void *buf, int buflen);
 struct dl_phdr_info;
 int __elf_phdr_match_addr(struct dl_phdr_info *, void *);
 void __init_elf_aux_vector(void);
 void __libc_map_stacks_exec(void);
 
 void	_pthread_cancel_enter(int);
 void	_pthread_cancel_leave(int);
 
 void __throw_constraint_handler_s(const char * restrict msg, int error);
 
 #endif /* _LIBC_PRIVATE_H_ */
Index: head/lib/libc/sys/Makefile.inc
===================================================================
--- head/lib/libc/sys/Makefile.inc	(revision 318735)
+++ head/lib/libc/sys/Makefile.inc	(revision 318736)
@@ -1,488 +1,490 @@
 #	@(#)Makefile.inc	8.3 (Berkeley) 10/24/94
 # $FreeBSD$
 
 # sys sources
 .PATH: ${LIBC_SRCTOP}/${LIBC_ARCH}/sys ${LIBC_SRCTOP}/sys
 
 # Include the generated makefile containing the *complete* list
 # of syscall names in MIASM.
 .include "${SRCTOP}/sys/sys/syscall.mk"
 
 # Include machine dependent definitions.
 #
 # MDASM names override the default syscall names in MIASM.
 # NOASM will prevent the default syscall code from being generated.
 # PSEUDO generates _<sys>() and __sys_<sys>() symbols, but not <sys>().
 #
 # While historically machine dependent, all architectures have the following
 # declarations in common:
 #
 NOASM=	break.o \
 	exit.o \
 	getlogin.o \
 	sstk.o \
 	yield.o
 PSEUDO=	_exit.o \
 	_getlogin.o
 .sinclude "${LIBC_SRCTOP}/${LIBC_ARCH}/sys/Makefile.inc"
 
 SRCS+= clock_gettime.c gettimeofday.c __vdso_gettimeofday.c
 NOASM+=  clock_gettime.o gettimeofday.o
 PSEUDO+= _clock_gettime.o _gettimeofday.o
 
 # Sources common to both syscall interfaces:
 SRCS+=	\
 	__error.c \
 	interposing_table.c
 
+SRCS+= getdents.c lstat.c mknod.c stat.c
+
 SRCS+= futimens.c utimensat.c
 NOASM+= futimens.o utimensat.o
 PSEUDO+= _futimens.o _utimensat.o
 
 SRCS+= pipe.c
 
 INTERPOSED = \
 	accept \
 	accept4 \
 	aio_suspend \
 	clock_nanosleep \
 	close \
 	connect \
 	fcntl \
 	fdatasync \
 	fsync \
 	fork \
 	kevent \
 	msync \
 	nanosleep \
 	open \
 	openat \
 	poll \
 	ppoll \
 	pselect \
 	ptrace \
 	read \
 	readv \
 	recvfrom \
 	recvmsg \
 	select \
 	sendmsg \
 	sendto \
 	setcontext \
 	sigprocmask \
 	sigsuspend \
 	sigtimedwait \
 	sigwait \
 	sigwaitinfo \
 	swapcontext \
 	wait4 \
 	wait6 \
 	write \
 	writev
 
 .if ${MACHINE_CPUARCH} == "sparc64"
 SRCS+=	sigaction.c
 NOASM+=	sigaction.o
 .else
 INTERPOSED+= sigaction
 .endif
 
 SRCS+=	${INTERPOSED:S/$/.c/}
 NOASM+=	${INTERPOSED:S/$/.o/}
 PSEUDO+=	${INTERPOSED:C/^.*$/_&.o/}
 
 # Add machine dependent asm sources:
 SRCS+=${MDASM}
 
 # Look though the complete list of syscalls (MIASM) for names that are
 # not defined with machine dependent implementations (MDASM) and are
 # not declared for no generation of default code (NOASM).  Add each
 # syscall that satisfies these conditions to the ASM list.
 .for _asm in ${MIASM}
 .if (${MDASM:R:M${_asm:R}} == "")
 .if (${NOASM:R:M${_asm:R}} == "")
 ASM+=$(_asm)
 .endif
 .endif
 .endfor
 
 SASM=	${ASM:S/.o/.S/}
 
 SPSEUDO= ${PSEUDO:S/.o/.S/}
 
 SRCS+=	${SASM} ${SPSEUDO}
 
 SYM_MAPS+=	${LIBC_SRCTOP}/sys/Symbol.map
 
 # Generated files
 CLEANFILES+=	${SASM} ${SPSEUDO}
 
 .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" || \
     ${MACHINE_CPUARCH} == "powerpc" || ${MACHINE_ARCH:Marmv6*}
 NOTE_GNU_STACK='\t.section .note.GNU-stack,"",%%progbits\n'
 .else
 NOTE_GNU_STACK=''
 .endif
 
 ${SASM}:
 	printf '#include "compat.h"\n' > ${.TARGET}
 	printf '#include "SYS.h"\nRSYSCALL(${.PREFIX})\n' >> ${.TARGET}
 	printf  ${NOTE_GNU_STACK} >>${.TARGET}
 
 ${SPSEUDO}:
 	printf '#include "compat.h"\n' > ${.TARGET}
 	printf '#include "SYS.h"\nPSEUDO(${.PREFIX:S/_//})\n' \
 	    >> ${.TARGET}
 	printf ${NOTE_GNU_STACK} >>${.TARGET}
 
 MAN+=	abort2.2 \
 	accept.2 \
 	access.2 \
 	acct.2 \
 	adjtime.2 \
 	aio_cancel.2 \
 	aio_error.2 \
 	aio_fsync.2 \
 	aio_mlock.2 \
 	aio_read.2 \
 	aio_return.2 \
 	aio_suspend.2 \
 	aio_waitcomplete.2 \
 	aio_write.2 \
 	bind.2 \
 	bindat.2 \
 	brk.2 \
 	cap_enter.2 \
 	cap_fcntls_limit.2 \
 	cap_ioctls_limit.2 \
 	cap_rights_limit.2 \
 	chdir.2 \
 	chflags.2 \
 	chmod.2 \
 	chown.2 \
 	chroot.2 \
 	clock_gettime.2 \
 	close.2 \
 	closefrom.2 \
 	connect.2 \
 	connectat.2 \
 	cpuset.2 \
 	cpuset_getaffinity.2 \
 	dup.2 \
 	execve.2 \
 	_exit.2 \
 	extattr_get_file.2 \
 	fcntl.2 \
 	ffclock.2 \
 	fhopen.2 \
 	flock.2 \
 	fork.2 \
 	fsync.2 \
 	getdirentries.2 \
 	getdtablesize.2 \
 	getfh.2 \
 	getfsstat.2 \
 	getgid.2 \
 	getgroups.2 \
 	getitimer.2 \
 	getlogin.2 \
 	getloginclass.2 \
 	getpeername.2 \
 	getpgrp.2 \
 	getpid.2 \
 	getpriority.2 \
 	getrlimit.2 \
 	getrusage.2 \
 	getsid.2 \
 	getsockname.2 \
 	getsockopt.2 \
 	gettimeofday.2 \
 	getuid.2 \
 	intro.2 \
 	ioctl.2 \
 	issetugid.2 \
 	jail.2 \
 	kenv.2 \
 	kill.2 \
 	kldfind.2 \
 	kldfirstmod.2 \
 	kldload.2 \
 	kldnext.2 \
 	kldstat.2 \
 	kldsym.2 \
 	kldunload.2 \
 	kqueue.2 \
 	ktrace.2 \
 	link.2 \
 	lio_listio.2 \
 	listen.2 \
 	lseek.2 \
 	madvise.2 \
 	mincore.2 \
 	minherit.2 \
 	mkdir.2 \
 	mkfifo.2 \
 	mknod.2 \
 	mlock.2 \
 	mlockall.2 \
 	mmap.2 \
 	modfind.2 \
 	modnext.2 \
 	modstat.2 \
 	mount.2 \
 	mprotect.2 \
 	mq_close.2 \
 	mq_getattr.2 \
 	mq_notify.2 \
 	mq_open.2 \
 	mq_receive.2 \
 	mq_send.2 \
 	mq_setattr.2 \
 	msgctl.2 \
 	msgget.2 \
 	msgrcv.2 \
 	msgsnd.2 \
 	msync.2 \
 	munmap.2 \
 	nanosleep.2 \
 	nfssvc.2 \
 	ntp_adjtime.2 \
 	numa_getaffinity.2 \
 	open.2 \
 	pathconf.2 \
 	pdfork.2 \
 	pipe.2 \
 	poll.2 \
 	posix_fadvise.2 \
 	posix_fallocate.2 \
 	posix_openpt.2 \
 	procctl.2 \
 	profil.2 \
 	pselect.2 \
 	ptrace.2 \
 	quotactl.2 \
 	rctl_add_rule.2 \
 	read.2 \
 	readlink.2 \
 	reboot.2 \
 	recv.2 \
 	rename.2 \
 	revoke.2 \
 	rfork.2 \
 	rmdir.2 \
 	rtprio.2
 .if !defined(NO_P1003_1B)
 MAN+=	sched_get_priority_max.2 \
 	sched_setparam.2 \
 	sched_setscheduler.2 \
 	sched_yield.2
 .endif
 MAN+=	sctp_generic_recvmsg.2 \
 	sctp_generic_sendmsg.2 \
 	sctp_peeloff.2 \
 	select.2 \
 	semctl.2 \
 	semget.2 \
 	semop.2 \
 	send.2 \
 	setfib.2 \
 	sendfile.2 \
 	setgroups.2 \
 	setpgid.2 \
 	setregid.2 \
 	setresuid.2 \
 	setreuid.2 \
 	setsid.2 \
 	setuid.2 \
 	shmat.2 \
 	shmctl.2 \
 	shmget.2 \
 	shm_open.2 \
 	shutdown.2 \
 	sigaction.2 \
 	sigaltstack.2 \
 	sigpending.2 \
 	sigprocmask.2 \
 	sigqueue.2 \
 	sigreturn.2 \
 	sigstack.2 \
 	sigsuspend.2 \
 	sigwait.2 \
 	sigwaitinfo.2 \
 	socket.2 \
 	socketpair.2 \
 	stat.2 \
 	statfs.2 \
 	swapon.2 \
 	symlink.2 \
 	sync.2 \
 	sysarch.2 \
 	syscall.2 \
 	thr_exit.2 \
 	thr_kill.2 \
 	thr_new.2 \
 	thr_self.2 \
 	thr_set_name.2 \
 	thr_suspend.2 \
 	thr_wake.2 \
 	timer_create.2 \
 	timer_delete.2 \
 	timer_settime.2 \
 	truncate.2 \
 	umask.2 \
 	undelete.2 \
 	unlink.2 \
 	utimensat.2 \
 	utimes.2 \
 	utrace.2 \
 	uuidgen.2 \
 	vfork.2 \
 	wait.2 \
 	write.2 \
 	_umtx_op.2
 
 MLINKS+=accept.2 accept4.2
 MLINKS+=access.2 eaccess.2 \
 	access.2 faccessat.2
 MLINKS+=brk.2 sbrk.2
 MLINKS+=cap_enter.2 cap_getmode.2
 MLINKS+=cap_fcntls_limit.2 cap_fcntls_get.2
 MLINKS+=cap_ioctls_limit.2 cap_ioctls_get.2
 MLINKS+=cap_rights_limit.2 cap_rights_get.2
 MLINKS+=chdir.2 fchdir.2
 MLINKS+=chflags.2 chflagsat.2 \
 	chflags.2 fchflags.2 \
 	chflags.2 lchflags.2
 MLINKS+=chmod.2 fchmod.2 \
 	chmod.2 fchmodat.2 \
 	chmod.2 lchmod.2
 MLINKS+=chown.2 fchown.2 \
 	chown.2 fchownat.2 \
 	chown.2 lchown.2
 MLINKS+=clock_gettime.2 clock_getres.2 \
 	clock_gettime.2 clock_settime.2
 MLINKS+=nanosleep.2 clock_nanosleep.2
 MLINKS+=cpuset.2 cpuset_getid.2 \
 	cpuset.2 cpuset_setid.2
 MLINKS+=cpuset_getaffinity.2 cpuset_setaffinity.2
 MLINKS+=dup.2 dup2.2
 MLINKS+=execve.2 fexecve.2
 MLINKS+=extattr_get_file.2 extattr.2 \
 	extattr_get_file.2 extattr_delete_fd.2 \
 	extattr_get_file.2 extattr_delete_file.2 \
 	extattr_get_file.2 extattr_delete_link.2 \
 	extattr_get_file.2 extattr_get_fd.2 \
 	extattr_get_file.2 extattr_get_link.2 \
 	extattr_get_file.2 extattr_list_fd.2 \
 	extattr_get_file.2 extattr_list_file.2 \
 	extattr_get_file.2 extattr_list_link.2 \
 	extattr_get_file.2 extattr_set_fd.2 \
 	extattr_get_file.2 extattr_set_file.2 \
 	extattr_get_file.2 extattr_set_link.2
 MLINKS+=ffclock.2 ffclock_getcounter.2 \
 	ffclock.2 ffclock_getestimate.2 \
 	ffclock.2 ffclock_setestimate.2
 MLINKS+=fhopen.2 fhstat.2 fhopen.2 fhstatfs.2
 MLINKS+=fsync.2 fdatasync.2
 MLINKS+=getdirentries.2 getdents.2
 MLINKS+=getfh.2 lgetfh.2
 MLINKS+=getgid.2 getegid.2
 MLINKS+=getitimer.2 setitimer.2
 MLINKS+=getlogin.2 getlogin_r.3
 MLINKS+=getlogin.2 setlogin.2
 MLINKS+=getloginclass.2 setloginclass.2
 MLINKS+=getpgrp.2 getpgid.2
 MLINKS+=getpid.2 getppid.2
 MLINKS+=getpriority.2 setpriority.2
 MLINKS+=getrlimit.2 setrlimit.2
 MLINKS+=getsockopt.2 setsockopt.2
 MLINKS+=gettimeofday.2 settimeofday.2
 MLINKS+=getuid.2 geteuid.2
 MLINKS+=intro.2 errno.2
 MLINKS+=jail.2 jail_attach.2 \
 	jail.2 jail_get.2 \
 	jail.2 jail_remove.2 \
 	jail.2 jail_set.2
 MLINKS+=kldunload.2 kldunloadf.2
 MLINKS+=kqueue.2 kevent.2 \
 	kqueue.2 EV_SET.3
 MLINKS+=link.2 linkat.2
 MLINKS+=madvise.2 posix_madvise.2
 MLINKS+=mkdir.2 mkdirat.2
 MLINKS+=mkfifo.2 mkfifoat.2
 MLINKS+=mknod.2 mknodat.2
 MLINKS+=mlock.2 munlock.2
 MLINKS+=mlockall.2 munlockall.2
 MLINKS+=modnext.2 modfnext.2
 MLINKS+=mount.2 nmount.2 \
 	mount.2 unmount.2
 MLINKS+=mq_receive.2 mq_timedreceive.2
 MLINKS+=mq_send.2 mq_timedsend.2
 MLINKS+=ntp_adjtime.2 ntp_gettime.2
 MLINKS+=numa_getaffinity.2 numa_setaffinity.2
 MLINKS+=open.2 openat.2
 MLINKS+=pathconf.2 fpathconf.2
 MLINKS+=pathconf.2 lpathconf.2
 MLINKS+=pdfork.2 pdgetpid.2\
 	pdfork.2 pdkill.2 \
 	pdfork.2 pdwait4.2
 MLINKS+=pipe.2 pipe2.2
 MLINKS+=poll.2 ppoll.2
 MLINKS+=rctl_add_rule.2 rctl_get_limits.2 \
 	rctl_add_rule.2 rctl_get_racct.2 \
 	rctl_add_rule.2 rctl_get_rules.2 \
 	rctl_add_rule.2 rctl_remove_rule.2
 MLINKS+=read.2 pread.2 \
 	read.2 preadv.2 \
 	read.2 readv.2
 MLINKS+=readlink.2 readlinkat.2
 MLINKS+=recv.2 recvfrom.2 \
 	recv.2 recvmsg.2
 MLINKS+=rename.2 renameat.2
 MLINKS+=rtprio.2 rtprio_thread.2
 .if !defined(NO_P1003_1B)
 MLINKS+=sched_get_priority_max.2 sched_get_priority_min.2 \
 	sched_get_priority_max.2 sched_rr_get_interval.2
 MLINKS+=sched_setparam.2 sched_getparam.2
 MLINKS+=sched_setscheduler.2 sched_getscheduler.2
 .endif
 MLINKS+=select.2 FD_CLR.3 \
 	select.2 FD_ISSET.3 \
 	select.2 FD_SET.3 \
 	select.2 FD_ZERO.3
 MLINKS+=send.2 sendmsg.2 \
 	send.2 sendto.2
 MLINKS+=setpgid.2 setpgrp.2
 MLINKS+=setresuid.2 getresgid.2 \
 	setresuid.2 getresuid.2 \
 	setresuid.2 setresgid.2
 MLINKS+=setuid.2 setegid.2 \
 	setuid.2 seteuid.2 \
 	setuid.2 setgid.2
 MLINKS+=shmat.2 shmdt.2
 MLINKS+=shm_open.2 shm_unlink.2
 MLINKS+=sigwaitinfo.2 sigtimedwait.2
 MLINKS+=stat.2 fstat.2 \
 	stat.2 fstatat.2 \
 	stat.2 lstat.2
 MLINKS+=statfs.2 fstatfs.2
 MLINKS+=swapon.2 swapoff.2
 MLINKS+=symlink.2 symlinkat.2
 MLINKS+=syscall.2 __syscall.2
 MLINKS+=timer_settime.2 timer_getoverrun.2 \
 	timer_settime.2 timer_gettime.2
 MLINKS+=thr_kill.2 thr_kill2.2
 MLINKS+=truncate.2 ftruncate.2
 MLINKS+=unlink.2 unlinkat.2
 MLINKS+=utimensat.2 futimens.2
 MLINKS+=utimes.2 futimes.2 \
 	utimes.2 futimesat.2 \
 	utimes.2 lutimes.2
 MLINKS+=wait.2 wait3.2 \
 	wait.2 wait4.2 \
 	wait.2 waitpid.2 \
 	wait.2 waitid.2 \
 	wait.2 wait6.2
 MLINKS+=write.2 pwrite.2 \
 	write.2 pwritev.2 \
 	write.2 writev.2
Index: head/lib/libc/sys/Symbol.map
===================================================================
--- head/lib/libc/sys/Symbol.map	(revision 318735)
+++ head/lib/libc/sys/Symbol.map	(revision 318736)
@@ -1,1049 +1,1034 @@
 /*
  * $FreeBSD$
  */
 
 /*
  * It'd be nice to automatically generate the syscall symbols, but we
  * don't know to what version they will eventually belong to, so for now
  * it has to be manual.
  */
 FBSD_1.0 {
 	__acl_aclcheck_fd;
 	__acl_aclcheck_file;
 	__acl_aclcheck_link;
 	__acl_delete_fd;
 	__acl_delete_file;
 	__acl_delete_link;
 	__acl_get_fd;
 	__acl_get_file;
 	__acl_get_link;
 	__acl_set_fd;
 	__acl_set_file;
 	__acl_set_link;
 	__getcwd;
 	__mac_execve;
 	__mac_get_fd;
 	__mac_get_file;
 	__mac_get_link;
 	__mac_get_pid;
 	__mac_get_proc;
 	__mac_set_fd;
 	__mac_set_file;
 	__mac_set_link;
 	__mac_set_proc;
 	__setugid;
 	__syscall;
 	__sysctl;
 	_umtx_op;
 	abort2;
 	accept;
 	access;
 	acct;
 	adjtime;
 	aio_cancel;
 	aio_error;
 	aio_fsync;
 	aio_read;
 	aio_return;
 	aio_suspend;
 	aio_waitcomplete;
 	aio_write;
 	audit;
 	auditctl;
 	auditon;
 	bind;
 	chdir;
 	chflags;
 	chmod;
 	chown;
 	chroot;
 	clock_getres;
 	clock_gettime;
 	clock_settime;
 	close;
 	connect;
 	dup;
 	dup2;
 	eaccess;
 	execve;
 	extattr_delete_fd;
 	extattr_delete_file;
 	extattr_delete_link;
 	extattr_get_fd;
 	extattr_get_file;
 	extattr_get_link;
 	extattr_list_fd;
 	extattr_list_file;
 	extattr_list_link;
 	extattr_set_fd;
 	extattr_set_file;
 	extattr_set_link;
 	extattrctl;
 	fchdir;
 	fchflags;
 	fchmod;
 	fchown;
 	fcntl;
 	fhopen;
-	fhstat;
-	fhstatfs;
 	flock;
 	fork;
 	fpathconf;
-	fstat;
-	fstatfs;
 	fsync;
 	futimes;
 	getaudit;
 	getaudit_addr;
 	getauid;
 	getcontext;
-	getdents;
-	getdirentries;
 	getdtablesize;
 	getegid;
 	geteuid;
 	getfh;
-	getfsstat;
 	getgid;
 	getgroups;
 	getitimer;
 	getpeername;
 	getpgid;
 	getpgrp;
 	getpid;
 	getppid;
 	getpriority;
 	getresgid;
 	getresuid;
 	getrlimit;
 	getrusage;
 	getsid;
 	getsockname;
 	getsockopt;
 	gettimeofday;
 	getuid;
 	ioctl;
 	issetugid;
 	jail;
 	jail_attach;
 	kenv;
 	kevent;
 	kill;
 	kldfind;
 	kldfirstmod;
 	kldload;
 	kldnext;
 	kldstat;
 	kldsym;
 	kldunload;
 	kldunloadf;
 	kqueue;
 	kmq_notify;		/* Do we want these to be public interfaces? */
 	kmq_open;		/* librt uses them to provide mq_xxx. */
 	kmq_setattr;
 	kmq_timedreceive;
 	kmq_timedsend;
 	kmq_unlink;
 	ksem_close;
 	ksem_destroy;
 	ksem_getvalue;
 	ksem_init;
 	ksem_open;
 	ksem_post;
 	ksem_timedwait;
 	ksem_trywait;
 	ksem_unlink;
 	ksem_wait;
 	ktrace;
 	lchflags;
 	lchmod;
 	lchown;
 	lgetfh;
 	link;
 	lio_listio;
 	listen;
-	lstat;
 	lutimes;
 	mac_syscall;
 	madvise;
 	mincore;
 	minherit;
 	mkdir;
 	mkfifo;
-	mknod;
 	mlock;
 	mlockall;
 	modfind;
 	modfnext;
 	modnext;
 	modstat;
 	mount;
 	mprotect;
 	msgget;
 	msgrcv;
 	msgsnd;
 	msgsys;
 	msync;
 	munlock;
 	munlockall;
 	munmap;
 	nanosleep;
 	netbsd_lchown;
 	netbsd_msync;
 	nfssvc;
-	nfstat;
-	nlstat;
 	nmount;
-	nstat;
 	ntp_adjtime;
 	ntp_gettime;
 	open;
 	pathconf;
 	pipe;
 	poll;
 	posix_openpt;
 	preadv;
 	profil;
 	pselect;
 	ptrace;
 	pwritev;
 	quotactl;
 	read;
 	readlink;
 	readv;
 	reboot;
 	recvfrom;
 	recvmsg;
 	rename;
 	revoke;
 	rfork;
 	rmdir;
 	rtprio;
 	rtprio_thread;
 	sched_get_priority_max;
 	sched_get_priority_min;
 	sched_getparam;
 	sched_getscheduler;
 	sched_rr_get_interval;
 	sched_setparam;
 	sched_setscheduler;
 	sched_yield;
 	select;
 	semget;
 	semop;
 	semsys;
 	sendfile;
 	sendmsg;
 	sendto;
 	setaudit;
 	setaudit_addr;
 	setauid;
 	setegid;
 	seteuid;
 	setgid;
 	setgroups;
 	setitimer;
 	setlogin;
 	setpgid;
 	setpriority;
 	setregid;
 	setresgid;
 	setresuid;
 	setreuid;
 	setrlimit;
 	setsid;
 	setsockopt;
 	settimeofday;
 	setuid;
 	shm_open;
 	shm_unlink;
 	shmat;
 	shmdt;
 	shmget;
 	shmsys;
 	shutdown;
 	sigaction;
 	sigaltstack;
 	sigpending;
 	sigprocmask;
 	sigqueue;
 	sigreturn;
 	sigsuspend;
 	sigtimedwait;
 	sigwait;
 	sigwaitinfo;
 	socket;
 	socketpair;
-	stat;
-	statfs;
 	swapoff;
 	swapon;
 	symlink;
 	sync;
 	sysarch;
 	syscall;
 	thr_create;
 	thr_exit;
 	thr_kill;
 	thr_kill2;
 	thr_new;
 	thr_self;
 	thr_set_name;
 	thr_suspend;
 	thr_wake;
 	ktimer_create;		/* Do we want these to be public interfaces? */
 	ktimer_delete;		/* librt uses them to provide timer_xxx. */
 	ktimer_getoverrun;
 	ktimer_gettime;
 	ktimer_settime;
 	umask;
 	undelete;
 	unlink;
 	unmount;
 	utimes;
 	utrace;
 	uuidgen;
 	vadvise;
 	wait4;
 	write;
 	writev;
 
 	__error;
 	ftruncate;
 	lseek;
 	mmap;
 	pread;
 	pwrite;
 	truncate;
 };
 
 FBSD_1.1 {
 	__semctl;
 	closefrom;
 	cpuset;
 	cpuset_getid;
 	cpuset_setid;
 	cpuset_getaffinity;
 	cpuset_setaffinity;
 	faccessat;
 	fchmodat;
 	fchownat;
 	fexecve;
-	fstatat;
 	futimesat;
 	jail_get;
 	jail_set;
 	jail_remove;
 	linkat;
 	lpathconf;
 	mkdirat;
 	mkfifoat;
-	mknodat;
 	msgctl;
 	readlinkat;
 	renameat;
 	setfib;
 	shmctl;
 	symlinkat;
 	unlinkat;
 };
 
 FBSD_1.2 {
 	cap_enter;
 	cap_getmode;
 	getloginclass;
 	pdfork;
 	pdgetpid;
 	pdkill;
 	posix_fallocate;
 	rctl_get_racct;
 	rctl_get_rules;
 	rctl_get_limits;
 	rctl_add_rule;
 	rctl_remove_rule;
 	setloginclass;
 };
 
 FBSD_1.3 {
 	accept4;
 	aio_mlock;
 	bindat;
 	cap_fcntls_get;
 	cap_fcntls_limit;
 	cap_ioctls_get;
 	cap_ioctls_limit;
 	__cap_rights_get;
 	cap_rights_limit;
 	cap_sandboxed;
 	chflagsat;
 	clock_getcpuclockid2;
 	connectat;
 	ffclock_getcounter;
 	ffclock_getestimate;
 	ffclock_setestimate;
 	pipe2;
 	posix_fadvise;
 	procctl;
 	wait6;
 };
 
 FBSD_1.4 {
 	futimens;
 	ppoll;
 	utimensat;
 	numa_setaffinity;
 	numa_getaffinity;
 	sendmmsg;
 	recvmmsg;
 };
 
 FBSD_1.5 {
 	clock_nanosleep;
 	fdatasync;
+	fhstat;
+	fhstatfs;
+	fstat;
+	fstatat;
+	fstatfs;
+	getdents;
+	getdirentries;
+	getfsstat;
+	lstat;
+	mknod;
+	mknodat;
+	stat;
+	statfs;
 };
 
 FBSDprivate_1.0 {
 	___acl_aclcheck_fd;
 	__sys___acl_aclcheck_fd;
 	___acl_aclcheck_file;
 	__sys___acl_aclcheck_file;
 	___acl_aclcheck_link;
 	__sys___acl_aclcheck_link;
 	___acl_delete_fd;
 	__sys___acl_delete_fd;
 	___acl_delete_file;
 	__sys___acl_delete_file;
 	___acl_delete_link;
 	__sys___acl_delete_link;
 	___acl_get_fd;
 	__sys___acl_get_fd;
 	___acl_get_file;
 	__sys___acl_get_file;
 	___acl_get_link;
 	__sys___acl_get_link;
 	___acl_set_fd;
 	__sys___acl_set_fd;
 	___acl_set_file;
 	__sys___acl_set_file;
 	___acl_set_link;
 	__sys___acl_set_link;
 	___getcwd;
 	__sys___getcwd;
 	___mac_execve;
 	__sys___mac_execve;
 	___mac_get_fd;
 	__sys___mac_get_fd;
 	___mac_get_file;
 	__sys___mac_get_file;
 	___mac_get_link;
 	__sys___mac_get_link;
 	___mac_get_pid;
 	__sys___mac_get_pid;
 	___mac_get_proc;
 	__sys___mac_get_proc;
 	___mac_set_fd;
 	__sys___mac_set_fd;
 	___mac_set_file;
 	__sys___mac_set_file;
 	___mac_set_link;
 	__sys___mac_set_link;
 	___mac_set_proc;
 	__sys___mac_set_proc;
 	___semctl;
 	__sys___semctl;
 	___setugid;
 	__sys___setugid;
 	___syscall;
 	__sys___syscall;
 	___sysctl;
 	__sys___sysctl;
 	__umtx_op;
 	__sys__umtx_op;
 	_abort2;
 	__sys_abort2;
 	_accept;
 	__sys_accept;
 	_accept4;
 	__sys_accept4;
 	_access;
 	__sys_access;
 	_acct;
 	__sys_acct;
 	_adjtime;
 	__sys_adjtime;
 	_aio_cancel;
 	__sys_aio_cancel;
 	_aio_error;
 	__sys_aio_error;
 	_aio_fsync;
 	__sys_aio_fsync;
 	_aio_read;
 	__sys_aio_read;
 	_aio_return;
 	__sys_aio_return;
 	_aio_suspend;
 	__sys_aio_suspend;
 	_aio_waitcomplete;
 	__sys_aio_waitcomplete;
 	_aio_write;
 	__sys_aio_write;
 	_audit;
 	__sys_audit;
 	_auditctl;
 	__sys_auditctl;
 	_auditon;
 	__sys_auditon;
 	_bind;
 	__sys_bind;
 	_chdir;
 	__sys_chdir;
 	_chflags;
 	__sys_chflags;
 	_chmod;
 	__sys_chmod;
 	_chown;
 	__sys_chown;
 	_chroot;
 	__sys_chroot;
 	_clock_getcpuclockid2;
 	__sys_clock_getcpuclockid2;
 	_clock_getres;
 	__sys_clock_getres;
 	_clock_gettime;
 	__sys_clock_gettime;
 	__sys_clock_nanosleep;
 	_clock_settime;
 	__sys_clock_settime;
 	_close;
 	__sys_close;
 	_closefrom;
 	__sys_closefrom;
 	_connect;
 	__sys_connect;
 	_cpuset;
 	__sys_cpuset;
 	_cpuset_getid;
 	__sys_cpuset_getid;
 	_cpuset_setid;
 	__sys_cpuset_setid;
 	_cpuset_getaffinity;
 	__sys_cpuset_getaffinity;
 	_cpuset_setaffinity;
 	__sys_cpuset_setaffinity;
 	_dup;
 	__sys_dup;
 	_dup2;
 	__sys_dup2;
 	_eaccess;
 	__sys_eaccess;
 	_execve;
 	__sys_execve;
 	_extattr_delete_fd;
 	__sys_extattr_delete_fd;
 	_extattr_delete_file;
 	__sys_extattr_delete_file;
 	_extattr_delete_link;
 	__sys_extattr_delete_link;
 	_extattr_get_fd;
 	__sys_extattr_get_fd;
 	_extattr_get_file;
 	__sys_extattr_get_file;
 	_extattr_get_link;
 	__sys_extattr_get_link;
 	_extattr_list_fd;
 	__sys_extattr_list_fd;
 	_extattr_list_file;
 	__sys_extattr_list_file;
 	_extattr_list_link;
 	__sys_extattr_list_link;
 	_extattr_set_fd;
 	__sys_extattr_set_fd;
 	_extattr_set_file;
 	__sys_extattr_set_file;
 	_extattr_set_link;
 	__sys_extattr_set_link;
 	_extattrctl;
 	__sys_extattrctl;
 	_fchdir;
 	__sys_fchdir;
 	_fchflags;
 	__sys_fchflags;
 	_fchmod;
 	__sys_fchmod;
 	_fchown;
 	__sys_fchown;
 	_fcntl;
 	__sys_fcntl;
 	__fcntl_compat;
 	_fhopen;
 	__sys_fhopen;
 	_fhstat;
 	__sys_fhstat;
 	_fhstatfs;
 	__sys_fhstatfs;
 	_flock;
 	__sys_flock;
 	_fork;
 	__sys_fork;
 	_fpathconf;
 	__sys_fpathconf;
 	_fstat;
 	__sys_fstat;
 	_fstatfs;
 	__sys_fstatfs;
 	_fsync;
 	__sys_fsync;
 	_fdatasync;
 	__sys_fdatasync;
 	_futimes;
 	__sys_futimes;
 	_getaudit;
 	__sys_getaudit;
 	_getaudit_addr;
 	__sys_getaudit_addr;
 	_getauid;
 	__sys_getauid;
 	_getcontext;
 	__sys_getcontext;
-	_getdents;
-	__sys_getdents;
 	_getdirentries;
 	__sys_getdirentries;
 	_getdtablesize;
 	__sys_getdtablesize;
 	_getegid;
 	__sys_getegid;
 	_geteuid;
 	__sys_geteuid;
 	_getfh;
 	__sys_getfh;
 	_getfsstat;
 	__sys_getfsstat;
 	_getgid;
 	__sys_getgid;
 	_getgroups;
 	__sys_getgroups;
 	_getitimer;
 	__sys_getitimer;
 	_getpeername;
 	__sys_getpeername;
 	_getpgid;
 	__sys_getpgid;
 	_getpgrp;
 	__sys_getpgrp;
 	_getpid;
 	__sys_getpid;
 	_getppid;
 	__sys_getppid;
 	_getpriority;
 	__sys_getpriority;
 	_getresgid;
 	__sys_getresgid;
 	_getresuid;
 	__sys_getresuid;
 	_getrlimit;
 	__sys_getrlimit;
 	_getrusage;
 	__sys_getrusage;
 	_getsid;
 	__sys_getsid;
 	_getsockname;
 	__sys_getsockname;
 	_getsockopt;
 	__sys_getsockopt;
 	_gettimeofday;
 	__sys_gettimeofday;
 	_getuid;
 	__sys_getuid;
 	_ioctl;
 	__sys_ioctl;
 	_issetugid;
 	__sys_issetugid;
 	_jail;
 	__sys_jail;
 	_jail_attach;
 	__sys_jail_attach;
 	_kenv;
 	__sys_kenv;
 	_kevent;
 	__sys_kevent;
 	_kill;
 	__sys_kill;
 	_kldfind;
 	__sys_kldfind;
 	_kldfirstmod;
 	__sys_kldfirstmod;
 	_kldload;
 	__sys_kldload;
 	_kldnext;
 	__sys_kldnext;
 	_kldstat;
 	__sys_kldstat;
 	_kldsym;
 	__sys_kldsym;
 	_kldunload;
 	__sys_kldunload;
 	_kldunloadf;
 	__sys_kldunloadf;
 	_kmq_notify;
 	__sys_kmq_notify;
 	_kmq_open;
 	__sys_kmq_open;
 	_kmq_setattr;
 	__sys_kmq_setattr;
 	_kmq_timedreceive;
 	__sys_kmq_timedreceive;
 	_kmq_timedsend;
 	__sys_kmq_timedsend;
 	_kmq_unlink;
 	__sys_kmq_unlink;
 	_kqueue;
 	__sys_kqueue;
 	_ksem_close;
 	__sys_ksem_close;
 	_ksem_destroy;
 	__sys_ksem_destroy;
 	_ksem_getvalue;
 	__sys_ksem_getvalue;
 	_ksem_init;
 	__sys_ksem_init;
 	_ksem_open;
 	__sys_ksem_open;
 	_ksem_post;
 	__sys_ksem_post;
 	_ksem_timedwait;
 	__sys_ksem_timedwait;
 	_ksem_trywait;
 	__sys_ksem_trywait;
 	_ksem_unlink;
 	__sys_ksem_unlink;
 	_ksem_wait;
 	__sys_ksem_wait;
 	_ktrace;
 	__sys_ktrace;
 	_lchflags;
 	__sys_lchflags;
 	_lchmod;
 	__sys_lchmod;
 	_lchown;
 	__sys_lchown;
 	_lgetfh;
 	__sys_lgetfh;
 	_link;
 	__sys_link;
 	_lio_listio;
 	__sys_lio_listio;
 	_listen;
 	__sys_listen;
-	_lstat;
-	__sys_lstat;
 	_lutimes;
 	__sys_lutimes;
 	_mac_syscall;
 	__sys_mac_syscall;
 	_madvise;
 	__sys_madvise;
 	_mincore;
 	__sys_mincore;
 	_minherit;
 	__sys_minherit;
 	_mkdir;
 	__sys_mkdir;
 	_mkfifo;
 	__sys_mkfifo;
 	_mknod;
 	__sys_mknod;
 	_mlock;
 	__sys_mlock;
 	_mlockall;
 	__sys_mlockall;
 	_modfind;
 	__sys_modfind;
 	_modfnext;
 	__sys_modfnext;
 	_modnext;
 	__sys_modnext;
 	_modstat;
 	__sys_modstat;
 	_mount;
 	__sys_mount;
 	_mprotect;
 	__sys_mprotect;
 	_msgctl;
 	__sys_msgctl;
 	_msgget;
 	__sys_msgget;
 	_msgrcv;
 	__sys_msgrcv;
 	_msgsnd;
 	__sys_msgsnd;
 	_msgsys;
 	__sys_msgsys;
 	_msync;
 	__sys_msync;
 	_munlock;
 	__sys_munlock;
 	_munlockall;
 	__sys_munlockall;
 	_munmap;
 	__sys_munmap;
 	_nanosleep;
 	__sys_nanosleep;
 	_netbsd_lchown;
 	__sys_netbsd_lchown;
 	_netbsd_msync;
 	__sys_netbsd_msync;
 	_nfssvc;
 	__sys_nfssvc;
-	_nfstat;
-	__sys_nfstat;
-	_nlstat;
-	__sys_nlstat;
 	_nmount;
 	__sys_nmount;
-	_nstat;
-	__sys_nstat;
 	_ntp_adjtime;
 	__sys_ntp_adjtime;
 	_ntp_gettime;
 	__sys_ntp_gettime;
 	_open;
 	__sys_open;
 	_openat;
 	__sys_openat;
 	_pathconf;
 	__sys_pathconf;
 	_pipe;
 	__sys_pipe;
 	_poll;
 	__sys_poll;
 	_ppoll;
 	__sys_ppoll;
 	_preadv;
 	__sys_preadv;
 	_procctl;
 	__sys_procctl;
 	_profil;
 	__sys_profil;
 	_pselect;
 	__sys_pselect;
 	_ptrace;
 	__sys_ptrace;
 	_pwritev;
 	__sys_pwritev;
 	_quotactl;
 	__sys_quotactl;
 	_read;
 	__sys_read;
 	_readlink;
 	__sys_readlink;
 	_readv;
 	__sys_readv;
 	_reboot;
 	__sys_reboot;
 	_recvfrom;
 	__sys_recvfrom;
 	_recvmsg;
 	__sys_recvmsg;
 	_rename;
 	__sys_rename;
 	_revoke;
 	__sys_revoke;
 	_rfork;
 	__sys_rfork;
 	_rmdir;
 	__sys_rmdir;
 	_rtprio;
 	__sys_rtprio;
 	_rtprio_thread;
 	__sys_rtprio_thread;
 	_sched_get_priority_max;
 	__sys_sched_get_priority_max;
 	_sched_get_priority_min;
 	__sys_sched_get_priority_min;
 	_sched_getparam;
 	__sys_sched_getparam;
 	_sched_getscheduler;
 	__sys_sched_getscheduler;
 	_sched_rr_get_interval;
 	__sys_sched_rr_get_interval;
 	_sched_setparam;
 	__sys_sched_setparam;
 	_sched_setscheduler;
 	__sys_sched_setscheduler;
 	_sched_yield;
 	__sys_sched_yield;
 	_select;
 	__sys_select;
 	_semget;
 	__sys_semget;
 	_semop;
 	__sys_semop;
 	_semsys;
 	__sys_semsys;
 	_sendfile;
 	__sys_sendfile;
 	_sendmsg;
 	__sys_sendmsg;
 	_sendto;
 	__sys_sendto;
 	_setaudit;
 	__sys_setaudit;
 	_setaudit_addr;
 	__sys_setaudit_addr;
 	_setauid;
 	__sys_setauid;
 	_setcontext;
 	__sys_setcontext;
 	_setegid;
 	__sys_setegid;
 	_seteuid;
 	__sys_seteuid;
 	_setgid;
 	__sys_setgid;
 	_setgroups;
 	__sys_setgroups;
 	_setitimer;
 	__sys_setitimer;
 	_setlogin;
 	__sys_setlogin;
 	_setpgid;
 	__sys_setpgid;
 	_setpriority;
 	__sys_setpriority;
 	_setregid;
 	__sys_setregid;
 	_setresgid;
 	__sys_setresgid;
 	_setresuid;
 	__sys_setresuid;
 	_setreuid;
 	__sys_setreuid;
 	_setrlimit;
 	__sys_setrlimit;
 	_setsid;
 	__sys_setsid;
 	_setsockopt;
 	__sys_setsockopt;
 	_settimeofday;
 	__sys_settimeofday;
 	_setuid;
 	__sys_setuid;
 	_shm_open;
 	__sys_shm_open;
 	_shm_unlink;
 	__sys_shm_unlink;
 	_shmat;
 	__sys_shmat;
 	_shmctl;
 	__sys_shmctl;
 	_shmdt;
 	__sys_shmdt;
 	_shmget;
 	__sys_shmget;
 	_shmsys;
 	__sys_shmsys;
 	_shutdown;
 	__sys_shutdown;
 	_sigaction;
 	__sys_sigaction;
 	_sigaltstack;
 	__sys_sigaltstack;
 	_sigpending;
 	__sys_sigpending;
 	_sigprocmask;
 	__sys_sigprocmask;
 	_sigqueue;
 	__sys_sigqueue;
 	_sigreturn;
 	__sys_sigreturn;
 	_sigsuspend;
 	__sys_sigsuspend;
 	_sigtimedwait;
 	__sys_sigtimedwait;
 	_sigwait;
 	__sigwait;
 	__sys_sigwait;
 	_sigwaitinfo;
 	__sys_sigwaitinfo;
 	_socket;
 	__sys_socket;
 	_socketpair;
 	__sys_socketpair;
-	_stat;
-	__sys_stat;
 	_statfs;
 	__sys_statfs;
 	_swapcontext;
 	__sys_swapcontext;
 	_swapoff;
 	__sys_swapoff;
 	_swapon;
 	__sys_swapon;
 	_symlink;
 	__sys_symlink;
 	_sync;
 	__sys_sync;
 	_sysarch;
 	__sys_sysarch;
 	_syscall;
 	__sys_syscall;
 	_thr_create;
 	__sys_thr_create;
 	_thr_exit;
 	__sys_thr_exit;
 	_thr_kill;
 	__sys_thr_kill;
 	_thr_kill2;
 	__sys_thr_kill2;
 	_thr_new;
 	__sys_thr_new;
 	_thr_self;
 	__sys_thr_self;
 	_thr_set_name;
 	__sys_thr_set_name;
 	_thr_suspend;
 	__sys_thr_suspend;
 	_thr_wake;
 	__sys_thr_wake;
 	_ktimer_create;
 	__sys_ktimer_create;
 	_ktimer_delete;
 	__sys_ktimer_delete;
 	_ktimer_getoverrun;
 	__sys_ktimer_getoverrun;
 	_ktimer_gettime;
 	__sys_ktimer_gettime;
 	_ktimer_settime;
 	__sys_ktimer_settime;
 	_umask;
 	__sys_umask;
 	_undelete;
 	__sys_undelete;
 	_unlink;
 	__sys_unlink;
 	_unmount;
 	__sys_unmount;
 	_utimes;
 	__sys_utimes;
 	_utrace;
 	__sys_utrace;
 	_uuidgen;
 	__sys_uuidgen;
 	_vadvise;
 	__sys_vadvise;
 	_wait4;
 	__sys_wait4;
 	_wait6;
 	__sys_wait6;
 	_write;
 	__sys_write;
 	_writev;
 	__sys_writev;
 	__set_error_selector;
 	nlm_syscall;
 	gssd_syscall;
 	__libc_interposing_slot;
 	__libc_sigwait;
 };
Index: head/lib/libc/sys/getdents.c
===================================================================
--- head/lib/libc/sys/getdents.c	(nonexistent)
+++ head/lib/libc/sys/getdents.c	(revision 318736)
@@ -0,0 +1,41 @@
+/*-
+ * Copyright (c) 2012 Gleb Kurtsou <gleb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
+#include <sys/param.h>
+#include <sys/syscall.h>
+#include <dirent.h>
+#include "libc_private.h"
+
+ssize_t
+getdents(int fd, char *buf, size_t nbytes)
+{
+	/* Forward to getdirentries, passing NULL as its final argument. */
+	return (__sys_getdirentries(fd, buf, nbytes, NULL));
+}

Property changes on: head/lib/libc/sys/getdents.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/lib/libc/sys/getdirentries.2
===================================================================
--- head/lib/libc/sys/getdirentries.2	(revision 318735)
+++ head/lib/libc/sys/getdirentries.2	(revision 318736)
@@ -1,186 +1,186 @@
 .\" Copyright (c) 1989, 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"	@(#)getdirentries.2	8.2 (Berkeley) 5/3/95
 .\" $FreeBSD$
 .\"
 .Dd May 3, 1995
 .Dt GETDIRENTRIES 2
 .Os
 .Sh NAME
 .Nm getdirentries ,
 .Nm getdents
 .Nd "get directory entries in a file system independent format"
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
 .In sys/types.h
 .In dirent.h
-.Ft int
-.Fn getdirentries "int fd" "char *buf" "int nbytes" "long *basep"
-.Ft int
-.Fn getdents "int fd" "char *buf" "int nbytes"
+.Ft ssize_t
+.Fn getdirentries "int fd" "char *buf" "size_t nbytes" "off_t *basep"
+.Ft ssize_t
+.Fn getdents "int fd" "char *buf" "size_t nbytes"
 .Sh DESCRIPTION
 The
 .Fn getdirentries
 and
 .Fn getdents
 system calls read directory entries from the directory
 referenced by the file descriptor
 .Fa fd
 into the buffer pointed to by
 .Fa buf ,
 in a file system independent format.
 Up to
 .Fa nbytes
 of data will be transferred.
 The
 .Fa nbytes
 argument must be greater than or equal to the
 block size associated with the file,
 see
 .Xr stat 2 .
 Some file systems may not support these system calls
 with buffers smaller than this size.
 .Pp
 The data in the buffer is a series of
 .Vt dirent
 structures each containing the following entries:
 .Bd -literal -offset indent
 uint32_t d_fileno;
 uint16_t d_reclen;
 uint8_t  d_type;
 uint8_t  d_namlen;
 char	d_name[MAXNAMLEN + 1];	/* see below */
 .Ed
 .Pp
 The
 .Fa d_fileno
 entry is a number which is unique for each
 distinct file in the file system.
 Files that are linked by hard links (see
 .Xr link 2 )
 have the same
 .Fa d_fileno .
 The
 .Fa d_reclen
 entry is the length, in bytes, of the directory record.
 The
 .Fa d_type
 entry is the type of the file pointed to by the directory record.
 The file type values are defined in
 .Fa <sys/dirent.h> .
 The
 .Fa d_name
 entry contains a null terminated file name.
 The
 .Fa d_namlen
 entry specifies the length of the file name excluding the null byte.
 Thus the actual size of
 .Fa d_name
 may vary from 1 to
 .Dv MAXNAMLEN
 \&+ 1.
 .Pp
 Entries may be separated by extra space.
 The
 .Fa d_reclen
 entry may be used as an offset from the start of a
 .Fa dirent
 structure to the next structure, if any.
 .Pp
 The actual number of bytes transferred is returned.
 The current position pointer associated with
 .Fa fd
 is set to point to the next block of entries.
 The pointer does not necessarily advance by the number of bytes returned by
 .Fn getdirentries
 or
 .Fn getdents .
 A value of zero is returned when
 the end of the directory has been reached.
 .Pp
 The
 .Fn getdirentries
 system call writes the position of the block read into the location pointed to by
 .Fa basep .
 Alternatively, the current position pointer may be set and retrieved by
 .Xr lseek 2 .
 The current position pointer should only be set to a value returned by
 .Xr lseek 2 ,
 a value returned in the location pointed to by
 .Fa basep
 .Po Fn getdirentries
 only
 .Pc
 or zero.
 .Sh RETURN VALUES
 If successful, the number of bytes actually transferred is returned.
 Otherwise, -1 is returned and the global variable
 .Va errno
 is set to indicate the error.
 .Sh ERRORS
 The
 .Fn getdirentries
 system call
 will fail if:
 .Bl -tag -width Er
 .It Bq Er EBADF
 The
 .Fa fd
 argument
 is not a valid file descriptor open for reading.
 .It Bq Er EFAULT
 Either
 .Fa buf
 or
 .Fa basep
 points outside the allocated address space.
 .It Bq Er EINVAL
 The file referenced by
 .Fa fd
 is not a directory, or
 .Fa nbytes
 is too small for returning a directory entry or block of entries,
 or the current position pointer is invalid.
 .It Bq Er EIO
 An
 .Tn I/O
 error occurred while reading from or writing to the file system.
 .El
 .Sh SEE ALSO
 .Xr lseek 2 ,
 .Xr open 2
 .Sh HISTORY
 The
 .Fn getdirentries
 system call first appeared in
 .Bx 4.4 .
 The
 .Fn getdents
 system call first appeared in
 .Fx 3.0 .
Index: head/lib/libc/sys/lstat.c
===================================================================
--- head/lib/libc/sys/lstat.c	(nonexistent)
+++ head/lib/libc/sys/lstat.c	(revision 318736)
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 2012 Gleb Kurtsou <gleb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
+#include <sys/param.h>
+#include <sys/fcntl.h>
+#include <sys/syscall.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "libc_private.h"
+
+int
+lstat(const char *path, struct stat *sb)
+{
+	/* AT_SYMLINK_NOFOLLOW: stat a symbolic link itself, not its target. */
+	return (__sys_fstatat(AT_FDCWD, path, sb, AT_SYMLINK_NOFOLLOW));
+}

Property changes on: head/lib/libc/sys/lstat.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/lib/libc/sys/mknod.c
===================================================================
--- head/lib/libc/sys/mknod.c	(nonexistent)
+++ head/lib/libc/sys/mknod.c	(revision 318736)
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2011 Gleb Kurtsou <gleb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
+#include <sys/param.h>
+#include <sys/fcntl.h>
+#include <sys/syscall.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "libc_private.h"
+
+int __sys_mknodat(int, const char *, mode_t, dev_t);
+
+int
+mknod(const char *path, mode_t mode, dev_t dev)
+{
+	/* Forwards to __sys_mknodat() with AT_FDCWD as the directory fd. */
+	return (__sys_mknodat(AT_FDCWD, path, mode, dev));
+}

Property changes on: head/lib/libc/sys/mknod.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/lib/libc/sys/stat.c
===================================================================
--- head/lib/libc/sys/stat.c	(nonexistent)
+++ head/lib/libc/sys/stat.c	(revision 318736)
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 2012 Gleb Kurtsou <gleb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "namespace.h"
+#include <sys/param.h>
+#include <sys/fcntl.h>
+#include <sys/syscall.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "libc_private.h"
+
+int
+stat(const char *path, struct stat *sb)
+{
+	/* fstatat(2) on path resolved from AT_FDCWD, with no flags set. */
+	return (__sys_fstatat(AT_FDCWD, path, sb, 0));
+}

Property changes on: head/lib/libc/sys/stat.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/lib/libc/sys/statfs.2
===================================================================
--- head/lib/libc/sys/statfs.2	(revision 318735)
+++ head/lib/libc/sys/statfs.2	(revision 318736)
@@ -1,235 +1,235 @@
 .\" Copyright (c) 1989, 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"	@(#)statfs.2	8.5 (Berkeley) 5/24/95
 .\" $FreeBSD$
 .\"
-.Dd November 1, 2006
+.Dd February 13, 2017
 .Dt STATFS 2
 .Os
 .Sh NAME
 .Nm statfs
 .Nd get file system statistics
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
 .In sys/param.h
 .In sys/mount.h
 .Ft int
 .Fn statfs "const char *path" "struct statfs *buf"
 .Ft int
 .Fn fstatfs "int fd" "struct statfs *buf"
 .Sh DESCRIPTION
 The
 .Fn statfs
 system call
 returns information about a mounted file system.
 The
 .Fa path
 argument
 is the path name of any file within the mounted file system.
 The
 .Fa buf
 argument
 is a pointer to a
 .Vt statfs
 structure defined as follows:
 .Bd -literal
 typedef struct fsid { int32_t val[2]; } fsid_t; /* file system id type */
 
 /*
  * filesystem statistics
  */
 
 #define	MFSNAMELEN	16		/* length of type name including null */
-#define	MNAMELEN	88		/* size of on/from name bufs */
-#define	STATFS_VERSION	0x20030518	/* current version number */
+#define	MNAMELEN	1024		/* size of on/from name bufs */
+#define	STATFS_VERSION	0x20140518	/* current version number */
 
 struct statfs {
 uint32_t f_version;		/* structure version number */
 uint32_t f_type;		/* type of filesystem */
 uint64_t f_flags;		/* copy of mount exported flags */
 uint64_t f_bsize;		/* filesystem fragment size */
 uint64_t f_iosize;		/* optimal transfer block size */
 uint64_t f_blocks;		/* total data blocks in filesystem */
 uint64_t f_bfree;		/* free blocks in filesystem */
 int64_t	 f_bavail;		/* free blocks avail to non-superuser */
 uint64_t f_files;		/* total file nodes in filesystem */
 int64_t	 f_ffree;		/* free nodes avail to non-superuser */
 uint64_t f_syncwrites;		/* count of sync writes since mount */
 uint64_t f_asyncwrites;		/* count of async writes since mount */
 uint64_t f_syncreads;		/* count of sync reads since mount */
 uint64_t f_asyncreads;		/* count of async reads since mount */
 uint64_t f_spare[10];		/* unused spare */
 uint32_t f_namemax;		/* maximum filename length */
 uid_t	  f_owner;		/* user that mounted the filesystem */
 fsid_t	  f_fsid;		/* filesystem id */
 char	  f_charspare[80];	    /* spare string space */
 char	  f_fstypename[MFSNAMELEN]; /* filesystem type name */
 char	  f_mntfromname[MNAMELEN];  /* mounted filesystem */
 char	  f_mntonname[MNAMELEN];    /* directory on which mounted */
 };
 .Ed
 .Pp
 The flags that may be returned include:
 .Bl -tag -width MNT_SYNCHRONOUS
 .It Dv MNT_RDONLY
 The file system is mounted read-only;
 even the super-user may not write on it.
 .It Dv MNT_NOEXEC
 Files may not be executed from the file system.
 .It Dv MNT_NOSUID
 Setuid and setgid bits on files are not honored when they are executed.
 .It Dv MNT_SYNCHRONOUS
 All I/O to the file system is done synchronously.
 .It Dv MNT_ASYNC
 No file system I/O is done synchronously.
 .It Dv MNT_SOFTDEP
 Soft updates being done (see
 .Xr ffs 7 ) .
 .It Dv MNT_GJOURNAL
 Journaling with gjournal is enabled (see
 .Xr gjournal 8 ) .
 .It Dv MNT_SUIDDIR
 Special handling of SUID bit on directories.
 .It Dv MNT_UNION
 Union with underlying file system.
 .It Dv MNT_NOSYMFOLLOW
 Symbolic links are not followed.
 .It Dv MNT_NOCLUSTERR
 Read clustering is disabled.
 .It Dv MNT_NOCLUSTERW
 Write clustering is disabled.
 .\".It Dv MNT_JAILDEVFS
 .\"XXX
 .It Dv MNT_MULTILABEL
 Mandatory Access Control (MAC) support for individual objects
 (see
 .Xr mac 4 ) .
 .It Dv MNT_ACLS
 Access Control List (ACL) support enabled.
 .It Dv MNT_LOCAL
 The file system resides locally.
 .It Dv MNT_QUOTA
 The file system has quotas enabled on it.
 .It Dv MNT_ROOTFS
 Identifies the root file system.
 .It Dv MNT_EXRDONLY
 The file system is exported read-only.
 .It Dv MNT_NOATIME
 Updating of file access times is disabled.
 .It Dv MNT_USER
 The file system has been mounted by a user.
 .\".It Dv MNT_IGNORE
 .\"XXX
 .It Dv MNT_EXPORTED
 The file system is exported for both reading and writing.
 .It Dv MNT_DEFEXPORTED
 The file system is exported for both reading and writing to any Internet host.
 .It Dv MNT_EXPORTANON
 The file system maps all remote accesses to the anonymous user.
 .It Dv MNT_EXKERB
 The file system is exported with Kerberos uid mapping.
 .It Dv MNT_EXPUBLIC
 The file system is exported publicly (WebNFS).
 .El
 .Pp
 Fields that are undefined for a particular file system are set to -1.
 The
 .Fn fstatfs
 system call
 returns the same information about an open file referenced by descriptor
 .Fa fd .
 .Sh RETURN VALUES
 .Rv -std
 .Sh ERRORS
 The
 .Fn statfs
 system call
 fails if one or more of the following are true:
 .Bl -tag -width Er
 .It Bq Er ENOTDIR
 A component of the path prefix of
 .Fa path
 is not a directory.
 .It Bq Er ENAMETOOLONG
 The length of a component of
 .Fa path
 exceeds 255 characters,
 or the length of
 .Fa path
 exceeds 1023 characters.
 .It Bq Er ENOENT
 The file referred to by
 .Fa path
 does not exist.
 .It Bq Er EACCES
 Search permission is denied for a component of the path prefix of
 .Fa path .
 .It Bq Er ELOOP
 Too many symbolic links were encountered in translating
 .Fa path .
 .It Bq Er EFAULT
 The
 .Fa buf
 or
 .Fa path
 argument
 points to an invalid address.
 .It Bq Er EIO
 An
 .Tn I/O
 error occurred while reading from or writing to the file system.
 .El
 .Pp
 The
 .Fn fstatfs
 system call
 fails if one or more of the following are true:
 .Bl -tag -width Er
 .It Bq Er EBADF
 The
 .Fa fd
 argument
 is not a valid open file descriptor.
 .It Bq Er EFAULT
 The
 .Fa buf
 argument
 points to an invalid address.
 .It Bq Er EIO
 An
 .Tn I/O
 error occurred while reading from or writing to the file system.
 .El
 .Sh SEE ALSO
 .Xr fhstatfs 2
 .Sh HISTORY
 The
 .Fn statfs
 system call first appeared in
 .Bx 4.4 .
Index: head/lib/libkvm/kvm_proc.c
===================================================================
--- head/lib/libkvm/kvm_proc.c	(revision 318735)
+++ head/lib/libkvm/kvm_proc.c	(revision 318736)
@@ -1,744 +1,745 @@
 /*-
  * Copyright (c) 1989, 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software developed by the Computer Systems
  * Engineering group at Lawrence Berkeley Laboratory under DARPA contract
  * BG 91-66 and contributed to Berkeley.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if 0
 #if defined(LIBC_SCCS) && !defined(lint)
 static char sccsid[] = "@(#)kvm_proc.c	8.3 (Berkeley) 9/23/93";
 #endif /* LIBC_SCCS and not lint */
 #endif
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Proc traversal interface for kvm.  ps and w are (probably) the exclusive
  * users of this code, so we've factored it out into a separate module.
  * Thus, we keep this grunge out of the other kvm applications (i.e.,
  * most other applications are interested only in open/close/read/nlist).
  */
 
 #include <sys/param.h>
 #define	_WANT_UCRED	/* make ucred.h give us 'struct ucred' */
 #include <sys/ucred.h>
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_task.h>
 #include <sys/cpuset.h>
 #include <sys/user.h>
 #include <sys/proc.h>
 #define	_WANT_PRISON	/* make jail.h give us 'struct prison' */
 #include <sys/jail.h>
 #include <sys/exec.h>
 #include <sys/stat.h>
 #include <sys/sysent.h>
 #include <sys/ioctl.h>
 #include <sys/tty.h>
 #include <sys/file.h>
 #include <sys/conf.h>
 #define	_WANT_KW_EXITCODE
 #include <sys/wait.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <nlist.h>
 #include <kvm.h>
 
 #include <sys/sysctl.h>
 
 #include <limits.h>
 #include <memory.h>
 #include <paths.h>
 
 #include "kvm_private.h"
 
 /*
  * Read sizeof(*obj) bytes from address addr into obj using kvm_read().
  * Evaluates to nonzero when kvm_read() did not return the full size, so
  * "if (KREAD(...))" is the failure path and "KREAD(...) == 0" is success.
  * obj is expanded more than once; pass a side-effect-free expression.
  */
 #define KREAD(kd, addr, obj) \
 	(kvm_read(kd, addr, (char *)(obj), sizeof(*obj)) != sizeof(*obj))
 
 static int ticks;
 static int hz;
 static uint64_t cpu_tick_frequency;
 
 /*
  * Convert a CPU tick count to microseconds.  Derived from
  * sys/kern/kern_tc.c.  Depends on cpu_tick_frequency (ticks per second),
  * which is read/initialized before this function is ever called; a zero
  * frequency yields 0.  Whole seconds and the sub-second remainder are
  * scaled separately, so no nonzero frequency can produce a zero divisor
  * and the result is not skewed by a pre-truncated divisor.
  */
 static uint64_t
 cputick2usec(uint64_t tick)
 {
 
 	if (cpu_tick_frequency == 0)
 		return (0);
 	return ((tick / cpu_tick_frequency) * 1000000ULL +
 	    ((tick % cpu_tick_frequency) * 1000000ULL) / cpu_tick_frequency);
 }
 
 /*
  * Read proc's from memory file into buffer bp, which has space to hold
  * at most maxcnt procs.
  *
  * Starting at p, each struct proc is read with KREAD() and the walk follows
  * LIST_NEXT(p_list).  Entries are filtered by what (with
  * KERN_PROC_INC_THREAD masked off) and arg; every match is copied into the
  * next slot of bp.
  *
  * Returns the number of entries stored in bp, or -1 when one of the checked
  * reads fails (the failure is reported through _kvm_err()).
  */
 static int
 kvm_proclist(kvm_t *kd, int what, int arg, struct proc *p,
     struct kinfo_proc *bp, int maxcnt)
 {
 	int cnt = 0;
 	struct kinfo_proc kinfo_proc, *kp;
 	struct pgrp pgrp;
 	struct session sess;
 	struct cdev t_cdev;
 	struct tty tty;
 	struct vmspace vmspace;
 	struct sigacts sigacts;
 #if 0
 	struct pstats pstats;
 #endif
 	struct ucred ucred;
 	struct prison pr;
 	struct thread mtd;
 	struct proc proc;
 	struct proc pproc;
 	struct sysentvec sysent;
 	char svname[KI_EMULNAMELEN];
 
 	kp = &kinfo_proc;
 	/*
 	 * Loop on the processes.  XXX: this is incomplete: only the first
 	 * thread of each process is read (into mtd), whereas we would need to
 	 * loop on the threads and merge the ones that belong to the same
 	 * process somehow.
 	 */
 	for (; cnt < maxcnt && p != NULL; p = LIST_NEXT(&proc, p_list)) {
 		memset(kp, 0, sizeof *kp);
 		/*
 		 * Must follow the memset: setting it once before the loop
 		 * would leave every copied entry with a zero ki_structsize.
 		 */
 		kp->ki_structsize = sizeof(kinfo_proc);
 		if (KREAD(kd, (u_long)p, &proc)) {
 			_kvm_err(kd, kd->program, "can't read proc at %p", p);
 			return (-1);
 		}
 		if (proc.p_state == PRS_NEW)
 			continue;
 		/* mtd is only (re)loaded for non-zombies. */
 		if (proc.p_state != PRS_ZOMBIE) {
 			if (KREAD(kd, (u_long)TAILQ_FIRST(&proc.p_threads),
 			    &mtd)) {
 				_kvm_err(kd, kd->program,
 				    "can't read thread at %p",
 				    TAILQ_FIRST(&proc.p_threads));
 				return (-1);
 			}
 		}
 		/*
 		 * KREAD() is nonzero on failure, so == 0 means the ucred was
 		 * read.  A failed read is tolerated: the credential fields
 		 * simply keep the zeroes from the memset above.
 		 */
 		if (KREAD(kd, (u_long)proc.p_ucred, &ucred) == 0) {
 			kp->ki_ruid = ucred.cr_ruid;
 			kp->ki_svuid = ucred.cr_svuid;
 			kp->ki_rgid = ucred.cr_rgid;
 			kp->ki_svgid = ucred.cr_svgid;
 			kp->ki_cr_flags = ucred.cr_flags;
 			if (ucred.cr_ngroups > KI_NGROUPS) {
 				kp->ki_ngroups = KI_NGROUPS;
 				kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW;
 			} else
 				kp->ki_ngroups = ucred.cr_ngroups;
 			/* NOTE(review): the result of this read is unchecked. */
 			kvm_read(kd, (u_long)ucred.cr_groups, kp->ki_groups,
 			    kp->ki_ngroups * sizeof(gid_t));
 			kp->ki_uid = ucred.cr_uid;
 			if (ucred.cr_prison != NULL) {
 				if (KREAD(kd, (u_long)ucred.cr_prison, &pr)) {
 					_kvm_err(kd, kd->program,
 					    "can't read prison at %p",
 					    ucred.cr_prison);
 					return (-1);
 				}
 				kp->ki_jid = pr.pr_id;
 			}
 		}
 
 		/*
 		 * First filter pass: selectors that need only the pid and
 		 * credential data gathered so far.
 		 */
 		switch(what & ~KERN_PROC_INC_THREAD) {
 
 		case KERN_PROC_GID:
 			if (kp->ki_groups[0] != (gid_t)arg)
 				continue;
 			break;
 
 		case KERN_PROC_PID:
 			if (proc.p_pid != (pid_t)arg)
 				continue;
 			break;
 
 		case KERN_PROC_RGID:
 			if (kp->ki_rgid != (gid_t)arg)
 				continue;
 			break;
 
 		case KERN_PROC_UID:
 			if (kp->ki_uid != (uid_t)arg)
 				continue;
 			break;
 
 		case KERN_PROC_RUID:
 			if (kp->ki_ruid != (uid_t)arg)
 				continue;
 			break;
 		}
 		/*
 		 * We're going to add another proc to the set.  If this
 		 * will overflow the buffer, assume the reason is because
 		 * nprocs (or the proc list) is corrupt and declare an error.
 		 * NOTE(review): the for-loop condition already guarantees
 		 * cnt < maxcnt here, so this check looks unreachable.
 		 */
 		if (cnt >= maxcnt) {
 			_kvm_err(kd, kd->program, "nprocs corrupt");
 			return (-1);
 		}
 		/*
 		 * gather kinfo_proc
 		 */
 		kp->ki_paddr = p;
 		kp->ki_addr = 0;	/* XXX uarea */
 		/* kp->ki_kstack = proc.p_thread.td_kstack; XXXKSE */
 		kp->ki_args = proc.p_args;
 		kp->ki_tracep = proc.p_tracevp;
 		kp->ki_textvp = proc.p_textvp;
 		kp->ki_fd = proc.p_fd;
 		kp->ki_vmspace = proc.p_vmspace;
 		if (proc.p_sigacts != NULL) {
 			if (KREAD(kd, (u_long)proc.p_sigacts, &sigacts)) {
 				_kvm_err(kd, kd->program,
 				    "can't read sigacts at %p", proc.p_sigacts);
 				return (-1);
 			}
 			kp->ki_sigignore = sigacts.ps_sigignore;
 			kp->ki_sigcatch = sigacts.ps_sigcatch;
 		}
 #if 0
 		if ((proc.p_flag & P_INMEM) && proc.p_stats != NULL) {
 			if (KREAD(kd, (u_long)proc.p_stats, &pstats)) {
 				_kvm_err(kd, kd->program,
 				    "can't read stats at %x", proc.p_stats);
 				return (-1);
 			}
 			kp->ki_start = pstats.p_start;
 
 			/*
 			 * XXX: The times here are probably zero and need
 			 * to be calculated from the raw data in p_rux and
 			 * p_crux.
 			 */
 			kp->ki_rusage = pstats.p_ru;
 			kp->ki_childstime = pstats.p_cru.ru_stime;
 			kp->ki_childutime = pstats.p_cru.ru_utime;
 			/* Some callers want child-times in a single value */
 			timeradd(&kp->ki_childstime, &kp->ki_childutime,
 			    &kp->ki_childtime);
 		}
 #endif
 		if (proc.p_oppid)
 			kp->ki_ppid = proc.p_oppid;
 		else if (proc.p_pptr) {
 			if (KREAD(kd, (u_long)proc.p_pptr, &pproc)) {
 				_kvm_err(kd, kd->program,
 				    "can't read pproc at %p", proc.p_pptr);
 				return (-1);
 			}
 			kp->ki_ppid = pproc.p_pid;
 		} else
 			kp->ki_ppid = 0;
 		if (proc.p_pgrp == NULL)
 			goto nopgrp;
 		if (KREAD(kd, (u_long)proc.p_pgrp, &pgrp)) {
 			_kvm_err(kd, kd->program, "can't read pgrp at %p",
 				 proc.p_pgrp);
 			return (-1);
 		}
 		kp->ki_pgid = pgrp.pg_id;
 		kp->ki_jobc = pgrp.pg_jobc;
 		if (KREAD(kd, (u_long)pgrp.pg_session, &sess)) {
 			_kvm_err(kd, kd->program, "can't read session at %p",
 				pgrp.pg_session);
 			return (-1);
 		}
 		kp->ki_sid = sess.s_sid;
 		(void)memcpy(kp->ki_login, sess.s_login,
 						sizeof(kp->ki_login));
 		kp->ki_kiflag = sess.s_ttyvp ? KI_CTTY : 0;
 		if (sess.s_leader == p)
 			kp->ki_kiflag |= KI_SLEADER;
 		if ((proc.p_flag & P_CONTROLT) && sess.s_ttyp != NULL) {
 			if (KREAD(kd, (u_long)sess.s_ttyp, &tty)) {
 				_kvm_err(kd, kd->program,
 					 "can't read tty at %p", sess.s_ttyp);
 				return (-1);
 			}
 			if (tty.t_dev != NULL) {
 				if (KREAD(kd, (u_long)tty.t_dev, &t_cdev)) {
 					_kvm_err(kd, kd->program,
 						 "can't read cdev at %p",
 						tty.t_dev);
 					return (-1);
 				}
 				/*
 				 * The cdev is read, but with the si_udev
 				 * assignment compiled out ki_tdev is NODEV.
 				 */
 #if 0
 				kp->ki_tdev = t_cdev.si_udev;
 #else
 				kp->ki_tdev = NODEV;
 #endif
 			}
 			if (tty.t_pgrp != NULL) {
 				if (KREAD(kd, (u_long)tty.t_pgrp, &pgrp)) {
 					_kvm_err(kd, kd->program,
 						 "can't read tpgrp at %p",
 						tty.t_pgrp);
 					return (-1);
 				}
 				kp->ki_tpgid = pgrp.pg_id;
 			} else
 				kp->ki_tpgid = -1;
 			if (tty.t_session != NULL) {
 				if (KREAD(kd, (u_long)tty.t_session, &sess)) {
 					_kvm_err(kd, kd->program,
 					    "can't read session at %p",
 					    tty.t_session);
 					return (-1);
 				}
 				kp->ki_tsid = sess.s_sid;
 			}
 		} else {
 			/* Also the landing point for procs without a pgrp. */
 nopgrp:
 			kp->ki_tdev = NODEV;
 		}
 		if ((proc.p_state != PRS_ZOMBIE) && mtd.td_wmesg)
 			(void)kvm_read(kd, (u_long)mtd.td_wmesg,
 			    kp->ki_wmesg, WMESGLEN);
 
 		/*
 		 * NOTE(review): the result is discarded, so after a failed
 		 * read the vmspace fields used below are indeterminate.
 		 */
 		(void)kvm_read(kd, (u_long)proc.p_vmspace,
 		    (char *)&vmspace, sizeof(vmspace));
 		kp->ki_size = vmspace.vm_map.size;
 		/*
 		 * Approximate the kernel's method of calculating
 		 * this field.
 		 */
 #define		pmap_resident_count(pm) ((pm)->pm_stats.resident_count)
 		kp->ki_rssize = pmap_resident_count(&vmspace.vm_pmap);
 		kp->ki_swrss = vmspace.vm_swrss;
 		kp->ki_tsize = vmspace.vm_tsize;
 		kp->ki_dsize = vmspace.vm_dsize;
 		kp->ki_ssize = vmspace.vm_ssize;
 
 		/*
 		 * Second filter pass: selectors that depend on the pgrp,
 		 * session and tty data gathered above.
 		 */
 		switch (what & ~KERN_PROC_INC_THREAD) {
 
 		case KERN_PROC_PGRP:
 			if (kp->ki_pgid != (pid_t)arg)
 				continue;
 			break;
 
 		case KERN_PROC_SESSION:
 			if (kp->ki_sid != (pid_t)arg)
 				continue;
 			break;
 
 		case KERN_PROC_TTY:
 			if ((proc.p_flag & P_CONTROLT) == 0 ||
 			     kp->ki_tdev != (dev_t)arg)
 				continue;
 			break;
 		}
 		/*
 		 * Bound the copy by the destination buffer, as done for
 		 * ki_tdname below; strlcpy() copies at most size - 1 bytes.
 		 */
 		if (proc.p_comm[0] != 0)
 			strlcpy(kp->ki_comm, proc.p_comm,
 			    sizeof(kp->ki_comm));
 		(void)kvm_read(kd, (u_long)proc.p_sysent, (char *)&sysent,
 		    sizeof(sysent));
 		(void)kvm_read(kd, (u_long)sysent.sv_name, (char *)&svname,
 		    sizeof(svname));
 		/*
 		 * NOTE(review): svname is filled by a fixed-size read and is
 		 * not explicitly NUL-terminated before strlcpy() scans it.
 		 */
 		if (svname[0] != 0)
 			strlcpy(kp->ki_emul, svname, KI_EMULNAMELEN);
 		if ((proc.p_state != PRS_ZOMBIE) &&
 		    (mtd.td_blocked != 0)) {
 			kp->ki_kiflag |= KI_LOCKBLOCK;
 			if (mtd.td_lockname)
 				(void)kvm_read(kd,
 				    (u_long)mtd.td_lockname,
 				    kp->ki_lockname, LOCKNAMELEN);
 			/* assumes ki_lockname holds LOCKNAMELEN + 1 bytes */
 			kp->ki_lockname[LOCKNAMELEN] = 0;
 		}
 		kp->ki_runtime = cputick2usec(proc.p_rux.rux_runtime);
 		kp->ki_pid = proc.p_pid;
 		kp->ki_siglist = proc.p_siglist;
 		/*
 		 * mtd is not read for a zombie, so only fold in the thread's
 		 * signal state when it is valid; ki_sigmask otherwise keeps
 		 * the zero from the memset at the top of the loop.
 		 */
 		if (proc.p_state != PRS_ZOMBIE) {
 			SIGSETOR(kp->ki_siglist, mtd.td_siglist);
 			kp->ki_sigmask = mtd.td_sigmask;
 		}
 		kp->ki_xstat = KW_EXITCODE(proc.p_xexit, proc.p_xsig);
 		kp->ki_acflag = proc.p_acflag;
 		kp->ki_lock = proc.p_lock;
 		if (proc.p_state != PRS_ZOMBIE) {
 			kp->ki_swtime = (ticks - proc.p_swtick) / hz;
 			kp->ki_flag = proc.p_flag;
 			kp->ki_sflag = 0;
 			kp->ki_nice = proc.p_nice;
 			kp->ki_traceflag = proc.p_traceflag;
 			if (proc.p_state == PRS_NORMAL) {
 				if (TD_ON_RUNQ(&mtd) ||
 				    TD_CAN_RUN(&mtd) ||
 				    TD_IS_RUNNING(&mtd)) {
 					kp->ki_stat = SRUN;
 				} else if (mtd.td_state ==
 				    TDS_INHIBITED) {
 					if (P_SHOULDSTOP(&proc)) {
 						kp->ki_stat = SSTOP;
 					} else if (
 					    TD_IS_SLEEPING(&mtd)) {
 						kp->ki_stat = SSLEEP;
 					} else if (TD_ON_LOCK(&mtd)) {
 						kp->ki_stat = SLOCK;
 					} else {
 						kp->ki_stat = SWAIT;
 					}
 				}
 			} else {
 				kp->ki_stat = SIDL;
 			}
 			/* Stuff from the thread */
 			kp->ki_pri.pri_level = mtd.td_priority;
 			kp->ki_pri.pri_native = mtd.td_base_pri;
 			kp->ki_lastcpu = mtd.td_lastcpu;
 			kp->ki_wchan = mtd.td_wchan;
 			kp->ki_oncpu = mtd.td_oncpu;
 			if (mtd.td_name[0] != '\0')
 				strlcpy(kp->ki_tdname, mtd.td_name, sizeof(kp->ki_tdname));
 			kp->ki_pctcpu = 0;
 			kp->ki_rqindex = 0;
 
 			/*
 			 * Note: legacy fields; wraps at NO_CPU_OLD or the
 			 * old max CPU value as appropriate
 			 */
 			if (mtd.td_lastcpu == NOCPU)
 				kp->ki_lastcpu_old = NOCPU_OLD;
 			else if (mtd.td_lastcpu > MAXCPU_OLD)
 				kp->ki_lastcpu_old = MAXCPU_OLD;
 			else
 				kp->ki_lastcpu_old = mtd.td_lastcpu;
 
 			if (mtd.td_oncpu == NOCPU)
 				kp->ki_oncpu_old = NOCPU_OLD;
 			else if (mtd.td_oncpu > MAXCPU_OLD)
 				kp->ki_oncpu_old = MAXCPU_OLD;
 			else
 				kp->ki_oncpu_old = mtd.td_oncpu;
 		} else {
 			kp->ki_stat = SZOMB;
 		}
+		kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */
 		bcopy(&kinfo_proc, bp, sizeof(kinfo_proc));
 		++bp;
 		++cnt;
 	}
 	return (cnt);
 }
 
 /*
  * Fill the process table of a crash dump.
  *
  * The list head stored at a_allproc is read and handed to kvm_proclist(),
  * then the same is done for the head stored at a_zombproc; the second call
  * appends after the entries produced by the first and is limited to what
  * is left of maxcnt.  Output goes to kd->procbase.
  *
  * Returns the total number of entries stored.  A negative value is
  * returned when either list head cannot be read or when the first
  * kvm_proclist() call fails; a failure of the second call only means that
  * no entries from that list are counted.
  */
 static int
 kvm_deadprocs(kvm_t *kd, int what, int arg, u_long a_allproc,
     u_long a_zombproc, int maxcnt)
 {
 	struct kinfo_proc *out;
 	struct proc *head;
 	int nlive, nzomb;
 
 	out = kd->procbase;
 
 	if (KREAD(kd, a_allproc, &head)) {
 		_kvm_err(kd, kd->program, "cannot read allproc");
 		return (-1);
 	}
 	nlive = kvm_proclist(kd, what, arg, head, out, maxcnt);
 	if (nlive < 0)
 		return (nlive);
 
 	if (KREAD(kd, a_zombproc, &head)) {
 		_kvm_err(kd, kd->program, "cannot read zombproc");
 		return (-1);
 	}
 	nzomb = kvm_proclist(kd, what, arg, head, out + nlive,
 	    maxcnt - nlive);
 
 	/* A failed zombie scan contributes nothing but is not fatal. */
 	return (nlive + (nzomb < 0 ? 0 : nzomb));
 }
 
 /*
  * Return the process table selected by op/arg and store the number of
  * entries in *cnt.
  *
  * A table cached in kd->procbase by an earlier call is freed first; the
  * new table is cached there again and that same pointer is returned, so
  * the caller does not own it.
  *
  * Live kernel (ISALIVE): the kern.proc sysctl is asked for a size, then
  * called again with a buffer that is 10% larger than the reported size;
  * the call is repeated with a further enlarged buffer while it fails with
  * ENOMEM and reports the buffer as completely filled.  The first entry's
  * ki_structsize must match sizeof(struct kinfo_proc).
  *
  * Dead kernel: the symbols listed in nl[] are resolved, nprocs, ticks, hz
  * and cpu_tick_frequency are read from the image, and kvm_deadprocs()
  * builds the table.
  *
  * Returns NULL (spelled 0 below) on failure.
  * NOTE(review): none of the early failure returns write *cnt.
  */
 struct kinfo_proc *
 kvm_getprocs(kvm_t *kd, int op, int arg, int *cnt)
 {
 	int mib[4], st, nprocs;
 	size_t size, osize;
 	int temp_op;
 
 	if (kd->procbase != 0) {
 		free((void *)kd->procbase);
 		/*
 		 * Clear this pointer in case this call fails.  Otherwise,
 		 * kvm_close() will free it again.
 		 */
 		kd->procbase = 0;
 	}
 	if (ISALIVE(kd)) {
 		size = 0;
 		mib[0] = CTL_KERN;
 		mib[1] = KERN_PROC;
 		mib[2] = op;
 		mib[3] = arg;
 		/* The thread flag is masked off only to pick the MIB length. */
 		temp_op = op & ~KERN_PROC_INC_THREAD;
 		/*
 		 * Size probe: KERN_PROC_ALL and KERN_PROC_PROC are passed
 		 * as a three-element MIB (without arg), everything else as
 		 * a four-element one.
 		 */
 		st = sysctl(mib,
 		    temp_op == KERN_PROC_ALL || temp_op == KERN_PROC_PROC ?
 		    3 : 4, NULL, &size, NULL, 0);
 		if (st == -1) {
 			_kvm_syserr(kd, kd->program, "kvm_getprocs");
 			return (0);
 		}
 		/*
 		 * We can't continue with a size of 0 because we pass
 		 * it to realloc() (via _kvm_realloc()), and passing 0
 		 * to realloc() results in undefined behavior.
 		 */
 		if (size == 0) {
 			/*
 			 * XXX: We should probably return an invalid,
 			 * but non-NULL, pointer here so any client
 			 * program trying to dereference it will
 			 * crash.  However, _kvm_freeprocs() calls
 			 * free() on kd->procbase if it isn't NULL,
 			 * and free()'ing a junk pointer isn't good.
 			 * Then again, _kvm_freeprocs() isn't used
 			 * anywhere . . .
 			 */
 			kd->procbase = _kvm_malloc(kd, 1);
 			goto liveout;
 		}
 		do {
 			/* Leave 10% headroom over the last reported size. */
 			size += size / 10;
 			kd->procbase = (struct kinfo_proc *)
 			    _kvm_realloc(kd, kd->procbase, size);
 			if (kd->procbase == NULL)
 				return (0);
 			/* Remember the buffer size; sysctl() rewrites size. */
 			osize = size;
 			st = sysctl(mib, temp_op == KERN_PROC_ALL ||
 			    temp_op == KERN_PROC_PROC ? 3 : 4,
 			    kd->procbase, &size, NULL, 0);
 		} while (st == -1 && errno == ENOMEM && size == osize);
 		if (st == -1) {
 			_kvm_syserr(kd, kd->program, "kvm_getprocs");
 			return (0);
 		}
 		/*
 		 * We have to check the size again because sysctl()
 		 * may "round up" oldlenp if oldp is NULL; hence it
 		 * might've told us that there was data to get when
 		 * there really isn't any.
 		 */
 		if (size > 0 &&
 		    kd->procbase->ki_structsize != sizeof(struct kinfo_proc)) {
 			_kvm_err(kd, kd->program,
 			    "kinfo_proc size mismatch (expected %zu, got %d)",
 			    sizeof(struct kinfo_proc),
 			    kd->procbase->ki_structsize);
 			return (0);
 		}
 liveout:
 		/* size is 0 here when the table is empty (or on the goto). */
 		nprocs = size == 0 ? 0 : size / kd->procbase->ki_structsize;
 	} else {
 		struct nlist nl[7], *p;
 
 		/* The indices below are relied on by the reads that follow. */
 		nl[0].n_name = "_nprocs";
 		nl[1].n_name = "_allproc";
 		nl[2].n_name = "_zombproc";
 		nl[3].n_name = "_ticks";
 		nl[4].n_name = "_hz";
 		nl[5].n_name = "_cpu_tick_frequency";
 		nl[6].n_name = 0;
 
 		if (!kd->arch->ka_native(kd)) {
 			_kvm_err(kd, kd->program,
 			    "cannot read procs from non-native core");
 			return (0);
 		}
 
 		if (kvm_nlist(kd, nl) != 0) {
 			/*
 			 * Report the first entry whose n_type is 0 --
 			 * presumably the first symbol kvm_nlist() could not
 			 * resolve; confirm against kvm_nlist().
 			 */
 			for (p = nl; p->n_type != 0; ++p)
 				;
 			_kvm_err(kd, kd->program,
 				 "%s: no such symbol", p->n_name);
 			return (0);
 		}
 		if (KREAD(kd, nl[0].n_value, &nprocs)) {
 			_kvm_err(kd, kd->program, "can't read nprocs");
 			return (0);
 		}
 		if (KREAD(kd, nl[3].n_value, &ticks)) {
 			_kvm_err(kd, kd->program, "can't read ticks");
 			return (0);
 		}
 		if (KREAD(kd, nl[4].n_value, &hz)) {
 			_kvm_err(kd, kd->program, "can't read hz");
 			return (0);
 		}
 		if (KREAD(kd, nl[5].n_value, &cpu_tick_frequency)) {
 			_kvm_err(kd, kd->program,
 			    "can't read cpu_tick_frequency");
 			return (0);
 		}
 		/*
 		 * NOTE(review): nprocs comes straight from the image and is
 		 * not range-checked before this multiplication.
 		 */
 		size = nprocs * sizeof(struct kinfo_proc);
 		kd->procbase = (struct kinfo_proc *)_kvm_malloc(kd, size);
 		if (kd->procbase == NULL)
 			return (0);
 
 		/* nprocs doubles as the capacity passed in and the count out. */
 		nprocs = kvm_deadprocs(kd, op, arg, nl[1].n_value,
 				      nl[2].n_value, nprocs);
 		if (nprocs <= 0) {
 			/* Nothing usable: drop the table, return NULL with 0. */
 			_kvm_freeprocs(kd);
 			nprocs = 0;
 		}
 #ifdef notdef
 		else {
 			size = nprocs * sizeof(struct kinfo_proc);
 			kd->procbase = realloc(kd->procbase, size);
 		}
 #endif
 	}
 	*cnt = nprocs;
 	return (kd->procbase);
 }
 
 /*
  * Drop the process table cached on the handle.  kd->procbase is NULL
  * afterwards, so a later free of that field is harmless.
  */
 void
 _kvm_freeprocs(kvm_t *kd)
 {
 	struct kinfo_proc *stale;
 
 	stale = kd->procbase;
 	kd->procbase = NULL;
 	free(stale);
 }
 
 /*
  * Resize p to n bytes with reallocf() and record "out of memory" on the
  * handle when that fails.  Returns the resized block, or NULL on failure.
  */
 void *
 _kvm_realloc(kvm_t *kd, void *p, size_t n)
 {
 
 	p = reallocf(p, n);
 	if (p != NULL)
 		return (p);
 	_kvm_err(kd, kd->program, "out of memory");
 	return (NULL);
 }
 
 /*
  * Fetch the argument strings (env == 0) or environment strings (env != 0)
  * of process kp->ki_pid with the KERN_PROC_ARGS / KERN_PROC_ENV sysctl and
  * return them as a NULL-terminated vector of pointers.
  *
  * nchr limits the number of bytes fetched; 0 or a value above ARG_MAX
  * selects ARG_MAX.  If the value does not fit, the truncated data is
  * NUL-terminated and returned rather than treated as an error.
  *
  * The string buffer and the pointer vector are function-static and reused
  * by every call (arguments and environment alike), so the result is only
  * valid until the next call and must not be freed by the caller.
  *
  * Returns NULL for a dead kernel, on allocation failure, when the sysctl
  * fails for a reason other than truncation, or when it returns no data.
  */
 static char **
 kvm_argv(kvm_t *kd, const struct kinfo_proc *kp, int env, int nchr)
 {
 	int oid[4];
 	int i;
 	int nargc;
 	size_t bufsz;
 	static int buflen;
 	static char *buf, *p;
 	static char **bufp;
 	static int argc;
 	char **nbufp;
 
 	if (!ISALIVE(kd)) {
 		_kvm_err(kd, kd->program,
 		    "cannot read user space from dead kernel");
 		return (NULL);
 	}
 
 	if (nchr == 0 || nchr > ARG_MAX)
 		nchr = ARG_MAX;
 	if (buflen == 0) {
 		/* First use: allocate both the buffer and the vector. */
 		buf = malloc(nchr);
 		if (buf == NULL) {
 			_kvm_err(kd, kd->program, "cannot allocate memory");
 			return (NULL);
 		}
 		argc = 32;
 		bufp = malloc(sizeof(char *) * argc);
 		if (bufp == NULL) {
 			free(buf);
 			buf = NULL;
 			_kvm_err(kd, kd->program, "cannot allocate memory");
 			return (NULL);
 		}
 		/* buflen stays 0 until both allocations have succeeded. */
 		buflen = nchr;
 	} else if (nchr > buflen) {
 		/* Best effort: keep using the old buffer if growing fails. */
 		p = realloc(buf, nchr);
 		if (p != NULL) {
 			buf = p;
 			buflen = nchr;
 		}
 	}
 	oid[0] = CTL_KERN;
 	oid[1] = KERN_PROC;
 	oid[2] = env ? KERN_PROC_ENV : KERN_PROC_ARGS;
 	oid[3] = kp->ki_pid;
 	bufsz = buflen;
 	if (sysctl(oid, 4, buf, &bufsz, 0, 0) == -1) {
 		/*
 		 * If the supplied buf is too short to hold the requested
 		 * value the sysctl returns with ENOMEM. The buf is filled
 		 * with the truncated value and the returned bufsz is equal
 		 * to the requested len.
 		 */
 		if (errno != ENOMEM || bufsz != (size_t)buflen)
 			return (NULL);
 		buf[bufsz - 1] = '\0';
 		errno = 0;
 	} else if (bufsz == 0)
 		return (NULL);
 
 	/* Split the NUL-separated strings into the pointer vector. */
 	i = 0;
 	p = buf;
 	do {
 		bufp[i++] = p;
 		p += strlen(p) + 1;
 		if (i >= argc) {
 			/*
 			 * Double the vector.  The new capacity is committed
 			 * to the static argc only after realloc() succeeds;
 			 * otherwise a failed call would leave argc larger
 			 * than the vector and a later call could write past
 			 * its end.
 			 */
 			nargc = argc + argc;
 			nbufp = realloc(bufp, sizeof(char *) * nargc);
 			if (nbufp == NULL)
 				return (NULL);
 			bufp = nbufp;
 			argc = nargc;
 		}
 	} while (p < buf + bufsz);
 	/* The growth check above guarantees room for the terminator. */
 	bufp[i++] = 0;
 	return (bufp);
 }
 
 /*
  * Return the argument vector of the process described by kp, fetching at
  * most nchr bytes.  This is kvm_argv() with env set to 0, so the result
  * lives in kvm_argv()'s static storage.
  */
 char **
 kvm_getargv(kvm_t *kd, const struct kinfo_proc *kp, int nchr)
 {
 	char **vec;
 
 	vec = kvm_argv(kd, kp, 0, nchr);
 	return (vec);
 }
 
 /*
  * Return the environment vector of the process described by kp, fetching
  * at most nchr bytes.  This is kvm_argv() with env set to 1, so the result
  * lives in kvm_argv()'s static storage.
  */
 char **
 kvm_getenvv(kvm_t *kd, const struct kinfo_proc *kp, int nchr)
 {
 	char **vec;
 
 	vec = kvm_argv(kd, kp, 1, nchr);
 	return (vec);
 }
Index: head/lib/libmilter/Makefile
===================================================================
--- head/lib/libmilter/Makefile	(revision 318735)
+++ head/lib/libmilter/Makefile	(revision 318736)
@@ -1,36 +1,37 @@
 # $FreeBSD$
 
 .include <src.opts.mk>
 
 PACKAGE=sendmail
 SENDMAIL_DIR=${SRCTOP}/contrib/sendmail
 .PATH:	${SENDMAIL_DIR}/libmilter ${SENDMAIL_DIR}/libsm
 
 CFLAGS+=-I${SENDMAIL_DIR}/src -I${SENDMAIL_DIR}/include -I.
 CFLAGS+=-DNOT_SENDMAIL -Dsm_snprintf=snprintf
 CFLAGS+=-D_THREAD_SAFE
 CFLAGS+=-DSM_CONF_POLL
 
 # IPv6 support follows the MK_INET6_SUPPORT build option.
 .if ${MK_INET6_SUPPORT} != "no"
 CFLAGS+=-DNETINET6
 .endif
 
 # User customizations to the sendmail build environment
 CFLAGS+=${SENDMAIL_CFLAGS}
 
 INCSDIR=${INCLUDEDIR}/libmilter
 INCS=	${SENDMAIL_DIR}/include/libmilter/mfapi.h \
 	${SENDMAIL_DIR}/include/libmilter/mfdef.h
 LIB=	milter
 
 # sm_os.h is generated (see the rule below), hence also in CLEANFILES.
 SRCS+=	sm_os.h
 SRCS+=	main.c engine.c listener.c handler.c comm.c monitor.c smfi.c \
 	signal.c sm_gethost.c errstring.c strl.c worker.c
 CLEANFILES+=sm_os.h
 
 WARNS?=	0
 # Major version of the shared library built here.
+SHLIB_MAJOR= 6
 
 # sm_os.h is a symlink to the FreeBSD-specific header in the sendmail tree.
 sm_os.h: ${SENDMAIL_DIR}/include/sm/os/sm_os_freebsd.h .NOMETA
 	ln -sf ${.ALLSRC} ${.TARGET}
 
 .include <bsd.lib.mk>
Index: head/lib/libprocstat/Makefile
===================================================================
--- head/lib/libprocstat/Makefile	(revision 318735)
+++ head/lib/libprocstat/Makefile	(revision 318736)
@@ -1,72 +1,76 @@
 # $FreeBSD$
 
 .include <src.opts.mk>
 
 PACKAGE=lib${LIB}
 LIB=	procstat
 
 SRCS=	cd9660.c	\
 	common_kvm.c	\
 	core.c		\
 	libprocstat.c	\
 	msdosfs.c	\
 	smbfs.c		\
 	udf.c
 
 # libprocstat_compat.c is only compiled when symbol versioning is enabled.
+.if ${MK_SYMVER} == yes
+SRCS+=	libprocstat_compat.c
+.endif
+
 # Exported symbols and their version nodes are listed in Symbol.map.
 VERSION_DEF=	${LIBCSRCDIR}/Versions.def
 SYMBOL_MAPS=	${.CURDIR}/Symbol.map
 
 INCS=		libprocstat.h
 CFLAGS+=	-I. -I${.CURDIR} -D_KVM_VNODE
 SHLIB_MAJOR=	1
 
 LIBADD=		elf kvm util
 
 # Every public function gets a manual page link to libprocstat.3.
 MAN=		libprocstat.3
 MLINKS+=libprocstat.3 procstat_close.3 \
 		libprocstat.3 procstat_freeargv.3 \
 		libprocstat.3 procstat_freeauxv.3 \
 		libprocstat.3 procstat_freeenvv.3 \
 		libprocstat.3 procstat_freefiles.3 \
 		libprocstat.3 procstat_freegroups.3 \
 		libprocstat.3 procstat_freekstack.3 \
 		libprocstat.3 procstat_freeprocs.3 \
 		libprocstat.3 procstat_freevmmap.3 \
 		libprocstat.3 procstat_get_pipe_info.3 \
 		libprocstat.3 procstat_get_pts_info.3 \
 		libprocstat.3 procstat_get_sem_info.3 \
 		libprocstat.3 procstat_get_shm_info.3 \
 		libprocstat.3 procstat_get_socket_info.3 \
 		libprocstat.3 procstat_get_vnode_info.3 \
 		libprocstat.3 procstat_getargv.3 \
 		libprocstat.3 procstat_getauxv.3 \
 		libprocstat.3 procstat_getenvv.3 \
 		libprocstat.3 procstat_getfiles.3 \
 		libprocstat.3 procstat_getgroups.3 \
 		libprocstat.3 procstat_getkstack.3 \
 		libprocstat.3 procstat_getosrel.3 \
 		libprocstat.3 procstat_getpathname.3 \
 		libprocstat.3 procstat_getprocs.3 \
 		libprocstat.3 procstat_getrlimit.3 \
 		libprocstat.3 procstat_getumask.3 \
 		libprocstat.3 procstat_getvmmap.3 \
 		libprocstat.3 procstat_open_core.3 \
 		libprocstat.3 procstat_open_kvm.3 \
 		libprocstat.3 procstat_open_sysctl.3
 
 # XXX This is a hack.
 # When CDDL code is enabled, the ZFS support objects are built by a
 # sub-make in zfs/ and added directly to the object lists of this library.
 .if ${MK_CDDL} != "no"
 CFLAGS+=	-DLIBPROCSTAT_ZFS
 OBJS+=	zfs/zfs.o
 SOBJS+=	zfs/zfs.pico
 POBJS+=	zfs/zfs.po
 SUBDIR=	zfs
 zfs/zfs.o: .PHONY
 	@cd ${.CURDIR}/zfs && ${MAKE} zfs.o
 zfs/zfs.pico: .PHONY
 	@cd ${.CURDIR}/zfs && ${MAKE} zfs.pico
 zfs/zfs.po: .PHONY
 	@cd ${.CURDIR}/zfs && ${MAKE} zfs.po
 .endif
 
 .include <bsd.lib.mk>
Index: head/lib/libprocstat/Symbol.map
===================================================================
--- head/lib/libprocstat/Symbol.map	(revision 318735)
+++ head/lib/libprocstat/Symbol.map	(revision 318736)
@@ -1,43 +1,43 @@
 /*
  * $FreeBSD$
  */
 /*
  * Symbols exported under the FBSD_1.2 version node.
  */
 FBSD_1.2 {
 	procstat_close;
 	procstat_freefiles;
 	procstat_freeprocs;
 	procstat_get_pipe_info;
-	procstat_get_pts_info;
 	procstat_get_socket_info;
-	procstat_get_vnode_info;
 	procstat_getfiles;
 	procstat_getprocs;
 	procstat_open_kvm;
 	procstat_open_sysctl;
 };
 
 /*
  * Symbols exported under the FBSD_1.3 version node.
  */
 FBSD_1.3 {
 	procstat_freeargv;
 	procstat_freeauxv;
 	procstat_freeenvv;
 	procstat_freegroups;
 	procstat_freekstack;
 	procstat_freevmmap;
-	procstat_get_sem_info;
-	procstat_get_shm_info;
 	procstat_getargv;
 	procstat_getauxv;
 	procstat_getenvv;
 	procstat_getgroups;
 	procstat_getkstack;
 	procstat_getosrel;
 	procstat_getpathname;
 	procstat_getrlimit;
 	procstat_getumask;
 	procstat_getvmmap;
 	procstat_open_core;
 };
 
 /*
  * Symbols exported under the FBSD_1.5 version node.  The four
  * procstat_get_*_info entries at the end are the ones removed from the
  * FBSD_1.2 and FBSD_1.3 nodes above.
  */
 FBSD_1.5 {
 	procstat_freeptlwpinfo;
 	procstat_getptlwpinfo;
+	procstat_get_pts_info;
+	procstat_get_sem_info;
+	procstat_get_shm_info;
+	procstat_get_vnode_info;
 };
Index: head/lib/libprocstat/libprocstat.c
===================================================================
--- head/lib/libprocstat/libprocstat.c	(revision 318735)
+++ head/lib/libprocstat/libprocstat.c	(revision 318736)
@@ -1,2582 +1,2584 @@
 /*-
  * Copyright (c) 2017 Dell EMC
  * Copyright (c) 2009 Stanislav Sedov <stas@FreeBSD.org>
  * Copyright (c) 1988, 1993
  *      The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by the University of
  *      California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/elf.h>
 #include <sys/time.h>
 #include <sys/resourcevar.h>
 #define	_WANT_UCRED
 #include <sys/ucred.h>
 #undef _WANT_UCRED
 #include <sys/proc.h>
 #include <sys/user.h>
 #include <sys/stat.h>
 #include <sys/vnode.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
 #include <sys/sysctl.h>
 #include <sys/tty.h>
 #include <sys/filedesc.h>
 #include <sys/queue.h>
 #define	_WANT_FILE
 #include <sys/file.h>
 #include <sys/conf.h>
 #include <sys/ksem.h>
 #include <sys/mman.h>
 #include <sys/capsicum.h>
 #include <sys/ptrace.h>
 #define	_KERNEL
 #include <sys/mount.h>
 #include <sys/pipe.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <fs/devfs/devfs.h>
 #include <fs/devfs/devfs_int.h>
 #undef _KERNEL
 #include <nfs/nfsproto.h>
 #include <nfsclient/nfs.h>
 #include <nfsclient/nfsnode.h>
 
 #include <vm/vm.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #define	_WANT_INPCB
 #include <netinet/in_pcb.h>
 
 #include <assert.h>
 #include <ctype.h>
 #include <err.h>
 #include <fcntl.h>
 #include <kvm.h>
 #include <libutil.h>
 #include <limits.h>
 #include <paths.h>
 #include <pwd.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stddef.h>
 #include <string.h>
 #include <unistd.h>
 #include <netdb.h>
 
 #include <libprocstat.h>
 #include "libprocstat_internal.h"
 #include "common_kvm.h"
 #include "core.h"
 
 int     statfs(const char *, struct statfs *);	/* XXX */
 
 #define	PROCSTAT_KVM	1
 #define	PROCSTAT_SYSCTL	2
 #define	PROCSTAT_CORE	3
 
 static char	**getargv(struct procstat *procstat, struct kinfo_proc *kp,
     size_t nchr, int env);
 static char	*getmnton(kvm_t *kd, struct mount *m);
 static struct kinfo_vmentry *	kinfo_getvmmap_core(struct procstat_core *core,
     int *cntp);
 static Elf_Auxinfo	*procstat_getauxv_core(struct procstat_core *core,
     unsigned int *cntp);
 static Elf_Auxinfo	*procstat_getauxv_sysctl(pid_t pid, unsigned int *cntp);
 static struct filestat_list	*procstat_getfiles_kvm(
     struct procstat *procstat, struct kinfo_proc *kp, int mmapped);
 static struct filestat_list	*procstat_getfiles_sysctl(
     struct procstat *procstat, struct kinfo_proc *kp, int mmapped);
 static int	procstat_get_pipe_info_sysctl(struct filestat *fst,
     struct pipestat *pipe, char *errbuf);
 static int	procstat_get_pipe_info_kvm(kvm_t *kd, struct filestat *fst,
     struct pipestat *pipe, char *errbuf);
 static int	procstat_get_pts_info_sysctl(struct filestat *fst,
     struct ptsstat *pts, char *errbuf);
 static int	procstat_get_pts_info_kvm(kvm_t *kd, struct filestat *fst,
     struct ptsstat *pts, char *errbuf);
 static int	procstat_get_sem_info_sysctl(struct filestat *fst,
     struct semstat *sem, char *errbuf);
 static int	procstat_get_sem_info_kvm(kvm_t *kd, struct filestat *fst,
     struct semstat *sem, char *errbuf);
 static int	procstat_get_shm_info_sysctl(struct filestat *fst,
     struct shmstat *shm, char *errbuf);
 static int	procstat_get_shm_info_kvm(kvm_t *kd, struct filestat *fst,
     struct shmstat *shm, char *errbuf);
 static int	procstat_get_socket_info_sysctl(struct filestat *fst,
     struct sockstat *sock, char *errbuf);
 static int	procstat_get_socket_info_kvm(kvm_t *kd, struct filestat *fst,
     struct sockstat *sock, char *errbuf);
 static int	to_filestat_flags(int flags);
 static int	procstat_get_vnode_info_kvm(kvm_t *kd, struct filestat *fst,
     struct vnstat *vn, char *errbuf);
 static int	procstat_get_vnode_info_sysctl(struct filestat *fst,
     struct vnstat *vn, char *errbuf);
 static gid_t	*procstat_getgroups_core(struct procstat_core *core,
     unsigned int *count);
 static gid_t *	procstat_getgroups_kvm(kvm_t *kd, struct kinfo_proc *kp,
     unsigned int *count);
 static gid_t	*procstat_getgroups_sysctl(pid_t pid, unsigned int *count);
 static struct kinfo_kstack	*procstat_getkstack_sysctl(pid_t pid,
     int *cntp);
 static int	procstat_getosrel_core(struct procstat_core *core,
     int *osrelp);
 static int	procstat_getosrel_kvm(kvm_t *kd, struct kinfo_proc *kp,
     int *osrelp);
 static int	procstat_getosrel_sysctl(pid_t pid, int *osrelp);
 static int	procstat_getpathname_core(struct procstat_core *core,
     char *pathname, size_t maxlen);
 static int	procstat_getpathname_sysctl(pid_t pid, char *pathname,
     size_t maxlen);
 static int	procstat_getrlimit_core(struct procstat_core *core, int which,
     struct rlimit* rlimit);
 static int	procstat_getrlimit_kvm(kvm_t *kd, struct kinfo_proc *kp,
     int which, struct rlimit* rlimit);
 static int	procstat_getrlimit_sysctl(pid_t pid, int which,
     struct rlimit* rlimit);
 static int	procstat_getumask_core(struct procstat_core *core,
     unsigned short *maskp);
 static int	procstat_getumask_kvm(kvm_t *kd, struct kinfo_proc *kp,
     unsigned short *maskp);
 static int	procstat_getumask_sysctl(pid_t pid, unsigned short *maskp);
 static int	vntype2psfsttype(int type);
 
 /*
  * Destroy a procstat handle: close the kvm descriptor or the core reader
  * that backs it (the sysctl method has neither), release the cached
  * argument and environment vectors, and free the handle itself.
  */
 void
 procstat_close(struct procstat *procstat)
 {
 
 	assert(procstat);
 	switch (procstat->type) {
 	case PROCSTAT_KVM:
 		kvm_close(procstat->kd);
 		break;
 	case PROCSTAT_CORE:
 		procstat_core_close(procstat->core);
 		break;
 	default:
 		/* No backend resource to release. */
 		break;
 	}
 	procstat_freeargv(procstat);
 	procstat_freeenvv(procstat);
 	free(procstat);
 }
 
 /*
  * Create a zero-initialized handle that uses the sysctl access method.
  * Returns NULL, after a warning, when the allocation fails.
  */
 struct procstat *
 procstat_open_sysctl(void)
 {
 	struct procstat *ps;
 
 	ps = calloc(1, sizeof(*ps));
 	if (ps != NULL) {
 		ps->type = PROCSTAT_SYSCTL;
 		return (ps);
 	}
 	warn("malloc()");
 	return (NULL);
 }
 
 /*
  * Create a handle that uses the kvm access method.  nlistf and memf are
  * passed unchanged to kvm_openfiles(), which is opened read-only.
  * Returns NULL, after a warning, when the allocation or the open fails.
  */
 struct procstat *
 procstat_open_kvm(const char *nlistf, const char *memf)
 {
 	char errbuf[_POSIX2_LINE_MAX];
 	struct procstat *ps;
 	kvm_t *kd;
 
 	ps = calloc(1, sizeof(*ps));
 	if (ps == NULL) {
 		warn("malloc()");
 		return (NULL);
 	}
 	kd = kvm_openfiles(nlistf, memf, NULL, O_RDONLY, errbuf);
 	if (kd != NULL) {
 		ps->type = PROCSTAT_KVM;
 		ps->kd = kd;
 		return (ps);
 	}
 	/* kvm_openfiles() left its error text in errbuf. */
 	warnx("kvm_openfiles(): %s", errbuf);
 	free(ps);
 	return (NULL);
 }
 
 /*
  * Create a handle that reads process information from the core file
  * filename through procstat_core_open().  Returns NULL when the
  * allocation fails (after a warning) or when the core cannot be opened.
  */
 struct procstat *
 procstat_open_core(const char *filename)
 {
 	struct procstat_core *core;
 	struct procstat *ps;
 
 	ps = calloc(1, sizeof(*ps));
 	if (ps == NULL) {
 		warn("malloc()");
 		return (NULL);
 	}
 	core = procstat_core_open(filename);
 	if (core != NULL) {
 		ps->type = PROCSTAT_CORE;
 		ps->core = core;
 		return (ps);
 	}
 	free(ps);
 	return (NULL);
 }
 
 /*
  * Return a newly allocated array of kinfo_proc entries selected by
  * what/arg and store the number of entries in *count.  The caller owns
  * the array and releases it with procstat_freeprocs().
  *
  * kvm method:    the table returned by kvm_getprocs() is copied, because
  *                that table is owned by the kvm handle.
  * sysctl method: kern.proc is queried for a size and then read into a
  *                buffer that is enlarged by 10% per attempt while the
  *                call fails with ENOMEM and fills the whole buffer.
  * core method:   the PSC_TYPE_PROC data is fetched from the core reader.
  *
  * Returns NULL on any failure.  For the sysctl and core methods the data
  * must be a non-empty whole number of entries whose first ki_structsize
  * matches sizeof(struct kinfo_proc).
  */
 struct kinfo_proc *
 procstat_getprocs(struct procstat *procstat, int what, int arg,
     unsigned int *count)
 {
 	struct kinfo_proc *p0, *p;
 	size_t len, olen;
 	int name[4];
 	int cnt;
 	int error;
 
 	assert(procstat);
 	assert(count);
 	p = NULL;
 	if (procstat->type == PROCSTAT_KVM) {
 		*count = 0;
 		p0 = kvm_getprocs(procstat->kd, what, arg, &cnt);
 		if (p0 == NULL || cnt <= 0)
 			return (NULL);
 		*count = cnt;
 		len = *count * sizeof(*p);
 		p = malloc(len);
 		if (p == NULL) {
 			warnx("malloc(%zu)", len);
 			goto fail;
 		}
 		bcopy(p0, p, len);
 		return (p);
 	} else if (procstat->type == PROCSTAT_SYSCTL) {
 		len = 0;
 		name[0] = CTL_KERN;
 		name[1] = KERN_PROC;
 		name[2] = what;
 		name[3] = arg;
 		/* An EPERM failure is not treated as fatal here. */
 		error = sysctl(name, nitems(name), NULL, &len, NULL, 0);
 		if (error < 0 && errno != EPERM) {
 			warn("sysctl(kern.proc)");
 			goto fail;
 		}
 		if (len == 0) {
 			warnx("no processes?");
 			goto fail;
 		}
 		do {
 			len += len / 10;
 			p = reallocf(p, len);
 			if (p == NULL) {
 				warnx("reallocf(%zu)", len);
 				goto fail;
 			}
 			olen = len;
 			error = sysctl(name, nitems(name), p, &len, NULL, 0);
 		} while (error < 0 && errno == ENOMEM && olen == len);
 		if (error < 0 && errno != EPERM) {
 			warn("sysctl(kern.proc)");
 			goto fail;
 		}
 		/*
 		 * Perform simple consistency checks.  The length test comes
 		 * first so ki_structsize is only read from a buffer that
 		 * actually holds at least one entry.
 		 */
 		if (len < sizeof(*p) || (len % sizeof(*p)) != 0 ||
 		    p->ki_structsize != sizeof(*p)) {
 			warnx("kinfo_proc structure size mismatch (len = %zu)", len);
 			goto fail;
 		}
 		*count = len / sizeof(*p);
 		return (p);
 	} else if (procstat->type == PROCSTAT_CORE) {
 		p = procstat_core_get(procstat->core, PSC_TYPE_PROC, NULL,
 		    &len);
 		/* Do not dereference a failed lookup. */
 		if (p == NULL)
 			goto fail;
 		if (len < sizeof(*p) || (len % sizeof(*p)) != 0 ||
 		    p->ki_structsize != sizeof(*p)) {
 			warnx("kinfo_proc structure size mismatch");
 			goto fail;
 		}
 		*count = len / sizeof(*p);
 		return (p);
 	} else {
 		warnx("unknown access method: %d", procstat->type);
 		return (NULL);
 	}
 fail:
 	free(p);
 	return (NULL);
 }
 
 /*
  * Release an array returned by procstat_getprocs().  The handle is not
  * used; a NULL array is accepted.
  */
 void
 procstat_freeprocs(struct procstat *procstat __unused, struct kinfo_proc *p)
 {
 
 	free(p);
 }
 
 /*
  * Return the list of files used by process kp, dispatching on the access
  * method of the handle: kvm has its own implementation, while the sysctl
  * and core methods share one.  Returns NULL, after a warning, for an
  * unknown access method.
  */
 struct filestat_list *
 procstat_getfiles(struct procstat *procstat, struct kinfo_proc *kp, int mmapped)
 {
 
 	if (procstat->type == PROCSTAT_KVM)
 		return (procstat_getfiles_kvm(procstat, kp, mmapped));
 	if (procstat->type == PROCSTAT_SYSCTL ||
 	    procstat->type == PROCSTAT_CORE)
 		return (procstat_getfiles_sysctl(procstat, kp, mmapped));
 
 	warnx("unknown access method: %d", procstat->type);
 	return (NULL);
 }
 
 /*
  * Release a list returned by procstat_getfiles(): every entry together
  * with its path string, then the list head.  The vmentries and files
  * buffers cached on the handle are released and cleared as well.
  */
 void
 procstat_freefiles(struct procstat *procstat, struct filestat_list *head)
 {
 	struct filestat *cur, *following;
 
 	/* The _SAFE variant is needed because cur is freed in the body. */
 	STAILQ_FOREACH_SAFE(cur, head, next, following) {
 		free(cur->fs_path);
 		free(cur);
 	}
 	free(head);
 
 	free(procstat->vmentries);
 	procstat->vmentries = NULL;
 	free(procstat->files);
 	procstat->files = NULL;
 }
 
 /*
  * Allocate a filestat entry and fill it from the arguments.  path is
  * stored as given, not copied.  When cap_rightsp is NULL the entry's
  * capability rights are set up with cap_rights_init() instead of being
  * copied.  Returns NULL, after a warning, when the allocation fails.
  */
 static struct filestat *
 filestat_new_entry(void *typedep, int type, int fd, int fflags, int uflags,
     int refcount, off_t offset, char *path, cap_rights_t *cap_rightsp)
 {
 	struct filestat *fs;
 
 	fs = calloc(1, sizeof(*fs));
 	if (fs == NULL) {
 		warn("malloc()");
 		return (NULL);
 	}
 
 	fs->fs_type = type;
 	fs->fs_typedep = typedep;
 	fs->fs_fd = fd;
 	fs->fs_fflags = fflags;
 	fs->fs_uflags = uflags;
 	fs->fs_ref_count = refcount;
 	fs->fs_offset = offset;
 	fs->fs_path = path;
 	if (cap_rightsp == NULL)
 		cap_rights_init(&fs->fs_cap_rights);
 	else
 		fs->fs_cap_rights = *cap_rightsp;
 	return (fs);
 }
 
 /*
  * Find the controlling terminal vnode of process kp by following
  * proc -> p_pgrp -> pg_session -> s_ttyvp in kernel memory.
  *
  * Returns the s_ttyvp pointer as read from the kernel (it may be NULL),
  * or NULL when the process has no process group or one of the structures
  * cannot be read; read failures are reported with warnx().
  */
 static struct vnode *
 getctty(kvm_t *kd, struct kinfo_proc *kp)
 {
 	struct proc proc;
 	struct pgrp pgrp;
 	struct session sess;
 
 	assert(kp);
 	if (!kvm_read_all(kd, (unsigned long)kp->ki_paddr, &proc,
 	    sizeof(proc))) {
 		warnx("can't read proc struct at %p for pid %d",
 		    kp->ki_paddr, kp->ki_pid);
 		return (NULL);
 	}
 	if (proc.p_pgrp == NULL)
 		return (NULL);
 
 	if (!kvm_read_all(kd, (unsigned long)proc.p_pgrp, &pgrp,
 	    sizeof(pgrp))) {
 		warnx("can't read pgrp struct at %p for pid %d",
 		    proc.p_pgrp, kp->ki_pid);
 		return (NULL);
 	}
 
 	if (!kvm_read_all(kd, (unsigned long)pgrp.pg_session, &sess,
 	    sizeof(sess))) {
 		warnx("can't read session struct at %p for pid %d",
 		    pgrp.pg_session, kp->ki_pid);
 		return (NULL);
 	}
 	return (sess.s_ttyvp);
 }
 
 /*
  * Build the list of files referenced by process kp by reading kernel
  * structures through kvm.
  *
  * The list holds, in this order: the root, current and jail directory
  * vnodes from the process's filedesc, the ktrace and text vnodes from kp,
  * the controlling terminal vnode, one entry per open descriptor of a
  * recognized type and, when mmapped is non-zero, one entry per mapping
  * whose bottom-most backing object is a vnode object.  fs_typedep of each
  * entry is the pointer value read from the kernel structure.
  *
  * A failure to read the descriptor table or the address space is reported
  * with a warning and ends that part of the scan; the entries collected so
  * far are still returned.  Returns NULL only when the handle has no kvm
  * descriptor, kp has no filedesc, the filedesc cannot be read, or the list
  * head cannot be allocated.
  */
 static struct filestat_list *
 procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmapped)
 {
 	struct file file;
 	struct filedesc filed;
 	struct vm_map_entry vmentry;
 	struct vm_object object;
 	struct vmspace vmspace;
 	vm_map_entry_t entryp;
 	vm_map_t map;
 	vm_object_t objp;
 	struct vnode *vp;
 	struct file **ofiles;
 	struct filestat *entry;
 	struct filestat_list *head;
 	kvm_t *kd;
 	void *data;
 	int i, fflags;
 	int prot, type;
 	unsigned int nfiles;
 
 	assert(procstat);
 	kd = procstat->kd;
 	if (kd == NULL)
 		return (NULL);
 	if (kp->ki_fd == NULL)
 		return (NULL);
 	if (!kvm_read_all(kd, (unsigned long)kp->ki_fd, &filed,
 	    sizeof(filed))) {
 		warnx("can't read filedesc at %p", (void *)kp->ki_fd);
 		return (NULL);
 	}
 
 	/*
 	 * Allocate list head.
 	 */
 	head = malloc(sizeof(*head));
 	if (head == NULL)
 		return (NULL);
 	STAILQ_INIT(head);
 
 	/* root directory vnode, if one. */
 	if (filed.fd_rdir) {
 		entry = filestat_new_entry(filed.fd_rdir, PS_FST_TYPE_VNODE, -1,
 		    PS_FST_FFLAG_READ, PS_FST_UFLAG_RDIR, 0, 0, NULL, NULL);
 		if (entry != NULL)
 			STAILQ_INSERT_TAIL(head, entry, next);
 	}
 	/* current working directory vnode. */
 	if (filed.fd_cdir) {
 		entry = filestat_new_entry(filed.fd_cdir, PS_FST_TYPE_VNODE, -1,
 		    PS_FST_FFLAG_READ, PS_FST_UFLAG_CDIR, 0, 0, NULL, NULL);
 		if (entry != NULL)
 			STAILQ_INSERT_TAIL(head, entry, next);
 	}
 	/* jail root, if any. */
 	if (filed.fd_jdir) {
 		entry = filestat_new_entry(filed.fd_jdir, PS_FST_TYPE_VNODE, -1,
 		    PS_FST_FFLAG_READ, PS_FST_UFLAG_JAIL, 0, 0, NULL, NULL);
 		if (entry != NULL)
 			STAILQ_INSERT_TAIL(head, entry, next);
 	}
 	/* ktrace vnode, if one */
 	if (kp->ki_tracep) {
 		entry = filestat_new_entry(kp->ki_tracep, PS_FST_TYPE_VNODE, -1,
 		    PS_FST_FFLAG_READ | PS_FST_FFLAG_WRITE,
 		    PS_FST_UFLAG_TRACE, 0, 0, NULL, NULL);
 		if (entry != NULL)
 			STAILQ_INSERT_TAIL(head, entry, next);
 	}
 	/* text vnode, if one */
 	if (kp->ki_textvp) {
 		entry = filestat_new_entry(kp->ki_textvp, PS_FST_TYPE_VNODE, -1,
 		    PS_FST_FFLAG_READ, PS_FST_UFLAG_TEXT, 0, 0, NULL, NULL);
 		if (entry != NULL)
 			STAILQ_INSERT_TAIL(head, entry, next);
 	}
 	/* Controlling terminal. */
 	if ((vp = getctty(kd, kp)) != NULL) {
 		entry = filestat_new_entry(vp, PS_FST_TYPE_VNODE, -1,
 		    PS_FST_FFLAG_READ | PS_FST_FFLAG_WRITE,
 		    PS_FST_UFLAG_CTTY, 0, 0, NULL, NULL);
 		if (entry != NULL)
 			STAILQ_INSERT_TAIL(head, entry, next);
 	}
 
 	/* Copy the descriptor table (array of struct file pointers). */
 	nfiles = filed.fd_lastfile + 1;
 	ofiles = malloc(nfiles * sizeof(struct file *));
 	if (ofiles == NULL) {
 		warn("malloc(%zu)", nfiles * sizeof(struct file *));
 		goto do_mmapped;
 	}
 	if (!kvm_read_all(kd, (unsigned long)filed.fd_ofiles, ofiles,
 	    nfiles * sizeof(struct file *))) {
 		warnx("cannot read file structures at %p",
 		    (void *)filed.fd_ofiles);
 		free(ofiles);
 		goto do_mmapped;
 	}
 	for (i = 0; i <= filed.fd_lastfile; i++) {
 		if (ofiles[i] == NULL)
 			continue;
 		if (!kvm_read_all(kd, (unsigned long)ofiles[i], &file,
 		    sizeof(struct file))) {
 			warnx("can't read file %d at %p", i,
 			    (void *)ofiles[i]);
 			continue;
 		}
 		/* Descriptors of any other type are silently skipped. */
 		switch (file.f_type) {
 		case DTYPE_VNODE:
 			type = PS_FST_TYPE_VNODE;
 			data = file.f_vnode;
 			break;
 		case DTYPE_SOCKET:
 			type = PS_FST_TYPE_SOCKET;
 			data = file.f_data;
 			break;
 		case DTYPE_PIPE:
 			type = PS_FST_TYPE_PIPE;
 			data = file.f_data;
 			break;
 		case DTYPE_FIFO:
 			type = PS_FST_TYPE_FIFO;
 			data = file.f_vnode;
 			break;
 #ifdef DTYPE_PTS
 		case DTYPE_PTS:
 			type = PS_FST_TYPE_PTS;
 			data = file.f_data;
 			break;
 #endif
 		case DTYPE_SEM:
 			type = PS_FST_TYPE_SEM;
 			data = file.f_data;
 			break;
 		case DTYPE_SHM:
 			type = PS_FST_TYPE_SHM;
 			data = file.f_data;
 			break;
 		default:
 			continue;
 		}
 		/* XXXRW: No capability rights support for kvm yet. */
 		entry = filestat_new_entry(data, type, i,
 		    to_filestat_flags(file.f_flag), 0, 0, 0, NULL, NULL);
 		if (entry != NULL)
 			STAILQ_INSERT_TAIL(head, entry, next);
 	}
 	free(ofiles);
 
 do_mmapped:
 
 	/*
 	 * Process mmapped files if requested.
 	 */
 	if (mmapped) {
 		if (!kvm_read_all(kd, (unsigned long)kp->ki_vmspace, &vmspace,
 		    sizeof(vmspace))) {
 			warnx("can't read vmspace at %p",
 			    (void *)kp->ki_vmspace);
 			goto exit;
 		}
 		map = &vmspace.vm_map;
 
 		/*
 		 * The map entries form a ring in kernel memory; the walk
 		 * ends when it gets back to the kernel address of the
 		 * header (only the address is computed, nothing is read
 		 * through kp->ki_vmspace here).
 		 */
 		for (entryp = map->header.next;
 		    entryp != &kp->ki_vmspace->vm_map.header;
 		    entryp = vmentry.next) {
 			if (!kvm_read_all(kd, (unsigned long)entryp, &vmentry,
 			    sizeof(vmentry))) {
 				warnx("can't read vm_map_entry at %p",
 				    (void *)entryp);
 				/*
 				 * Without this entry there is no valid next
 				 * pointer; continuing would follow stale
 				 * data and could loop forever.
 				 */
 				break;
 			}
 			if (vmentry.eflags & MAP_ENTRY_IS_SUB_MAP)
 				continue;
 			if ((objp = vmentry.object.vm_object) == NULL)
 				continue;
 			/* Follow the backing chain to its last object. */
 			for (; objp; objp = object.backing_object) {
 				if (!kvm_read_all(kd, (unsigned long)objp,
 				    &object, sizeof(object))) {
 					warnx("can't read vm_object at %p",
 					    (void *)objp);
 					break;
 				}
 			}
 			/*
 			 * objp is only non-NULL here if a read failed; in
 			 * that case "object" does not describe the end of
 			 * the chain, so skip this mapping.
 			 */
 			if (objp != NULL)
 				continue;
 
 			/* We want only vnode objects. */
 			if (object.type != OBJT_VNODE)
 				continue;
 
 			/* A copy-on-write mapping is not reported writable. */
 			prot = vmentry.protection;
 			fflags = 0;
 			if (prot & VM_PROT_READ)
 				fflags = PS_FST_FFLAG_READ;
 			if ((vmentry.eflags & MAP_ENTRY_COW) == 0 &&
 			    prot & VM_PROT_WRITE)
 				fflags |= PS_FST_FFLAG_WRITE;
 
 			/*
 			 * Create filestat entry.
 			 */
 			entry = filestat_new_entry(object.handle,
 			    PS_FST_TYPE_VNODE, -1, fflags,
 			    PS_FST_UFLAG_MMAP, 0, 0, NULL, NULL);
 			if (entry != NULL)
 				STAILQ_INSERT_TAIL(head, entry, next);
 		}
 	}
 exit:
 	return (head);
 }
 
 /*
  * Map a kinfo descriptor type (KF_TYPE_*) to the matching filestat type
  * (PS_FST_TYPE_*).  A type that is not in the table maps to
  * PS_FST_TYPE_UNKNOWN.
  */
 static int
 kinfo_type2fst(int kftype)
 {
 	static struct {
 		int	kf_type;
 		int	fst_type;
 	} kftypes2fst[] = {
 		{ KF_TYPE_CRYPTO, PS_FST_TYPE_CRYPTO },
 		{ KF_TYPE_FIFO, PS_FST_TYPE_FIFO },
 		{ KF_TYPE_KQUEUE, PS_FST_TYPE_KQUEUE },
 		{ KF_TYPE_MQUEUE, PS_FST_TYPE_MQUEUE },
 		{ KF_TYPE_NONE, PS_FST_TYPE_NONE },
 		{ KF_TYPE_PIPE, PS_FST_TYPE_PIPE },
 		{ KF_TYPE_PTS, PS_FST_TYPE_PTS },
 		{ KF_TYPE_SEM, PS_FST_TYPE_SEM },
 		{ KF_TYPE_SHM, PS_FST_TYPE_SHM },
 		{ KF_TYPE_SOCKET, PS_FST_TYPE_SOCKET },
 		{ KF_TYPE_VNODE, PS_FST_TYPE_VNODE },
 		{ KF_TYPE_UNKNOWN, PS_FST_TYPE_UNKNOWN }
 	};
 #define NKFTYPES	(sizeof(kftypes2fst) / sizeof(*kftypes2fst))
 	unsigned int idx;
 
 	/* Linear search; return as soon as the type is found. */
 	for (idx = 0; idx < NKFTYPES; idx++) {
 		if (kftype == kftypes2fst[idx].kf_type)
 			return (kftypes2fst[idx].fst_type);
 	}
 	return (PS_FST_TYPE_UNKNOWN);
 }
 
 /*
  * Translate a kinfo file flag mask (KF_FLAG_*) into the equivalent
  * filestat flag mask (PS_FST_FFLAG_*).  Bits without a table entry are
  * dropped.
  */
 static int
 kinfo_fflags2fst(int kfflags)
 {
 	static struct {
 		int	kf_flag;
 		int	fst_flag;
 	} kfflags2fst[] = {
 		{ KF_FLAG_APPEND, PS_FST_FFLAG_APPEND },
 		{ KF_FLAG_ASYNC, PS_FST_FFLAG_ASYNC },
 		{ KF_FLAG_CREAT, PS_FST_FFLAG_CREAT },
 		{ KF_FLAG_DIRECT, PS_FST_FFLAG_DIRECT },
 		{ KF_FLAG_EXCL, PS_FST_FFLAG_EXCL },
 		{ KF_FLAG_EXEC, PS_FST_FFLAG_EXEC },
 		{ KF_FLAG_EXLOCK, PS_FST_FFLAG_EXLOCK },
 		{ KF_FLAG_FSYNC, PS_FST_FFLAG_SYNC },
 		{ KF_FLAG_HASLOCK, PS_FST_FFLAG_HASLOCK },
 		{ KF_FLAG_NOFOLLOW, PS_FST_FFLAG_NOFOLLOW },
 		{ KF_FLAG_NONBLOCK, PS_FST_FFLAG_NONBLOCK },
 		{ KF_FLAG_READ, PS_FST_FFLAG_READ },
 		{ KF_FLAG_SHLOCK, PS_FST_FFLAG_SHLOCK },
 		{ KF_FLAG_TRUNC, PS_FST_FFLAG_TRUNC },
 		{ KF_FLAG_WRITE, PS_FST_FFLAG_WRITE }
 	};
 #define NKFFLAGS	(sizeof(kfflags2fst) / sizeof(*kfflags2fst))
 	unsigned int idx;
 	int result;
 
 	/* Accumulate the filestat bit of every kinfo bit that is set. */
 	result = 0;
 	for (idx = 0; idx < NKFFLAGS; idx++) {
 		if ((kfflags & kfflags2fst[idx].kf_flag) == 0)
 			continue;
 		result |= kfflags2fst[idx].fst_flag;
 	}
 	return (result);
 }
 
 /*
  * Translate one of the special kinfo descriptor numbers (KF_FD_TYPE_*)
  * into the matching filestat usage flag (PS_FST_UFLAG_*).  Any other
  * descriptor number yields 0.
  */
 static int
 kinfo_uflags2fst(int fd)
 {
 	int uflags;
 
 	switch (fd) {
 	case KF_FD_TYPE_CTTY:
 		uflags = PS_FST_UFLAG_CTTY;
 		break;
 	case KF_FD_TYPE_CWD:
 		uflags = PS_FST_UFLAG_CDIR;
 		break;
 	case KF_FD_TYPE_JAIL:
 		uflags = PS_FST_UFLAG_JAIL;
 		break;
 	case KF_FD_TYPE_TEXT:
 		uflags = PS_FST_UFLAG_TEXT;
 		break;
 	case KF_FD_TYPE_TRACE:
 		uflags = PS_FST_UFLAG_TRACE;
 		break;
 	case KF_FD_TYPE_ROOT:
 		uflags = PS_FST_UFLAG_RDIR;
 		break;
 	default:
 		uflags = 0;
 		break;
 	}
 	return (uflags);
 }
 
 /*
  * Read the PSC_TYPE_FILES note from a core file and expand its packed,
  * variable-length kinfo_file records into a calloc'ed array of
  * fixed-size entries.
  *
  * On success the array is returned and the number of entries is stored
  * in *cntp; the caller must free() the array.  NULL is returned, and
  * *cntp is left untouched, when the note cannot be read or memory
  * cannot be allocated.
  *
  * The record sizes come from the core file, so each one is validated:
  * parsing stops at the first record whose size is zero, negative,
  * smaller than the size field itself, larger than struct kinfo_file,
  * or that would run past the end of the note.
  */
 static struct kinfo_file *
 kinfo_getfile_core(struct procstat_core *core, int *cntp)
 {
 	int cnt;
 	size_t len, left, recsz;
 	char *buf, *bp, *eb;
 	struct kinfo_file *kif, *kp, *kf;
 
 	buf = procstat_core_get(core, PSC_TYPE_FILES, NULL, &len);
 	if (buf == NULL)
 		return (NULL);
 	/*
 	 * XXXMG: The code below is just copy&paste from libutil.
 	 * The code duplication can be avoided if libutil
 	 * is extended to provide something like:
 	 *   struct kinfo_file *kinfo_getfile_from_buf(const char *buf,
 	 *       size_t len, int *cntp);
 	 */
 
 	/* Pass 1: count the well-formed records */
 	cnt = 0;
 	bp = buf;
 	eb = buf + len;
 	while (bp < eb) {
 		left = (size_t)(eb - bp);
 		kf = (struct kinfo_file *)(uintptr_t)bp;
 		/* The size field itself must be inside the note. */
 		if (left < sizeof(kf->kf_structsize))
 			break;
 		if (kf->kf_structsize <= 0)
 			break;
 		recsz = (size_t)kf->kf_structsize;
 		if (recsz < sizeof(kf->kf_structsize) || recsz > left ||
 		    recsz > sizeof(*kif))
 			break;
 		bp += recsz;
 		cnt++;
 	}
 
 	kif = calloc(cnt, sizeof(*kif));
 	if (kif == NULL) {
 		free(buf);
 		return (NULL);
 	}
 	/* Pass 2: unpack exactly the records validated by pass 1 */
 	bp = buf;
 	for (kp = kif; kp < kif + cnt; kp++) {
 		kf = (struct kinfo_file *)(uintptr_t)bp;
 		recsz = (size_t)kf->kf_structsize;
 		/* Copy/expand into pre-zeroed buffer */
 		memcpy(kp, kf, recsz);
 		/* Advance to next packed record */
 		bp += recsz;
 		/* Set field size to fixed length */
 		kp->kf_structsize = sizeof(*kp);
 	}
 	free(buf);
 	*cntp = cnt;
 	return (kif);	/* Caller must free() return value */
 }
 
 /*
  * Build the list of filestat entries for process kp using the sysctl or
  * core-file access method.
  *
  * The kinfo_file array obtained from kinfo_getfile() or
  * kinfo_getfile_core() is stored in procstat->files and each element
  * becomes one list entry.  When mmapped is non-zero the process VM map
  * is fetched as well (stored in procstat->vmentries) and every
  * KVME_TYPE_VNODE mapping is appended with PS_FST_UFLAG_MMAP set.
  *
  * Returns the malloc'ed list head, or NULL when kp has no descriptor
  * table, when the file list cannot be obtained for a reason other than
  * EPERM, or when the head cannot be allocated.
  */
 static struct filestat_list *
 procstat_getfiles_sysctl(struct procstat *procstat, struct kinfo_proc *kp,
     int mmapped)
 {
 	struct kinfo_file *kif, *files;
 	struct kinfo_vmentry *kve, *vmentries;
 	struct filestat_list *head;
 	struct filestat *entry;
 	char *path;
 	off_t offset;
 	int cnt, fd, fflags;
 	int i, type, uflags;
 	int refcount;
 	cap_rights_t cap_rights;
 
 	assert(kp);
 	if (kp->ki_fd == NULL)
 		return (NULL);
 	/*
 	 * Start from a defined state: kinfo_getfile_core() does not store
 	 * a count when it fails, and the EPERM case below continues with
 	 * files == NULL, so cnt must not be left uninitialized.
 	 */
 	cnt = 0;
 	files = NULL;
 	switch(procstat->type) {
 	case PROCSTAT_SYSCTL:
 		files = kinfo_getfile(kp->ki_pid, &cnt);
 		break;
 	case PROCSTAT_CORE:
 		files = kinfo_getfile_core(procstat->core, &cnt);
 		break;
 	default:
 		assert(!"invalid type");
 	}
 	if (files == NULL) {
 		if (errno != EPERM) {
 			warn("kinfo_getfile()");
 			return (NULL);
 		}
 		/* EPERM: carry on with an empty descriptor list. */
 		cnt = 0;
 	}
 	procstat->files = files;
 
 	/*
 	 * Allocate list head.
 	 */
 	head = malloc(sizeof(*head));
 	if (head == NULL)
 		return (NULL);
 	STAILQ_INIT(head);
 	for (i = 0; i < cnt; i++) {
 		kif = &files[i];
 
 		type = kinfo_type2fst(kif->kf_type);
 		fd = kif->kf_fd >= 0 ? kif->kf_fd : -1;
 		fflags = kinfo_fflags2fst(kif->kf_flags);
 		uflags = kinfo_uflags2fst(kif->kf_fd);
 		refcount = kif->kf_ref_count;
 		offset = kif->kf_offset;
 		if (*kif->kf_path != '\0')
 			path = strdup(kif->kf_path);
 		else
 			path = NULL;
 		cap_rights = kif->kf_cap_rights;
 
 		/*
 		 * Create filestat entry.
 		 */
 		entry = filestat_new_entry(kif, type, fd, fflags, uflags,
 		    refcount, offset, path, &cap_rights);
 		if (entry != NULL)
 			STAILQ_INSERT_TAIL(head, entry, next);
 	}
 	if (mmapped != 0) {
 		vmentries = procstat_getvmmap(procstat, kp, &cnt);
 		procstat->vmentries = vmentries;
 		if (vmentries == NULL || cnt == 0)
 			goto fail;
 		for (i = 0; i < cnt; i++) {
 			kve = &vmentries[i];
 			if (kve->kve_type != KVME_TYPE_VNODE)
 				continue;
 			fflags = 0;
 			if (kve->kve_protection & KVME_PROT_READ)
 				fflags = PS_FST_FFLAG_READ;
 			/* Copy-on-write mappings never write to the file. */
 			if ((kve->kve_flags & KVME_FLAG_COW) == 0 &&
 			    kve->kve_protection & KVME_PROT_WRITE)
 				fflags |= PS_FST_FFLAG_WRITE;
 			offset = kve->kve_offset;
 			refcount = kve->kve_ref_count;
 			if (*kve->kve_path != '\0')
 				path = strdup(kve->kve_path);
 			else
 				path = NULL;
 			entry = filestat_new_entry(kve, PS_FST_TYPE_VNODE, -1,
 			    fflags, PS_FST_UFLAG_MMAP, refcount, offset, path,
 			    NULL);
 			if (entry != NULL)
 				STAILQ_INSERT_TAIL(head, entry, next);
 		}
 	}
 fail:
 	return (head);
 }
 
 /*
  * Retrieve pipe details for the descriptor described by fst through the
  * back-end that matches the handle's access method.  Returns the
  * back-end's result, or 1 (with "error" stored in errbuf when it is
  * non-NULL) for an unrecognized access method.
  */
 int
 procstat_get_pipe_info(struct procstat *procstat, struct filestat *fst,
     struct pipestat *ps, char *errbuf)
 {
 
 	assert(ps);
 	switch (procstat->type) {
 	case PROCSTAT_KVM:
 		return (procstat_get_pipe_info_kvm(procstat->kd, fst, ps,
 		    errbuf));
 	case PROCSTAT_SYSCTL:
 	case PROCSTAT_CORE:
 		return (procstat_get_pipe_info_sysctl(fst, ps, errbuf));
 	default:
 		break;
 	}
 	warnx("unknown access method: %d", procstat->type);
 	if (errbuf != NULL)
 		snprintf(errbuf, _POSIX2_LINE_MAX, "error");
 	return (1);
 }
 
 /*
  * kvm back-end for procstat_get_pipe_info(): read the struct pipe at the
  * kernel address held in fst->fs_typedep and copy its address, peer and
  * buffered byte count into *ps.  Returns 0 on success; on failure
  * returns 1 and stores "error" in errbuf when it is non-NULL.
  */
 static int
 procstat_get_pipe_info_kvm(kvm_t *kd, struct filestat *fst,
     struct pipestat *ps, char *errbuf)
 {
 	struct pipe kpipe;
 	void *addr;
 
 	assert(kd);
 	assert(ps);
 	assert(fst);
 	bzero(ps, sizeof(*ps));
 	addr = fst->fs_typedep;
 	if (addr != NULL) {
 		if (kvm_read_all(kd, (unsigned long)addr, &kpipe,
 		    sizeof(struct pipe))) {
 			ps->addr = (uintptr_t)addr;
 			ps->peer = (uintptr_t)kpipe.pipe_peer;
 			ps->buffer_cnt = kpipe.pipe_buffer.cnt;
 			return (0);
 		}
 		warnx("can't read pipe at %p", (void *)addr);
 	}
 
 	/* Common failure exit. */
 	if (errbuf != NULL)
 		snprintf(errbuf, _POSIX2_LINE_MAX, "error");
 	return (1);
 }
 
 /*
  * sysctl/core back-end for procstat_get_pipe_info(): copy the pipe
  * fields of the kinfo_file record attached to fst into *ps.  Returns 1
  * when fst has no record attached, 0 otherwise.  errbuf is unused.
  */
 static int
 procstat_get_pipe_info_sysctl(struct filestat *fst, struct pipestat *ps,
     char *errbuf __unused)
 {
 	struct kinfo_file *rec;
 
 	assert(ps);
 	assert(fst);
 	bzero(ps, sizeof(*ps));
 	rec = fst->fs_typedep;
 	if (rec != NULL) {
 		ps->addr = rec->kf_un.kf_pipe.kf_pipe_addr;
 		ps->peer = rec->kf_un.kf_pipe.kf_pipe_peer;
 		ps->buffer_cnt = rec->kf_un.kf_pipe.kf_pipe_buffer_cnt;
 		return (0);
 	}
 	return (1);
 }
 
 /*
  * Retrieve pseudo-terminal details for the descriptor described by fst
  * through the back-end that matches the handle's access method.
  * Returns the back-end's result, or 1 (with "error" stored in errbuf
  * when it is non-NULL) for an unrecognized access method.
  */
 int
 procstat_get_pts_info(struct procstat *procstat, struct filestat *fst,
     struct ptsstat *pts, char *errbuf)
 {
 
 	assert(pts);
 	switch (procstat->type) {
 	case PROCSTAT_KVM:
 		return (procstat_get_pts_info_kvm(procstat->kd, fst, pts,
 		    errbuf));
 	case PROCSTAT_SYSCTL:
 	case PROCSTAT_CORE:
 		return (procstat_get_pts_info_sysctl(fst, pts, errbuf));
 	default:
 		break;
 	}
 	warnx("unknown access method: %d", procstat->type);
 	if (errbuf != NULL)
 		snprintf(errbuf, _POSIX2_LINE_MAX, "error");
 	return (1);
 }
 
 /*
  * kvm back-end for procstat_get_pts_info(): read the struct tty at the
  * kernel address held in fst->fs_typedep and fill in the device number
  * and device name of *pts.  Returns 0 on success; on failure returns 1
  * and stores "error" in errbuf when it is non-NULL.
  */
 static int
 procstat_get_pts_info_kvm(kvm_t *kd, struct filestat *fst,
     struct ptsstat *pts, char *errbuf)
 {
 	struct tty ktty;
 	void *addr;
 
 	assert(kd);
 	assert(pts);
 	assert(fst);
 	bzero(pts, sizeof(*pts));
 	addr = fst->fs_typedep;
 	if (addr != NULL) {
 		if (kvm_read_all(kd, (unsigned long)addr, &ktty,
 		    sizeof(struct tty))) {
 			pts->dev = dev2udev(kd, ktty.t_dev);
 			(void)kdevtoname(kd, ktty.t_dev, pts->devname);
 			return (0);
 		}
 		warnx("can't read tty at %p", (void *)addr);
 	}
 
 	/* Common failure exit. */
 	if (errbuf != NULL)
 		snprintf(errbuf, _POSIX2_LINE_MAX, "error");
 	return (1);
 }
 
 /*
  * sysctl/core back-end for procstat_get_pts_info(): copy the pts device
  * number and the record's path (as the device name) from the kinfo_file
  * record attached to fst.  Always returns 0; *pts stays zeroed when no
  * record is attached.  errbuf is unused.
  */
 static int
 procstat_get_pts_info_sysctl(struct filestat *fst, struct ptsstat *pts,
     char *errbuf __unused)
 {
 	struct kinfo_file *rec;
 
 	assert(pts);
 	assert(fst);
 	bzero(pts, sizeof(*pts));
 	rec = fst->fs_typedep;
 	if (rec != NULL) {
 		pts->dev = rec->kf_un.kf_pts.kf_pts_dev;
 		strlcpy(pts->devname, rec->kf_path, sizeof(pts->devname));
 	}
 	return (0);
 }
 
 /*
  * Retrieve semaphore details for the descriptor described by fst
  * through the back-end that matches the handle's access method.
  * Returns the back-end's result, or 1 (with "error" stored in errbuf
  * when it is non-NULL) for an unrecognized access method.
  */
 int
 procstat_get_sem_info(struct procstat *procstat, struct filestat *fst,
     struct semstat *sem, char *errbuf)
 {
 
 	assert(sem);
 	switch (procstat->type) {
 	case PROCSTAT_KVM:
 		return (procstat_get_sem_info_kvm(procstat->kd, fst, sem,
 		    errbuf));
 	case PROCSTAT_SYSCTL:
 	case PROCSTAT_CORE:
 		return (procstat_get_sem_info_sysctl(fst, sem, errbuf));
 	default:
 		break;
 	}
 	warnx("unknown access method: %d", procstat->type);
 	if (errbuf != NULL)
 		snprintf(errbuf, _POSIX2_LINE_MAX, "error");
 	return (1);
 }
 
 /*
  * kvm back-end for procstat_get_sem_info(): read the struct ksem at the
  * kernel address held in fst->fs_typedep and fill in the mode and value
  * of *sem.
  *
  * When fst has no path yet and the ksem carries one, the path is read
  * from the kernel one byte at a time into a freshly malloc'ed buffer
  * that is then stored in fst->fs_path.  Fetching the path is best
  * effort: if the buffer cannot be allocated a warning is printed and
  * the call still succeeds with fs_path left NULL.
  *
  * Returns 0 on success; on failure returns 1 and stores "error" in
  * errbuf when it is non-NULL.
  */
 static int
 procstat_get_sem_info_kvm(kvm_t *kd, struct filestat *fst,
     struct semstat *sem, char *errbuf)
 {
 	struct ksem ksem;
 	void *ksemp;
 	char *path;
 	int i;
 
 	assert(kd);
 	assert(sem);
 	assert(fst);
 	bzero(sem, sizeof(*sem));
 	ksemp = fst->fs_typedep;
 	if (ksemp == NULL)
 		goto fail;
 	if (!kvm_read_all(kd, (unsigned long)ksemp, &ksem,
 	    sizeof(struct ksem))) {
 		warnx("can't read ksem at %p", (void *)ksemp);
 		goto fail;
 	}
 	sem->mode = S_IFREG | ksem.ks_mode;
 	sem->value = ksem.ks_value;
 	if (fst->fs_path == NULL && ksem.ks_path != NULL) {
 		path = malloc(MAXPATHLEN);
 		if (path == NULL) {
 			/* The path is optional; do not dereference NULL. */
 			warn("malloc(%zu)", (size_t)MAXPATHLEN);
 			return (0);
 		}
 		/* Stop at the first unreadable byte or at the terminator. */
 		for (i = 0; i < MAXPATHLEN - 1; i++) {
 			if (!kvm_read_all(kd, (unsigned long)ksem.ks_path + i,
 			    path + i, 1))
 				break;
 			if (path[i] == '\0')
 				break;
 		}
 		path[i] = '\0';
 		if (i == 0)
 			free(path);
 		else
 			fst->fs_path = path;
 	}
 	return (0);
 
 fail:
 	if (errbuf != NULL)
 		snprintf(errbuf, _POSIX2_LINE_MAX, "error");
 	return (1);
 }
 
 /*
  * sysctl/core back-end for procstat_get_sem_info(): copy the semaphore
  * value and mode from the kinfo_file record attached to fst.  Always
  * returns 0; *sem stays zeroed when no record is attached.  errbuf is
  * unused.
  */
 static int
 procstat_get_sem_info_sysctl(struct filestat *fst, struct semstat *sem,
     char *errbuf __unused)
 {
 	struct kinfo_file *rec;
 
 	assert(sem);
 	assert(fst);
 	bzero(sem, sizeof(*sem));
 	rec = fst->fs_typedep;
 	if (rec != NULL) {
 		sem->mode = rec->kf_un.kf_sem.kf_sem_mode;
 		sem->value = rec->kf_un.kf_sem.kf_sem_value;
 	}
 	return (0);
 }
 
 /*
  * Retrieve shared memory object details for the descriptor described by
  * fst through the back-end that matches the handle's access method.
  * Returns the back-end's result, or 1 (with "error" stored in errbuf
  * when it is non-NULL) for an unrecognized access method.
  */
 int
 procstat_get_shm_info(struct procstat *procstat, struct filestat *fst,
     struct shmstat *shm, char *errbuf)
 {
 
 	assert(shm);
 	switch (procstat->type) {
 	case PROCSTAT_KVM:
 		return (procstat_get_shm_info_kvm(procstat->kd, fst, shm,
 		    errbuf));
 	case PROCSTAT_SYSCTL:
 	case PROCSTAT_CORE:
 		return (procstat_get_shm_info_sysctl(fst, shm, errbuf));
 	default:
 		break;
 	}
 	warnx("unknown access method: %d", procstat->type);
 	if (errbuf != NULL)
 		snprintf(errbuf, _POSIX2_LINE_MAX, "error");
 	return (1);
 }
 
 /*
  * kvm back-end for procstat_get_shm_info(): read the struct shmfd at the
  * kernel address held in fst->fs_typedep and fill in the mode and size
  * of *shm.
  *
  * When fst has no path yet and the shmfd carries one, the path is read
  * from the kernel one byte at a time into a freshly malloc'ed buffer
  * that is then stored in fst->fs_path.  Fetching the path is best
  * effort: if the buffer cannot be allocated a warning is printed and
  * the call still succeeds with fs_path left NULL.
  *
  * Returns 0 on success; on failure returns 1 and stores "error" in
  * errbuf when it is non-NULL.
  */
 static int
 procstat_get_shm_info_kvm(kvm_t *kd, struct filestat *fst,
     struct shmstat *shm, char *errbuf)
 {
 	struct shmfd shmfd;
 	void *shmfdp;
 	char *path;
 	int i;
 
 	assert(kd);
 	assert(shm);
 	assert(fst);
 	bzero(shm, sizeof(*shm));
 	shmfdp = fst->fs_typedep;
 	if (shmfdp == NULL)
 		goto fail;
 	if (!kvm_read_all(kd, (unsigned long)shmfdp, &shmfd,
 	    sizeof(struct shmfd))) {
 		warnx("can't read shmfd at %p", (void *)shmfdp);
 		goto fail;
 	}
 	shm->mode = S_IFREG | shmfd.shm_mode;
 	shm->size = shmfd.shm_size;
 	if (fst->fs_path == NULL && shmfd.shm_path != NULL) {
 		path = malloc(MAXPATHLEN);
 		if (path == NULL) {
 			/* The path is optional; do not dereference NULL. */
 			warn("malloc(%zu)", (size_t)MAXPATHLEN);
 			return (0);
 		}
 		/* Stop at the first unreadable byte or at the terminator. */
 		for (i = 0; i < MAXPATHLEN - 1; i++) {
 			if (!kvm_read_all(kd, (unsigned long)shmfd.shm_path + i,
 			    path + i, 1))
 				break;
 			if (path[i] == '\0')
 				break;
 		}
 		path[i] = '\0';
 		if (i == 0)
 			free(path);
 		else
 			fst->fs_path = path;
 	}
 	return (0);
 
 fail:
 	if (errbuf != NULL)
 		snprintf(errbuf, _POSIX2_LINE_MAX, "error");
 	return (1);
 }
 
 /*
  * sysctl/core back-end for procstat_get_shm_info(): copy the file size
  * and mode from the kinfo_file record attached to fst.  Always returns
  * 0; *shm stays zeroed when no record is attached.  errbuf is unused.
  */
 static int
 procstat_get_shm_info_sysctl(struct filestat *fst, struct shmstat *shm,
     char *errbuf __unused)
 {
 	struct kinfo_file *rec;
 
 	assert(shm);
 	assert(fst);
 	bzero(shm, sizeof(*shm));
 	rec = fst->fs_typedep;
 	if (rec != NULL) {
 		shm->mode = rec->kf_un.kf_file.kf_file_mode;
 		shm->size = rec->kf_un.kf_file.kf_file_size;
 	}
 	return (0);
 }
 
 /*
  * Retrieve vnode details for the descriptor described by fst through
  * the back-end that matches the handle's access method.  Returns the
  * back-end's result, or 1 (with "error" stored in errbuf when it is
  * non-NULL) for an unrecognized access method.
  */
 int
 procstat_get_vnode_info(struct procstat *procstat, struct filestat *fst,
     struct vnstat *vn, char *errbuf)
 {
 
 	assert(vn);
 	switch (procstat->type) {
 	case PROCSTAT_KVM:
 		return (procstat_get_vnode_info_kvm(procstat->kd, fst, vn,
 		    errbuf));
 	case PROCSTAT_SYSCTL:
 	case PROCSTAT_CORE:
 		return (procstat_get_vnode_info_sysctl(fst, vn, errbuf));
 	default:
 		break;
 	}
 	warnx("unknown access method: %d", procstat->type);
 	if (errbuf != NULL)
 		snprintf(errbuf, _POSIX2_LINE_MAX, "error");
 	return (1);
 }
 
 /*
  * kvm back-end for procstat_get_vnode_info(): read the vnode at the
  * kernel address held in fst->fs_typedep, pick the filesystem handler
  * whose name matches the vnode's tag string and let it fill in *vn,
  * then add the mount point and, for block/character devices, the
  * device number and name.
  *
  * Returns 0 on success (VNON and VBAD vnodes only get vn_type set).
  * Returns 1 on failure: errbuf, when non-NULL, receives "?(<tag>)" if
  * no handler knows the tag and "error" otherwise.
  */
 static int
 procstat_get_vnode_info_kvm(kvm_t *kd, struct filestat *fst,
     struct vnstat *vn, char *errbuf)
 {
 	/* Filesystem specific handlers. */
 	#define FSTYPE(fst)     {#fst, fst##_filestat}
 	struct {
 		const char	*tag;
 		int		(*handler)(kvm_t *kd, struct vnode *vp,
 		    struct vnstat *vn);
 	} fstypes[] = {
 		FSTYPE(devfs),
 		FSTYPE(isofs),
 		FSTYPE(msdosfs),
 		FSTYPE(nfs),
 		FSTYPE(smbfs),
 		FSTYPE(udf),
 		FSTYPE(ufs),
 #ifdef LIBPROCSTAT_ZFS
 		FSTYPE(zfs),
 #endif
 	};
 #define	NTYPES	(sizeof(fstypes) / sizeof(*fstypes))
 	struct vnode vnode;
 	char tagstr[12];
 	void *vp;
 	unsigned int idx;
 
 	assert(kd);
 	assert(vn);
 	assert(fst);
 	vp = fst->fs_typedep;
 	if (vp == NULL)
 		goto fail;
 	if (!kvm_read_all(kd, (unsigned long)vp, &vnode, sizeof(vnode))) {
 		warnx("can't read vnode at %p", (void *)vp);
 		goto fail;
 	}
 	bzero(vn, sizeof(*vn));
 	vn->vn_type = vntype2psfsttype(vnode.v_type);
 	if (vnode.v_type == VNON || vnode.v_type == VBAD)
 		return (0);
 	if (!kvm_read_all(kd, (unsigned long)vnode.v_tag, tagstr,
 	    sizeof(tagstr))) {
 		warnx("can't read v_tag at %p", (void *)vp);
 		goto fail;
 	}
 	tagstr[sizeof(tagstr) - 1] = '\0';
 
 	/*
 	 * Locate the handler for this filesystem tag, then run it.
 	 */
 	for (idx = 0; idx < NTYPES; idx++) {
 		if (strcmp(fstypes[idx].tag, tagstr) == 0)
 			break;
 	}
 	if (idx == NTYPES) {
 		if (errbuf != NULL)
 			snprintf(errbuf, _POSIX2_LINE_MAX, "?(%s)", tagstr);
 		return (1);
 	}
 	if (fstypes[idx].handler(kd, &vnode, vn) != 0)
 		goto fail;
 
 	vn->vn_mntdir = getmnton(kd, vnode.v_mount);
 	if ((vnode.v_type == VBLK || vnode.v_type == VCHR) &&
 	    vnode.v_rdev != NULL) {
 		vn->vn_dev = dev2udev(kd, vnode.v_rdev);
 		(void)kdevtoname(kd, vnode.v_rdev, vn->vn_devname);
 	} else
 		vn->vn_dev = -1;
 	return (0);
 
 fail:
 	if (errbuf != NULL)
 		snprintf(errbuf, _POSIX2_LINE_MAX, "error");
 	return (1);
 }
 
 /*
  * Map a kinfo vnode type (KF_VTYPE_*) to the matching filestat vnode
  * type (PS_FST_VTYPE_*).  A type that is not in the table maps to
  * PS_FST_VTYPE_UNKNOWN.
  */
 static int
 kinfo_vtype2fst(int kfvtype)
 {
 	static struct {
 		int	kf_vtype;
 		int	fst_vtype;
 	} kfvtypes2fst[] = {
 		{ KF_VTYPE_VBAD, PS_FST_VTYPE_VBAD },
 		{ KF_VTYPE_VBLK, PS_FST_VTYPE_VBLK },
 		{ KF_VTYPE_VCHR, PS_FST_VTYPE_VCHR },
 		{ KF_VTYPE_VDIR, PS_FST_VTYPE_VDIR },
 		{ KF_VTYPE_VFIFO, PS_FST_VTYPE_VFIFO },
 		{ KF_VTYPE_VLNK, PS_FST_VTYPE_VLNK },
 		{ KF_VTYPE_VNON, PS_FST_VTYPE_VNON },
 		{ KF_VTYPE_VREG, PS_FST_VTYPE_VREG },
 		{ KF_VTYPE_VSOCK, PS_FST_VTYPE_VSOCK }
 	};
 #define	NKFVTYPES	(sizeof(kfvtypes2fst) / sizeof(*kfvtypes2fst))
 	unsigned int idx;
 
 	/* Linear search; return as soon as the type is found. */
 	for (idx = 0; idx < NKFVTYPES; idx++) {
 		if (kfvtype == kfvtypes2fst[idx].kf_vtype)
 			return (kfvtypes2fst[idx].fst_vtype);
 	}
 	return (PS_FST_VTYPE_UNKNOWN);
 }
 
 /*
  * sysctl/core back-end for procstat_get_vnode_info(): fill in *vn from
  * the kinfo record attached to fst.  The record is treated as a
  * struct kinfo_vmentry when fst carries PS_FST_UFLAG_MMAP and as a
  * struct kinfo_file otherwise.
  *
  * Returns 0 on success (VNON and VBAD vnodes only get vn_type set).
  * Returns 1 when no record is attached or when the record's status
  * lacks KF_ATTR_VALID; in the latter case "? (no info available)" is
  * stored in errbuf when it is non-NULL.
  */
 static int
 procstat_get_vnode_info_sysctl(struct filestat *fst, struct vnstat *vn,
     char *errbuf)
 {
 	struct statfs stbuf;
 	struct kinfo_file *kif;
 	struct kinfo_vmentry *kve;
+	char *name, *path;
 	uint64_t fileid;
 	uint64_t size;
-	char *name, *path;
-	uint32_t fsid;
+	uint64_t fsid;
+	uint64_t rdev;
 	uint16_t mode;
-	uint32_t rdev;
 	int vntype;
 	int status;
 
 	assert(fst);
 	assert(vn);
 	bzero(vn, sizeof(*vn));
 	if (fst->fs_typedep == NULL)
 		return (1);
 	/* Pull the same set of attributes out of whichever record we have. */
 	if (fst->fs_uflags & PS_FST_UFLAG_MMAP) {
 		kve = fst->fs_typedep;
 		fileid = kve->kve_vn_fileid;
 		fsid = kve->kve_vn_fsid;
 		mode = kve->kve_vn_mode;
 		path = kve->kve_path;
 		rdev = kve->kve_vn_rdev;
 		size = kve->kve_vn_size;
 		vntype = kinfo_vtype2fst(kve->kve_vn_type);
 		status = kve->kve_status;
 	} else {
 		kif = fst->fs_typedep;
 		fileid = kif->kf_un.kf_file.kf_file_fileid;
 		fsid = kif->kf_un.kf_file.kf_file_fsid;
 		mode = kif->kf_un.kf_file.kf_file_mode;
 		path = kif->kf_path;
 		rdev = kif->kf_un.kf_file.kf_file_rdev;
 		size = kif->kf_un.kf_file.kf_file_size;
 		vntype = kinfo_vtype2fst(kif->kf_vnode_type);
 		status = kif->kf_status;
 	}
 	vn->vn_type = vntype;
 	if (vntype == PS_FST_VTYPE_VNON || vntype == PS_FST_VTYPE_VBAD)
 		return (0);
 	if ((status & KF_ATTR_VALID) == 0) {
 		if (errbuf != NULL) {
 			snprintf(errbuf, _POSIX2_LINE_MAX,
 			    "? (no info available)");
 		}
 		return (1);
 	}
 	/*
 	 * The mount point is a strdup'ed string: the mounted-on name
 	 * reported by statfs() for the path, or "-" when there is no path.
 	 * NOTE(review): the statfs() return value is not checked, so
 	 * stbuf.f_mntonname is read even if the call failed; the strdup()
 	 * results are not checked either.
 	 */
 	if (path && *path) {
 		statfs(path, &stbuf);
 		vn->vn_mntdir = strdup(stbuf.f_mntonname);
 	} else
 		vn->vn_mntdir = strdup("-");
 	vn->vn_dev = rdev;
 	/* Resolve a device name only for block and character devices. */
 	if (vntype == PS_FST_VTYPE_VBLK) {
 		name = devname(rdev, S_IFBLK);
 		if (name != NULL)
 			strlcpy(vn->vn_devname, name,
 			    sizeof(vn->vn_devname));
 	} else if (vntype == PS_FST_VTYPE_VCHR) {
 		name = devname(vn->vn_dev, S_IFCHR);
 		if (name != NULL)
 			strlcpy(vn->vn_devname, name,
 			    sizeof(vn->vn_devname));
 	}
 	vn->vn_fsid = fsid;
 	vn->vn_fileid = fileid;
 	vn->vn_size = size;
 	vn->vn_mode = mode;
 	return (0);
 }
 
 /*
  * Retrieve socket details for the descriptor described by fst through
  * the back-end that matches the handle's access method.  Returns the
  * back-end's result, or 1 (with "error" stored in errbuf when it is
  * non-NULL) for an unrecognized access method.
  */
 int
 procstat_get_socket_info(struct procstat *procstat, struct filestat *fst,
     struct sockstat *sock, char *errbuf)
 {
 
 	assert(sock);
 	switch (procstat->type) {
 	case PROCSTAT_KVM:
 		return (procstat_get_socket_info_kvm(procstat->kd, fst, sock,
 		    errbuf));
 	case PROCSTAT_SYSCTL:
 	case PROCSTAT_CORE:
 		return (procstat_get_socket_info_sysctl(fst, sock, errbuf));
 	default:
 		break;
 	}
 	warnx("unknown access method: %d", procstat->type);
 	if (errbuf != NULL)
 		snprintf(errbuf, _POSIX2_LINE_MAX, "error");
 	return (1);
 }
 
 /*
  * kvm back-end for procstat_get_socket_info(): starting from the socket
  * at the kernel address held in fst->fs_typedep, read the socket, its
  * protosw and its domain, and fill in *sock.  For TCP sockets in the
  * INET/INET6 domains the inpcb is read to obtain inp_ppcb; for UNIX
  * domain sockets the unpcb is read and, when it has a peer, the peer
  * address and both socket buffer states are recorded.
  *
  * Failing to read the domain name or a protocol control block only
  * produces a warning.  Failing to read the socket, protosw or domain
  * makes the function return 1 and store "error" in errbuf when it is
  * non-NULL.  Returns 0 otherwise.
  */
 static int
 procstat_get_socket_info_kvm(kvm_t *kd, struct filestat *fst,
     struct sockstat *sock, char *errbuf)
 {
 	struct domain dom;
 	struct inpcb inpcb;
 	struct protosw proto;
 	struct socket s;
 	struct unpcb unpcb;
 	ssize_t namelen;
 	void *so;
 
 	assert(kd);
 	assert(sock);
 	assert(fst);
 	bzero(sock, sizeof(*sock));
 	so = fst->fs_typedep;
 	if (so == NULL)
 		goto fail;
 	sock->so_addr = (uintptr_t)so;
 
 	/* Read the socket, then follow it to its protosw and domain. */
 	if (!kvm_read_all(kd, (unsigned long)so, &s,
 	    sizeof(struct socket))) {
 		warnx("can't read sock at %p", (void *)so);
 		goto fail;
 	}
 	if (!kvm_read_all(kd, (unsigned long)s.so_proto, &proto,
 	    sizeof(struct protosw))) {
 		warnx("can't read protosw at %p", (void *)s.so_proto);
 		goto fail;
 	}
 	if (!kvm_read_all(kd, (unsigned long)proto.pr_domain, &dom,
 	    sizeof(struct domain))) {
 		warnx("can't read domain at %p",
 		    (void *)proto.pr_domain);
 		goto fail;
 	}
 
 	/* The domain name is optional: fall back to an empty string. */
 	namelen = kvm_read(kd, (unsigned long)dom.dom_name, sock->dname,
 	    sizeof(sock->dname) - 1);
 	if (namelen < 0) {
 		warnx("can't read domain name at %p", (void *)dom.dom_name);
 		sock->dname[0] = '\0';
 	} else
 		sock->dname[namelen] = '\0';
 
 	/*
 	 * Fill in known data.
 	 */
 	sock->type = s.so_type;
 	sock->proto = proto.pr_protocol;
 	sock->dom_family = dom.dom_family;
 	sock->so_pcb = (uintptr_t)s.so_pcb;
 
 	/*
 	 * Protocol specific data.
 	 */
 	switch(dom.dom_family) {
 	case AF_INET:
 	case AF_INET6:
 		if (proto.pr_protocol != IPPROTO_TCP || !s.so_pcb)
 			break;
 		if (kvm_read(kd, (u_long)s.so_pcb, (char *)&inpcb,
 		    sizeof(struct inpcb)) != sizeof(struct inpcb))
 			warnx("can't read inpcb at %p", (void *)s.so_pcb);
 		else
 			sock->inp_ppcb = (uintptr_t)inpcb.inp_ppcb;
 		break;
 	case AF_UNIX:
 		if (!s.so_pcb)
 			break;
 		if (kvm_read(kd, (u_long)s.so_pcb, (char *)&unpcb,
 		    sizeof(struct unpcb)) != sizeof(struct unpcb)) {
 			warnx("can't read unpcb at %p", (void *)s.so_pcb);
 			break;
 		}
 		if (unpcb.unp_conn) {
 			sock->so_rcv_sb_state = s.so_rcv.sb_state;
 			sock->so_snd_sb_state = s.so_snd.sb_state;
 			sock->unp_conn = (uintptr_t)unpcb.unp_conn;
 		}
 		break;
 	default:
 		break;
 	}
 	return (0);
 
 fail:
 	if (errbuf != NULL)
 		snprintf(errbuf, _POSIX2_LINE_MAX, "error");
 	return (1);
 }
 
 /*
  * sysctl/core back-end for procstat_get_socket_info(): fill in *sock
  * from the kinfo_file record attached to fst.  Always returns 0; *sock
  * stays zeroed when no record is attached.  errbuf is unused.
  */
 static int
 procstat_get_socket_info_sysctl(struct filestat *fst, struct sockstat *sock,
     char *errbuf __unused)
 {
 	struct kinfo_file *kif;
 
 	assert(sock);
 	assert(fst);
 	bzero(sock, sizeof(*sock));
 	kif = fst->fs_typedep;
 	if (kif == NULL)
 		return (0);
 
 	/*
 	 * Fill in known data.
 	 */
 	sock->type = kif->kf_sock_type;
 	sock->proto = kif->kf_sock_protocol;
 	sock->dom_family = kif->kf_sock_domain;
 	sock->so_pcb = kif->kf_un.kf_sock.kf_sock_pcb;
 	/* The record's path field is used as the domain name. */
 	strlcpy(sock->dname, kif->kf_path, sizeof(sock->dname));
 	/*
 	 * Copy the local and peer addresses, each with the length given
 	 * by its own ss_len field.
 	 * NOTE(review): ss_len is not checked against the size of the
 	 * destination; presumably it never exceeds it -- confirm.
 	 */
-	bcopy(&kif->kf_sa_local, &sock->sa_local, kif->kf_sa_local.ss_len);
-	bcopy(&kif->kf_sa_peer, &sock->sa_peer, kif->kf_sa_peer.ss_len);
+	bcopy(&kif->kf_un.kf_sock.kf_sa_local, &sock->sa_local,
+	    kif->kf_un.kf_sock.kf_sa_local.ss_len);
+	bcopy(&kif->kf_un.kf_sock.kf_sa_peer, &sock->sa_peer,
+	    kif->kf_un.kf_sock.kf_sa_peer.ss_len);
 
 	/*
 	 * Protocol specific data.
 	 */
 	switch(sock->dom_family) {
 	case AF_INET:
 	case AF_INET6:
 		if (sock->proto == IPPROTO_TCP)
 			sock->inp_ppcb = kif->kf_un.kf_sock.kf_sock_inpcb;
 		break;
 	case AF_UNIX:
 		/* Peer and buffer states are recorded only with a peer. */
 		if (kif->kf_un.kf_sock.kf_sock_unpconn != 0) {
 				sock->so_rcv_sb_state =
 				    kif->kf_un.kf_sock.kf_sock_rcv_sb_state;
 				sock->so_snd_sb_state =
 				    kif->kf_un.kf_sock.kf_sock_snd_sb_state;
 				sock->unp_conn =
 				    kif->kf_un.kf_sock.kf_sock_unpconn;
 		}
 		break;
 	default:
 		break;
 	}
 	return (0);
 }
 
 /*
  * Translate a descriptor flag mask (FREAD/FWRITE and O_* bits) into the
  * equivalent filestat flag mask (PS_FST_FFLAG_*).  Bits without a table
  * entry are dropped.
  */
 static int
 to_filestat_flags(int flags)
 {
 	static struct {
 		int flag;
 		int fst_flag;
 	} fstflags[] = {
 		{ FREAD, PS_FST_FFLAG_READ },
 		{ FWRITE, PS_FST_FFLAG_WRITE },
 		{ O_APPEND, PS_FST_FFLAG_APPEND },
 		{ O_ASYNC, PS_FST_FFLAG_ASYNC },
 		{ O_CREAT, PS_FST_FFLAG_CREAT },
 		{ O_DIRECT, PS_FST_FFLAG_DIRECT },
 		{ O_EXCL, PS_FST_FFLAG_EXCL },
 		{ O_EXEC, PS_FST_FFLAG_EXEC },
 		{ O_EXLOCK, PS_FST_FFLAG_EXLOCK },
 		{ O_NOFOLLOW, PS_FST_FFLAG_NOFOLLOW },
 		{ O_NONBLOCK, PS_FST_FFLAG_NONBLOCK },
 		{ O_SHLOCK, PS_FST_FFLAG_SHLOCK },
 		{ O_SYNC, PS_FST_FFLAG_SYNC },
 		{ O_TRUNC, PS_FST_FFLAG_TRUNC }
 	};
 #define NFSTFLAGS	(sizeof(fstflags) / sizeof(*fstflags))
 	unsigned int idx;
 	int result;
 
 	/* Accumulate the filestat bit of every descriptor bit that is set. */
 	result = 0;
 	for (idx = 0; idx < NFSTFLAGS; idx++) {
 		if ((flags & fstflags[idx].flag) == 0)
 			continue;
 		result |= fstflags[idx].fst_flag;
 	}
 	return (result);
 }
 
 /*
  * Map a kernel vnode type (VBAD, VBLK, ...) to the matching filestat
  * vnode type (PS_FST_VTYPE_*).  A type that is not in the table maps to
  * PS_FST_VTYPE_UNKNOWN.
  */
 static int
 vntype2psfsttype(int type)
 {
 	static struct {
 		int	vtype;
 		int	fst_vtype;
 	} vt2fst[] = {
 		{ VBAD, PS_FST_VTYPE_VBAD },
 		{ VBLK, PS_FST_VTYPE_VBLK },
 		{ VCHR, PS_FST_VTYPE_VCHR },
 		{ VDIR, PS_FST_VTYPE_VDIR },
 		{ VFIFO, PS_FST_VTYPE_VFIFO },
 		{ VLNK, PS_FST_VTYPE_VLNK },
 		{ VNON, PS_FST_VTYPE_VNON },
 		{ VREG, PS_FST_VTYPE_VREG },
 		{ VSOCK, PS_FST_VTYPE_VSOCK }
 	};
 #define	NVFTYPES	(sizeof(vt2fst) / sizeof(*vt2fst))
 	unsigned int idx;
 
 	/* Linear search; return as soon as the type is found. */
 	for (idx = 0; idx < NVFTYPES; idx++) {
 		if (vt2fst[idx].vtype == type)
 			return (vt2fst[idx].fst_vtype);
 	}
 	return (PS_FST_VTYPE_UNKNOWN);
 }
 
 /*
  * Return the mounted-on name for the kernel mount structure at address
  * m.  Results are kept in a function-local static list keyed by the
  * mount address, so the kernel is read only the first time a given
  * address is seen.  The returned string belongs to that list.
  *
  * Returns NULL when the mount structure cannot be read.  Exits through
  * err(1, NULL) when a new list node cannot be allocated.
  */
 static char *
 getmnton(kvm_t *kd, struct mount *m)
 {
 	struct mount mnt;
 	static struct mtab {
 		struct mtab *next;
 		struct mount *m;
 		char mntonname[MNAMELEN + 1];
 	} *mhead = NULL;
 	struct mtab *node;
 
 	/* Serve the request from the cache when possible. */
 	node = mhead;
 	while (node != NULL) {
 		if (node->m == m)
 			return (node->mntonname);
 		node = node->next;
 	}
 
 	if (!kvm_read_all(kd, (unsigned long)m, &mnt, sizeof(struct mount))) {
 		warnx("can't read mount table at %p", (void *)m);
 		return (NULL);
 	}
 
 	/* Remember the name by pushing a new node on the list head. */
 	node = malloc(sizeof (struct mtab));
 	if (node == NULL)
 		err(1, NULL);
 	node->m = m;
 	bcopy(mnt.mnt_stat.f_mntonname, node->mntonname, MNAMELEN);
 	node->mntonname[MNAMELEN] = '\0';
 	node->next = mhead;
 	mhead = node;
 	return (node->mntonname);
 }
 
 /*
  * Auxiliary structures and functions to get process environment or
  * command line arguments.
  *
  * A struct argvec pairs the raw string buffer filled by getargv() with
  * the pointer vector that getargv() builds over it.
  */
 struct argvec {
 	char	*buf;		/* malloc'ed storage for the raw strings */
 	size_t	bufsize;	/* size of buf requested at allocation */
 	char	**argv;		/* malloc'ed vector of pointers into buf */
 	size_t	argc;		/* number of slots allocated in argv */
 };
 
 /*
  * Allocate an argvec with a string buffer of bufsize bytes and an
  * initial pointer vector of 32 slots.  Returns NULL, with everything
  * allocated so far released, when any allocation fails.
  */
 static struct argvec *
 argvec_alloc(size_t bufsize)
 {
 	struct argvec *vec;
 
 	vec = malloc(sizeof(*vec));
 	if (vec == NULL)
 		return (NULL);
 	vec->bufsize = bufsize;
 	vec->buf = malloc(vec->bufsize);
 	if (vec->buf != NULL) {
 		vec->argc = 32;
 		vec->argv = malloc(sizeof(char *) * vec->argc);
 		if (vec->argv != NULL)
 			return (vec);
 		/* Vector allocation failed: undo the buffer. */
 		free(vec->buf);
 	}
 	free(vec);
 	return (NULL);
 }
 
 /*
  * Release an argvec: its string buffer, its pointer vector and the
  * structure itself.  av must not be NULL.
  */
 static void
 argvec_free(struct argvec * av)
 {
 
 	free(av->buf);
 	free(av->argv);
 	free(av);
 }
 
 /*
  * Fetch the command line arguments (env == 0) or the environment
  * (env != 0) of process kp and return them as a NULL-terminated vector
  * of strings.
  *
  * At most nchr bytes are fetched; nchr of 0 or above ARG_MAX means
  * ARG_MAX.  Only the sysctl and core access methods are supported.
  *
  * The strings and the vector live in an argvec cached in the handle:
  * procstat->argv for arguments and procstat->envv for the environment,
  * which are the slots released by procstat_freeargv() and
  * procstat_freeenvv() respectively.  The cached argvec is reused, and
  * grown when needed, by later calls, so a returned vector is only valid
  * until the next call of the same kind or the matching free.
  *
  * Returns NULL on failure.
  */
 static char **
 getargv(struct procstat *procstat, struct kinfo_proc *kp, size_t nchr, int env)
 {
 	int error, name[4], argc, i;
 	struct argvec *av, **avp;
 	enum psc_type type;
 	size_t len;
 	char *p, **argv;
 
 	assert(procstat);
 	assert(kp);
 	if (procstat->type == PROCSTAT_KVM) {
 		warnx("can't use kvm access method");
 		return (NULL);
 	}
 	if (procstat->type != PROCSTAT_SYSCTL &&
 	    procstat->type != PROCSTAT_CORE) {
 		warnx("unknown access method: %d", procstat->type);
 		return (NULL);
 	}
 
 	if (nchr == 0 || nchr > ARG_MAX)
 		nchr = ARG_MAX;
 
 	/*
 	 * Use the slot that the matching free routine releases: envv for
 	 * the environment, argv for the arguments.
 	 */
 	avp = (struct argvec **)(env ? &procstat->envv : &procstat->argv);
 	av = *avp;
 
 	if (av == NULL) {
 		av = argvec_alloc(nchr);
 		if (av == NULL) {
 			warn("malloc(%zu)", nchr);
 			return (NULL);
 		}
 		*avp = av;
 	} else if (av->bufsize < nchr) {
 		av->buf = reallocf(av->buf, nchr);
 		if (av->buf == NULL) {
 			/*
 			 * reallocf() freed the old buffer; record that so
 			 * the next call allocates again instead of using
 			 * a NULL buffer.
 			 */
 			av->bufsize = 0;
 			warn("malloc(%zu)", nchr);
 			return (NULL);
 		}
 		av->bufsize = nchr;
 	}
 	if (procstat->type == PROCSTAT_SYSCTL) {
 		name[0] = CTL_KERN;
 		name[1] = KERN_PROC;
 		name[2] = env ? KERN_PROC_ENV : KERN_PROC_ARGS;
 		name[3] = kp->ki_pid;
 		len = nchr;
 		error = sysctl(name, nitems(name), av->buf, &len, NULL, 0);
 		/* ESRCH and EPERM are expected; fail without a warning. */
 		if (error != 0 && errno != ESRCH && errno != EPERM)
 			warn("sysctl(kern.proc.%s)", env ? "env" : "args");
 		if (error != 0 || len == 0)
 			return (NULL);
 	} else /* procstat->type == PROCSTAT_CORE */ {
 		type = env ? PSC_TYPE_ENVV : PSC_TYPE_ARGV;
 		len = nchr;
 		if (procstat_core_get(procstat->core, type, av->buf, &len)
 		    == NULL) {
 			return (NULL);
 		}
 	}
 
 	/* Split the buffer into strings, doubling argv as it fills up. */
 	argv = av->argv;
 	argc = av->argc;
 	i = 0;
 	for (p = av->buf; p < av->buf + len; p += strlen(p) + 1) {
 		argv[i++] = p;
 		if (i < argc)
 			continue;
 		/* Grow argv. */
 		argc += argc;
 		argv = realloc(argv, sizeof(char *) * argc);
 		if (argv == NULL) {
 			warn("malloc(%zu)", sizeof(char *) * argc);
 			return (NULL);
 		}
 		av->argv = argv;
 		av->argc = argc;
 	}
 	argv[i] = NULL;
 
 	return (argv);
 }
 
 /*
  * Return process command line arguments.
  *
  * Thin wrapper around getargv() with env == 0: the result is a
  * NULL-terminated vector of strings, or NULL on failure.  nchr limits
  * the number of bytes fetched; 0 or a value above ARG_MAX means
  * ARG_MAX.
  */
 char **
 procstat_getargv(struct procstat *procstat, struct kinfo_proc *p, size_t nchr)
 {
 
 	return (getargv(procstat, p, nchr, 0));
 }
 
 /*
  * Release the argument vector cached in procstat->argv, if there is
  * one, and clear the slot.
  */
 void
 procstat_freeargv(struct procstat *procstat)
 {
 
 	if (procstat->argv == NULL)
 		return;
 	argvec_free(procstat->argv);
 	procstat->argv = NULL;
 }
 
 /*
  * Return process environment.
  *
  * Thin wrapper around getargv() with env == 1: the result is a
  * NULL-terminated vector of strings, or NULL on failure.  nchr limits
  * the number of bytes fetched; 0 or a value above ARG_MAX means
  * ARG_MAX.
  */
 char **
 procstat_getenvv(struct procstat *procstat, struct kinfo_proc *p, size_t nchr)
 {
 
 	return (getargv(procstat, p, nchr, 1));
 }
 
 /*
  * Release the environment vector cached in procstat->envv, if there is
  * one, and clear the slot.
  */
 void
 procstat_freeenvv(struct procstat *procstat)
 {
 	if (procstat->envv == NULL)
 		return;
 	argvec_free(procstat->envv);
 	procstat->envv = NULL;
 }
 
 /*
  * Read the PSC_TYPE_VMMAP note from a core file and expand its packed,
  * variable-length kinfo_vmentry records into a calloc'ed array of
  * fixed-size entries.
  *
  * On success the array is returned and the number of entries is stored
  * in *cntp; the caller must free() the array.  NULL is returned, and
  * *cntp is left untouched, when the note cannot be read or memory
  * cannot be allocated.
  *
  * The record sizes come from the core file, so each one is validated:
  * parsing stops at the first record whose size is zero, negative,
  * smaller than the size field itself, larger than struct kinfo_vmentry,
  * or that would run past the end of the note.
  */
 static struct kinfo_vmentry *
 kinfo_getvmmap_core(struct procstat_core *core, int *cntp)
 {
 	int cnt;
 	size_t len, left, recsz;
 	char *buf, *bp, *eb;
 	struct kinfo_vmentry *kiv, *kp, *kv;
 
 	buf = procstat_core_get(core, PSC_TYPE_VMMAP, NULL, &len);
 	if (buf == NULL)
 		return (NULL);
 
 	/*
 	 * XXXMG: The code below is just copy&paste from libutil.
 	 * The code duplication can be avoided if libutil
 	 * is extended to provide something like:
 	 *   struct kinfo_vmentry *kinfo_getvmmap_from_buf(const char *buf,
 	 *       size_t len, int *cntp);
 	 */
 
 	/* Pass 1: count the well-formed records */
 	cnt = 0;
 	bp = buf;
 	eb = buf + len;
 	while (bp < eb) {
 		left = (size_t)(eb - bp);
 		kv = (struct kinfo_vmentry *)(uintptr_t)bp;
 		/* The size field itself must be inside the note. */
 		if (left < sizeof(kv->kve_structsize))
 			break;
 		if (kv->kve_structsize <= 0)
 			break;
 		recsz = (size_t)kv->kve_structsize;
 		if (recsz < sizeof(kv->kve_structsize) || recsz > left ||
 		    recsz > sizeof(*kiv))
 			break;
 		bp += recsz;
 		cnt++;
 	}
 
 	kiv = calloc(cnt, sizeof(*kiv));
 	if (kiv == NULL) {
 		free(buf);
 		return (NULL);
 	}
 	/* Pass 2: unpack exactly the records validated by pass 1 */
 	bp = buf;
 	for (kp = kiv; kp < kiv + cnt; kp++) {
 		kv = (struct kinfo_vmentry *)(uintptr_t)bp;
 		recsz = (size_t)kv->kve_structsize;
 		/* Copy/expand into pre-zeroed buffer */
 		memcpy(kp, kv, recsz);
 		/* Advance to next packed record */
 		bp += recsz;
 		/* Set field size to fixed length */
 		kp->kve_structsize = sizeof(*kp);
 	}
 	free(buf);
 	*cntp = cnt;
 	return (kiv);	/* Caller must free() return value */
 }
 
 /*
  * Return the VM map entries of process kp and store their number in
  * *cntp.  The sysctl method uses kinfo_getvmmap() and the core method
  * uses kinfo_getvmmap_core(); the kvm method and unrecognized methods
  * print a warning and return NULL.
  */
 struct kinfo_vmentry *
 procstat_getvmmap(struct procstat *procstat, struct kinfo_proc *kp,
     unsigned int *cntp)
 {
 
 	if (procstat->type == PROCSTAT_SYSCTL)
 		return (kinfo_getvmmap(kp->ki_pid, cntp));
 	if (procstat->type == PROCSTAT_CORE)
 		return (kinfo_getvmmap_core(procstat->core, cntp));
 
 	/* Everything else is an error; only the message differs. */
 	if (procstat->type == PROCSTAT_KVM)
 		warnx("kvm method is not supported");
 	else
 		warnx("unknown access method: %d", procstat->type);
 	return (NULL);
 }
 
 /*
  * Free a VM map array with free().  The procstat handle is not used.
  */
 void
 procstat_freevmmap(struct procstat *procstat __unused,
     struct kinfo_vmentry *vmmap)
 {
 
 	free(vmmap);
 }
 
 /*
  * kvm back-end for procstat_getgroups(): follow kp->ki_paddr to the
  * kernel proc structure, then to its credentials, and copy the group
  * list into a malloc'ed array.  The number of groups is stored in
  * *cntp.  Returns NULL when the process has no credentials, when a
  * kernel read fails or when memory cannot be allocated; *cntp is then
  * left untouched.
  */
 static gid_t *
 procstat_getgroups_kvm(kvm_t *kd, struct kinfo_proc *kp, unsigned int *cntp)
 {
 	struct proc proc;
 	struct ucred ucred;
 	gid_t *list;
 	size_t nbytes;
 
 	assert(kd != NULL);
 	assert(kp != NULL);
 	if (!kvm_read_all(kd, (unsigned long)kp->ki_paddr, &proc,
 	    sizeof(proc))) {
 		warnx("can't read proc struct at %p for pid %d",
 		    kp->ki_paddr, kp->ki_pid);
 		return (NULL);
 	}
 	if (proc.p_ucred == NOCRED)
 		return (NULL);
 	if (!kvm_read_all(kd, (unsigned long)proc.p_ucred, &ucred,
 	    sizeof(ucred))) {
 		warnx("can't read ucred struct at %p for pid %d",
 		    proc.p_ucred, kp->ki_pid);
 		return (NULL);
 	}
 
 	/* Size the copy from the group count in the credentials. */
 	nbytes = ucred.cr_ngroups * sizeof(gid_t);
 	list = malloc(nbytes);
 	if (list == NULL) {
 		warn("malloc(%zu)", nbytes);
 		return (NULL);
 	}
 	if (kvm_read_all(kd, (unsigned long)ucred.cr_groups, list, nbytes)) {
 		*cntp = ucred.cr_ngroups;
 		return (list);
 	}
 	warnx("can't read groups at %p for pid %d",
 	    ucred.cr_groups, kp->ki_pid);
 	free(list);
 	return (NULL);
 }
 
 /*
  * Fetch the group list of process `pid' through the kern.proc.groups
  * sysctl.  The buffer is sized for _SC_NGROUPS_MAX + 1 entries; the number
  * of entries actually returned is stored in *cntp.  Returns a malloc'ed
  * array (the caller frees it) or NULL on failure.
  */
 static gid_t *
 procstat_getgroups_sysctl(pid_t pid, unsigned int *cntp)
 {
 	int name[4] = { CTL_KERN, KERN_PROC, KERN_PROC_GROUPS, pid };
 	gid_t *gids;
 	size_t nbytes;
 
 	nbytes = (sysconf(_SC_NGROUPS_MAX) + 1) * sizeof(gid_t);
 	gids = malloc(nbytes);
 	if (gids == NULL) {
 		warn("malloc(%zu)", nbytes);
 		return (NULL);
 	}
 	if (sysctl(name, nitems(name), gids, &nbytes, NULL, 0) != -1) {
 		/* The kernel updated nbytes to the amount it wrote. */
 		*cntp = nbytes / sizeof(gid_t);
 		return (gids);
 	}
 	warn("sysctl: kern.proc.groups: %d", pid);
 	free(gids);
 	return (NULL);
 }
 
 /*
  * Fetch the group list recorded in a core file.  The byte length reported
  * by procstat_core_get() is converted to an entry count in *cntp.  Returns
  * the buffer from procstat_core_get(), or NULL if the data is unavailable.
  */
 static gid_t *
 procstat_getgroups_core(struct procstat_core *core, unsigned int *cntp)
 {
 	gid_t *gids;
 	size_t nbytes;
 
 	gids = procstat_core_get(core, PSC_TYPE_GROUPS, NULL, &nbytes);
 	if (gids != NULL)
 		*cntp = nbytes / sizeof(gid_t);
 	return (gids);
 }
 
 /*
  * Return the group list of the process described by kp, using the backend
  * selected by procstat->type.  The entry count is reported through cntp.
  * An unrecognised type produces a warning and a NULL return.
  */
 gid_t *
 procstat_getgroups(struct procstat *procstat, struct kinfo_proc *kp,
     unsigned int *cntp)
 {
 	gid_t *gids = NULL;
 
 	switch (procstat->type) {
 	case PROCSTAT_KVM:
 		gids = procstat_getgroups_kvm(procstat->kd, kp, cntp);
 		break;
 	case PROCSTAT_SYSCTL:
 		gids = procstat_getgroups_sysctl(kp->ki_pid, cntp);
 		break;
 	case PROCSTAT_CORE:
 		gids = procstat_getgroups_core(procstat->core, cntp);
 		break;
 	default:
 		warnx("unknown access method: %d", procstat->type);
 		break;
 	}
 	return (gids);
 }
 
 /*
  * Free a group array, such as one returned by procstat_getgroups().  The
  * procstat handle is not used.
  */
 void
 procstat_freegroups(struct procstat *procstat __unused, gid_t *groups)
 {
 
 	free(groups);
 }
 
 /*
  * Read the file creation mask of a process from kernel memory by loading
  * the filedesc structure that kp->ki_fd points at.  Stores the mask in
  * *maskp and returns 0, or returns -1 if the process has no filedesc or
  * the read fails.
  */
 static int
 procstat_getumask_kvm(kvm_t *kd, struct kinfo_proc *kp, unsigned short *maskp)
 {
 	struct filedesc fdesc;
 
 	assert(kd != NULL);
 	assert(kp != NULL);
 	if (kp->ki_fd == NULL)
 		return (-1);
 	if (kvm_read_all(kd, (unsigned long)kp->ki_fd, &fdesc,
 	    sizeof(fdesc))) {
 		*maskp = fdesc.fd_cmask;
 		return (0);
 	}
 	warnx("can't read filedesc at %p for pid %d", kp->ki_fd,
 	    kp->ki_pid);
 	return (-1);
 }
 
 /*
  * Fetch the file creation mask of process `pid' through the
  * kern.proc.umask sysctl.  Returns the sysctl() result unchanged.  A
  * failure is reported with warn() unless errno is ESRCH or EPERM.
  */
 static int
 procstat_getumask_sysctl(pid_t pid, unsigned short *maskp)
 {
 	int name[4] = { CTL_KERN, KERN_PROC, KERN_PROC_UMASK, pid };
 	size_t size = sizeof(*maskp);
 	int rv;
 
 	rv = sysctl(name, nitems(name), maskp, &size, NULL, 0);
 	if (rv == 0)
 		return (rv);
 	if (errno != ESRCH && errno != EPERM)
 		warn("sysctl: kern.proc.umask: %d", pid);
 	return (rv);
 }
 
 /*
  * Fetch the file creation mask recorded in a core file.  The temporary
  * buffer obtained from procstat_core_get() is always released here.
  * Returns 0 with *maskp set, or -1 when the data is missing or too short.
  */
 static int
 procstat_getumask_core(struct procstat_core *core, unsigned short *maskp)
 {
 	unsigned short *data;
 	size_t size;
 	int rv;
 
 	data = procstat_core_get(core, PSC_TYPE_UMASK, NULL, &size);
 	if (data == NULL)
 		return (-1);
 	rv = -1;
 	if (size >= sizeof(*maskp)) {
 		*maskp = *data;
 		rv = 0;
 	}
 	free(data);
 	return (rv);
 }
 
 /*
  * Store the file creation mask of the process described by kp in *maskp,
  * using the backend selected by procstat->type.  Returns the backend's
  * result, or -1 (after a warning) for an unrecognised type.
  */
 int
 procstat_getumask(struct procstat *procstat, struct kinfo_proc *kp,
     unsigned short *maskp)
 {
 	int rv = -1;
 
 	switch (procstat->type) {
 	case PROCSTAT_KVM:
 		rv = procstat_getumask_kvm(procstat->kd, kp, maskp);
 		break;
 	case PROCSTAT_SYSCTL:
 		rv = procstat_getumask_sysctl(kp->ki_pid, maskp);
 		break;
 	case PROCSTAT_CORE:
 		rv = procstat_getumask_core(procstat->core, maskp);
 		break;
 	default:
 		warnx("unknown access method: %d", procstat->type);
 		break;
 	}
 	return (rv);
 }
 
 /*
  * Read resource limit `which' of a process from kernel memory.  The proc
  * structure is loaded first; the limit is then read from the address in
  * p_limit plus `which' rlimit-sized slots.  Returns 0 with *rlimit filled
  * in, or -1 if the process has no limits or a read fails.
  */
 static int
 procstat_getrlimit_kvm(kvm_t *kd, struct kinfo_proc *kp, int which,
     struct rlimit* rlimit)
 {
 	struct proc p;
 	unsigned long addr;
 
 	assert(kd != NULL);
 	assert(kp != NULL);
 	assert(which >= 0 && which < RLIM_NLIMITS);
 	if (!kvm_read_all(kd, (unsigned long)kp->ki_paddr, &p, sizeof(p))) {
 		warnx("can't read proc struct at %p for pid %d",
 		    kp->ki_paddr, kp->ki_pid);
 		return (-1);
 	}
 	if (p.p_limit == NULL)
 		return (-1);
 	addr = (unsigned long)p.p_limit + sizeof(struct rlimit) * which;
 	if (kvm_read_all(kd, addr, rlimit, sizeof(*rlimit)))
 		return (0);
 	warnx("can't read rlimit struct at %p for pid %d",
 	    (void *)addr, kp->ki_pid);
 	return (-1);
 }
 
 /*
  * Fetch resource limit `which' of process `pid' through the
  * kern.proc.rlimit sysctl.  Returns 0 when exactly one struct rlimit was
  * returned, otherwise -1.  A sysctl failure is reported with warn() unless
  * errno is ESRCH.
  */
 static int
 procstat_getrlimit_sysctl(pid_t pid, int which, struct rlimit* rlimit)
 {
 	int mib[5] = { CTL_KERN, KERN_PROC, KERN_PROC_RLIMIT, pid, which };
 	size_t size = sizeof(struct rlimit);
 
 	if (sysctl(mib, nitems(mib), rlimit, &size, NULL, 0) < 0) {
 		if (errno != ESRCH)
 			warn("sysctl: kern.proc.rlimit: %d", pid);
 		return (-1);
 	}
 	if (size != sizeof(struct rlimit))
 		return (-1);
 	return (0);
 }
 
 /*
  * Fetch resource limit `which' from the limits recorded in a core file.
  *
  * The whole limit table is obtained from procstat_core_get() into a
  * temporary buffer, the requested entry is copied to *rlimit, and the
  * buffer is released on every path (it was previously leaked on success).
  *
  * Returns 0 on success.  Returns -1 if `which' is out of range (errno is
  * set to EINVAL and a warning is printed), if the note is missing, or if
  * it holds fewer than RLIM_NLIMITS entries.
  */
 static int
 procstat_getrlimit_core(struct procstat_core *core, int which,
     struct rlimit* rlimit)
 {
 	size_t len;
 	struct rlimit* rlimits;
 
 	if (which < 0 || which >= RLIM_NLIMITS) {
 		errno = EINVAL;
 		warn("getrlimit: which");
 		return (-1);
 	}
 	rlimits = procstat_core_get(core, PSC_TYPE_RLIMIT, NULL, &len);
 	if (rlimits == NULL)
 		return (-1);
 	if (len < sizeof(struct rlimit) * RLIM_NLIMITS) {
 		free(rlimits);
 		return (-1);
 	}
 	*rlimit = rlimits[which];
 	/* The copy above is all we need; do not leak the table. */
 	free(rlimits);
 	return (0);
 }
 
 /*
  * Store resource limit `which' of the process described by kp in *rlimit,
  * using the backend selected by procstat->type.  Returns the backend's
  * result, or -1 (after a warning) for an unrecognised type.
  */
 int
 procstat_getrlimit(struct procstat *procstat, struct kinfo_proc *kp, int which,
     struct rlimit* rlimit)
 {
 	int rv = -1;
 
 	switch (procstat->type) {
 	case PROCSTAT_KVM:
 		rv = procstat_getrlimit_kvm(procstat->kd, kp, which, rlimit);
 		break;
 	case PROCSTAT_SYSCTL:
 		rv = procstat_getrlimit_sysctl(kp->ki_pid, which, rlimit);
 		break;
 	case PROCSTAT_CORE:
 		rv = procstat_getrlimit_core(procstat->core, which, rlimit);
 		break;
 	default:
 		warnx("unknown access method: %d", procstat->type);
 		break;
 	}
 	return (rv);
 }
 
 /*
  * Fetch the executable path of process `pid' through the
  * kern.proc.pathname sysctl into `pathname', which holds `maxlen' bytes.
  *
  * Returns the sysctl() result.  A failure is reported with warn() unless
  * errno is ESRCH.  When the resulting length is zero the buffer is set to
  * the empty string, but only if it has room for the terminator: a caller
  * passing maxlen == 0 must not have pathname[0] written.
  */
 static int
 procstat_getpathname_sysctl(pid_t pid, char *pathname, size_t maxlen)
 {
 	int error, name[4];
 	size_t len;
 
 	name[0] = CTL_KERN;
 	name[1] = KERN_PROC;
 	name[2] = KERN_PROC_PATHNAME;
 	name[3] = pid;
 	len = maxlen;
 	error = sysctl(name, nitems(name), pathname, &len, NULL, 0);
 	if (error != 0 && errno != ESRCH)
 		warn("sysctl: kern.proc.pathname: %d", pid);
 	/* Same zero-size guard as the KVM branch of procstat_getpathname(). */
 	if (len == 0 && maxlen > 0)
 		pathname[0] = '\0';
 	return (error);
 }
 
 /*
  * Recover the executable path from a core file by scanning its file
  * records for the entry whose kf_fd is KF_FD_TYPE_TEXT and copying that
  * entry's kf_path into `pathname' (capacity `maxlen' bytes).
  *
  * The copy is always NUL-terminated when maxlen > 0; a path that does not
  * fit is truncated.  Nothing is written when maxlen == 0.
  *
  * Returns 0 if a text entry was found, -1 if the file records could not
  * be read or contain no text entry.
  */
 static int
 procstat_getpathname_core(struct procstat_core *core, char *pathname,
     size_t maxlen)
 {
 	struct kinfo_file *files;
 	int cnt, i, result;
 
 	files = kinfo_getfile_core(core, &cnt);
 	if (files == NULL)
 		return (-1);
 	result = -1;
 	for (i = 0; i < cnt; i++) {
 		if (files[i].kf_fd != KF_FD_TYPE_TEXT)
 			continue;
 		if (maxlen > 0) {
 			strncpy(pathname, files[i].kf_path, maxlen);
 			/* strncpy() does not terminate on truncation. */
 			pathname[maxlen - 1] = '\0';
 		}
 		result = 0;
 		break;
 	}
 	free(files);
 	return (result);
 }
 
 /*
  * Store the executable path of the process described by kp in `pathname'
  * (capacity `maxlen' bytes), using the backend selected by procstat->type.
  * The kvm backend cannot supply a path: it yields an empty string and
  * reports success.  An unrecognised type produces a warning and -1.
  */
 int
 procstat_getpathname(struct procstat *procstat, struct kinfo_proc *kp,
     char *pathname, size_t maxlen)
 {
 	int rv;
 
 	switch (procstat->type) {
 	case PROCSTAT_SYSCTL:
 		rv = procstat_getpathname_sysctl(kp->ki_pid, pathname,
 		    maxlen);
 		break;
 	case PROCSTAT_CORE:
 		rv = procstat_getpathname_core(procstat->core, pathname,
 		    maxlen);
 		break;
 	case PROCSTAT_KVM:
 		/* XXX: Return empty string. */
 		if (maxlen > 0)
 			pathname[0] = '\0';
 		rv = 0;
 		break;
 	default:
 		warnx("unknown access method: %d", procstat->type);
 		rv = -1;
 		break;
 	}
 	return (rv);
 }
 
 /*
  * Read the p_osrel field of a process from kernel memory.  Returns 0 with
  * *osrelp set, or -1 if the proc structure cannot be read.
  */
 static int
 procstat_getosrel_kvm(kvm_t *kd, struct kinfo_proc *kp, int *osrelp)
 {
 	struct proc p;
 
 	assert(kd != NULL);
 	assert(kp != NULL);
 	if (kvm_read_all(kd, (unsigned long)kp->ki_paddr, &p, sizeof(p))) {
 		*osrelp = p.p_osrel;
 		return (0);
 	}
 	warnx("can't read proc struct at %p for pid %d",
 	    kp->ki_paddr, kp->ki_pid);
 	return (-1);
 }
 
 /*
  * Fetch the osrel value of process `pid' through the kern.proc.osrel
  * sysctl.  Returns the sysctl() result unchanged.  A failure is reported
  * with warn() unless errno is ESRCH.
  */
 static int
 procstat_getosrel_sysctl(pid_t pid, int *osrelp)
 {
 	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_OSREL, pid };
 	size_t size = sizeof(*osrelp);
 	int rv;
 
 	rv = sysctl(mib, nitems(mib), osrelp, &size, NULL, 0);
 	if (rv == 0)
 		return (rv);
 	if (errno != ESRCH)
 		warn("sysctl: kern.proc.osrel: %d", pid);
 	return (rv);
 }
 
 /*
  * Fetch the osrel value recorded in a core file.  The temporary buffer
  * obtained from procstat_core_get() is always released here.  Returns 0
  * with *osrelp set, or -1 when the data is missing or too short.
  */
 static int
 procstat_getosrel_core(struct procstat_core *core, int *osrelp)
 {
 	int *data;
 	size_t size;
 	int rv;
 
 	data = procstat_core_get(core, PSC_TYPE_OSREL, NULL, &size);
 	if (data == NULL)
 		return (-1);
 	rv = -1;
 	if (size >= sizeof(*osrelp)) {
 		*osrelp = *data;
 		rv = 0;
 	}
 	free(data);
 	return (rv);
 }
 
 /*
  * Store the osrel value of the process described by kp in *osrelp, using
  * the backend selected by procstat->type.  Returns the backend's result,
  * or -1 (after a warning) for an unrecognised type.
  */
 int
 procstat_getosrel(struct procstat *procstat, struct kinfo_proc *kp, int *osrelp)
 {
 	int rv = -1;
 
 	switch (procstat->type) {
 	case PROCSTAT_KVM:
 		rv = procstat_getosrel_kvm(procstat->kd, kp, osrelp);
 		break;
 	case PROCSTAT_SYSCTL:
 		rv = procstat_getosrel_sysctl(kp->ki_pid, osrelp);
 		break;
 	case PROCSTAT_CORE:
 		rv = procstat_getosrel_core(procstat->core, osrelp);
 		break;
 	default:
 		warnx("unknown access method: %d", procstat->type);
 		break;
 	}
 	return (rv);
 }
 
 #define PROC_AUXV_MAX	256
 
 #if __ELF_WORD_SIZE == 64
 /*
  * Names that is_elf32_sysctl() compares against the string returned by the
  * KERN_PROC_SV_NAME sysctl to decide that a process is a 32-bit ELF one.
  */
 static const char *elf32_sv_names[] = {
 	"Linux ELF32",
 	"FreeBSD ELF32",
 };
 
 /*
  * Report whether process `pid' is a 32-bit ELF process by fetching its
  * KERN_PROC_SV_NAME string and looking it up in elf32_sv_names.  Returns 1
  * on a match and 0 otherwise, including when the sysctl fails or returns
  * no data.  The name is held in a function-static buffer.
  */
 static int
 is_elf32_sysctl(pid_t pid)
 {
 	static char sv_name[256];
 	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_SV_NAME, pid };
 	size_t idx, size;
 
 	size = sizeof(sv_name);
 	if (sysctl(mib, nitems(mib), sv_name, &size, NULL, 0) != 0 ||
 	    size == 0)
 		return (0);
 	for (idx = 0; idx < nitems(elf32_sv_names); idx++) {
 		if (strncmp(sv_name, elf32_sv_names[idx],
 		    sizeof(sv_name)) == 0)
 			return (1);
 	}
 	return (0);
 }
 
 /*
  * Fetch the ELF auxiliary vector of a 32-bit process `pid' and widen it to
  * the native Elf_Auxinfo layout.
  *
  * The sysctl buffer is treated as an array of Elf32_Auxinfo records, so
  * the entry count is the returned byte length divided by
  * sizeof(Elf32_Auxinfo).  (Dividing by the larger native record size
  * undercounts and drops the tail of the vector.)
  *
  * On success the entry count is stored in *cntp and a malloc'ed array is
  * returned (the caller frees it).  NULL is returned on allocation or
  * sysctl failure; the latter is reported with warn() unless errno is
  * ESRCH or EPERM.
  */
 static Elf_Auxinfo *
 procstat_getauxv32_sysctl(pid_t pid, unsigned int *cntp)
 {
 	Elf_Auxinfo *auxv;
 	Elf32_Auxinfo *auxv32;
 	void *ptr;
 	size_t len;
 	unsigned int i, count;
 	int name[4];
 
 	name[0] = CTL_KERN;
 	name[1] = KERN_PROC;
 	name[2] = KERN_PROC_AUXV;
 	name[3] = pid;
 	len = PROC_AUXV_MAX * sizeof(Elf32_Auxinfo);
 	auxv = NULL;
 	auxv32 = malloc(len);
 	if (auxv32 == NULL) {
 		warn("malloc(%zu)", len);
 		goto out;
 	}
 	if (sysctl(name, nitems(name), auxv32, &len, NULL, 0) == -1) {
 		if (errno != ESRCH && errno != EPERM)
 			warn("sysctl: kern.proc.auxv: %d: %d", pid, errno);
 		goto out;
 	}
 	/* len counts bytes of 32-bit records, not native ones. */
 	count = len / sizeof(Elf32_Auxinfo);
 	auxv = malloc(count  * sizeof(Elf_Auxinfo));
 	if (auxv == NULL) {
 		warn("malloc(%zu)", count * sizeof(Elf_Auxinfo));
 		goto out;
 	}
 	for (i = 0; i < count; i++) {
 		/*
 		 * XXX: We expect that values for a_type on a 32-bit platform
 		 * are directly mapped to values on 64-bit one, which is not
 		 * necessarily true.
 		 */
 		auxv[i].a_type = auxv32[i].a_type;
 		/* Read the 32-bit union as a plain 32-bit value. */
 		ptr = &auxv32[i].a_un;
 		auxv[i].a_un.a_val = *((uint32_t *)ptr);
 	}
 	*cntp = count;
 out:
 	/* auxv32 is only a staging buffer; free(NULL) is harmless. */
 	free(auxv32);
 	return (auxv);
 }
 #endif /* __ELF_WORD_SIZE == 64 */
 
 /*
  * Fetch the ELF auxiliary vector of process `pid' through the
  * kern.proc.auxv sysctl.  On 64-bit builds a process identified as 32-bit
  * ELF is handed to procstat_getauxv32_sysctl() instead.  On success the
  * entry count is stored in *cntp and a malloc'ed array is returned (the
  * caller frees it); NULL is returned on failure.  A sysctl failure is
  * reported with warn() unless errno is ESRCH or EPERM.
  */
 static Elf_Auxinfo *
 procstat_getauxv_sysctl(pid_t pid, unsigned int *cntp)
 {
 	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_AUXV, pid };
 	Elf_Auxinfo *vec;
 	size_t nbytes;
 
 #if __ELF_WORD_SIZE == 64
 	if (is_elf32_sysctl(pid))
 		return (procstat_getauxv32_sysctl(pid, cntp));
 #endif
 	nbytes = PROC_AUXV_MAX * sizeof(Elf_Auxinfo);
 	vec = malloc(nbytes);
 	if (vec == NULL) {
 		warn("malloc(%zu)", nbytes);
 		return (NULL);
 	}
 	if (sysctl(mib, nitems(mib), vec, &nbytes, NULL, 0) != -1) {
 		*cntp = nbytes / sizeof(Elf_Auxinfo);
 		return (vec);
 	}
 	if (errno != ESRCH && errno != EPERM)
 		warn("sysctl: kern.proc.auxv: %d: %d", pid, errno);
 	free(vec);
 	return (NULL);
 }
 
 /*
  * Fetch the ELF auxiliary vector recorded in a core file.  The byte length
  * reported by procstat_core_get() is converted to an entry count in *cntp.
  * Returns the buffer from procstat_core_get(), or NULL if unavailable.
  */
 static Elf_Auxinfo *
 procstat_getauxv_core(struct procstat_core *core, unsigned int *cntp)
 {
 	Elf_Auxinfo *vec;
 	size_t nbytes;
 
 	vec = procstat_core_get(core, PSC_TYPE_AUXV, NULL, &nbytes);
 	if (vec != NULL)
 		*cntp = nbytes / sizeof(Elf_Auxinfo);
 	return (vec);
 }
 
 /*
  * Return the ELF auxiliary vector of the process described by kp, using
  * the backend selected by procstat->type.  The entry count is reported
  * through cntp.  The kvm backend is not implemented; it and any
  * unrecognised type produce a warning and a NULL return.
  */
 Elf_Auxinfo *
 procstat_getauxv(struct procstat *procstat, struct kinfo_proc *kp,
     unsigned int *cntp)
 {
 	Elf_Auxinfo *vec = NULL;
 
 	switch (procstat->type) {
 	case PROCSTAT_SYSCTL:
 		vec = procstat_getauxv_sysctl(kp->ki_pid, cntp);
 		break;
 	case PROCSTAT_CORE:
 		vec = procstat_getauxv_core(procstat->core, cntp);
 		break;
 	case PROCSTAT_KVM:
 		warnx("kvm method is not supported");
 		break;
 	default:
 		warnx("unknown access method: %d", procstat->type);
 		break;
 	}
 	return (vec);
 }
 
 /*
  * Free an auxiliary vector, such as one returned by procstat_getauxv().
  * The procstat handle is not used.
  */
 void
 procstat_freeauxv(struct procstat *procstat __unused, Elf_Auxinfo *auxv)
 {
 
 	free(auxv);
 }
 
 /*
  * Collect the ptrace_lwpinfo records stored in a core file.
  *
  * The number of PSC_TYPE_PTLWPINFO notes determines the size of a zeroed
  * buffer, which procstat_core_get() is then asked to fill.  The allocation
  * uses calloc(cnt, size) so the multiplication is overflow-checked, and a
  * failed allocation is reported instead of passing a NULL buffer on.
  *
  * On success the record count (derived from the length reported back) is
  * stored in *cntp and the array is returned (the caller frees it).
  * Returns NULL when there are no such notes, on allocation failure, or
  * when procstat_core_get() fails.
  */
 static struct ptrace_lwpinfo *
 procstat_getptlwpinfo_core(struct procstat_core *core, unsigned int *cntp)
 {
 	void *buf;
 	struct ptrace_lwpinfo *pl;
 	unsigned int cnt;
 	size_t len;
 
 	cnt = procstat_core_note_count(core, PSC_TYPE_PTLWPINFO);
 	if (cnt == 0)
 		return (NULL);
 
 	len = cnt * sizeof(*pl);
 	buf = calloc(cnt, sizeof(*pl));
 	if (buf == NULL) {
 		warn("calloc(%u, %zu)", cnt, sizeof(*pl));
 		return (NULL);
 	}
 	pl = procstat_core_get(core, PSC_TYPE_PTLWPINFO, buf, &len);
 	if (pl == NULL) {
 		free(buf);
 		return (NULL);
 	}
 	*cntp = len / sizeof(*pl);
 	return (pl);
 }
 
 /*
  * Return the ptrace_lwpinfo records for the target of `procstat'.  Only
  * the core-file backend handles this request; every other type produces
  * a warning and a NULL return.  The record count is reported through cntp.
  */
 struct ptrace_lwpinfo *
 procstat_getptlwpinfo(struct procstat *procstat, unsigned int *cntp)
 {
 
 	if (procstat->type == PROCSTAT_CORE)
 		return (procstat_getptlwpinfo_core(procstat->core, cntp));
 	warnx("unknown access method: %d", procstat->type);
 	return (NULL);
 }
 
 /*
  * Free a ptrace_lwpinfo array, such as one returned by
  * procstat_getptlwpinfo().  The procstat handle is not used.
  */
 void
 procstat_freeptlwpinfo(struct procstat *procstat __unused,
     struct ptrace_lwpinfo *pl)
 {
 	free(pl);
 }
 
 /*
  * Fetch the kernel stacks of process `pid' through the kern.proc.kstack
  * sysctl.
  *
  * A first call with a NULL buffer asks the kernel for the required size;
  * a second call retrieves the data into a buffer of that size.  On
  * success the number of kinfo_kstack records is stored in *cntp and the
  * malloc'ed array is returned (the caller frees it).
  *
  * Returns NULL on failure.  During the size probe, ESRCH and EPERM are
  * silent, ENOENT prints a hint about the required kernel options, and
  * any other error is reported with warn().  A failure of the second call
  * is reported under the kern.proc.kstack name (the message used to name
  * kern.proc.pid by mistake).
  *
  * NOTE(review): cntp is `int *' here while procstat_getkstack() passes an
  * `unsigned int *'; left unchanged in case a prototype exists elsewhere.
  */
 static struct kinfo_kstack *
 procstat_getkstack_sysctl(pid_t pid, int *cntp)
 {
 	struct kinfo_kstack *kkstp;
 	int error, name[4];
 	size_t len;
 
 	name[0] = CTL_KERN;
 	name[1] = KERN_PROC;
 	name[2] = KERN_PROC_KSTACK;
 	name[3] = pid;
 
 	/* Size probe: NULL buffer, kernel fills in len. */
 	len = 0;
 	error = sysctl(name, nitems(name), NULL, &len, NULL, 0);
 	if (error < 0 && errno != ESRCH && errno != EPERM && errno != ENOENT) {
 		warn("sysctl: kern.proc.kstack: %d", pid);
 		return (NULL);
 	}
 	if (error == -1 && errno == ENOENT) {
 		warnx("sysctl: kern.proc.kstack unavailable"
 		    " (options DDB or options STACK required in kernel)");
 		return (NULL);
 	}
 	if (error == -1)
 		return (NULL);
 	kkstp = malloc(len);
 	if (kkstp == NULL) {
 		warn("malloc(%zu)", len);
 		return (NULL);
 	}
 	if (sysctl(name, nitems(name), kkstp, &len, NULL, 0) == -1) {
 		warn("sysctl: kern.proc.kstack: %d", pid);
 		free(kkstp);
 		return (NULL);
 	}
 	*cntp = len / sizeof(*kkstp);
 
 	return (kkstp);
 }
 
 /*
  * Return the kernel stacks of the process described by kp.  Only the
  * sysctl backend implements this; the kvm and core backends, and any
  * unrecognised type, produce a warning and a NULL return.  The record
  * count is reported through cntp.
  */
 struct kinfo_kstack *
 procstat_getkstack(struct procstat *procstat, struct kinfo_proc *kp,
     unsigned int *cntp)
 {
 	struct kinfo_kstack *stacks = NULL;
 
 	switch (procstat->type) {
 	case PROCSTAT_SYSCTL:
 		stacks = procstat_getkstack_sysctl(kp->ki_pid, cntp);
 		break;
 	case PROCSTAT_KVM:
 		warnx("kvm method is not supported");
 		break;
 	case PROCSTAT_CORE:
 		warnx("core method is not supported");
 		break;
 	default:
 		warnx("unknown access method: %d", procstat->type);
 		break;
 	}
 	return (stacks);
 }
 
 /*
  * Free a kernel stack array, such as one returned by procstat_getkstack().
  * The procstat handle is not used.
  */
 void
 procstat_freekstack(struct procstat *procstat __unused,
     struct kinfo_kstack *kkstp)
 {
 
 	free(kkstp);
 }
Index: head/lib/libprocstat/libprocstat.h
===================================================================
--- head/lib/libprocstat/libprocstat.h	(revision 318735)
+++ head/lib/libprocstat/libprocstat.h	(revision 318736)
@@ -1,226 +1,226 @@
 /*-
  * Copyright (c) 2009 Stanislav Sedov <stas@FreeBSD.org>
  * Copyright (c) 2017 Dell EMC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _LIBPROCSTAT_H_
 #define	_LIBPROCSTAT_H_
 
 /*
  * XXX: sys/elf.h conflicts with zfs_context.h. Workaround this by not
  * including conflicting parts when building zfs code.
  */
 #ifndef ZFS
 #include <sys/elf.h>
 #endif
 #include <sys/caprights.h>
 
 /*
  * Vnode types.
  */
 #define	PS_FST_VTYPE_VNON	1
 #define	PS_FST_VTYPE_VREG	2
 #define	PS_FST_VTYPE_VDIR	3
 #define	PS_FST_VTYPE_VBLK	4
 #define	PS_FST_VTYPE_VCHR	5
 #define	PS_FST_VTYPE_VLNK	6
 #define	PS_FST_VTYPE_VSOCK	7
 #define	PS_FST_VTYPE_VFIFO	8
 #define	PS_FST_VTYPE_VBAD	9
 #define	PS_FST_VTYPE_UNKNOWN	255
 
 /*
  * Descriptor types.
  */
 #define	PS_FST_TYPE_VNODE	1
 #define	PS_FST_TYPE_FIFO	2
 #define	PS_FST_TYPE_SOCKET	3
 #define	PS_FST_TYPE_PIPE	4
 #define	PS_FST_TYPE_PTS		5
 #define	PS_FST_TYPE_KQUEUE	6
 #define	PS_FST_TYPE_CRYPTO	7
 #define	PS_FST_TYPE_MQUEUE	8
 #define	PS_FST_TYPE_SHM		9
 #define	PS_FST_TYPE_SEM		10
 #define	PS_FST_TYPE_UNKNOWN	11
 #define	PS_FST_TYPE_NONE	12
 
 /*
  * Usage flags for special descriptors (bit values for fs_uflags).
  */
 #define	PS_FST_UFLAG_RDIR	0x0001
 #define	PS_FST_UFLAG_CDIR	0x0002
 #define	PS_FST_UFLAG_JAIL	0x0004
 #define	PS_FST_UFLAG_TRACE	0x0008
 #define	PS_FST_UFLAG_TEXT	0x0010
 #define	PS_FST_UFLAG_MMAP	0x0020
 #define	PS_FST_UFLAG_CTTY	0x0040
 
 /*
  * Descriptor flags.
  */
 #define PS_FST_FFLAG_READ	0x0001
 #define PS_FST_FFLAG_WRITE	0x0002
 #define	PS_FST_FFLAG_NONBLOCK	0x0004
 #define	PS_FST_FFLAG_APPEND	0x0008
 #define	PS_FST_FFLAG_SHLOCK	0x0010
 #define	PS_FST_FFLAG_EXLOCK	0x0020
 #define	PS_FST_FFLAG_ASYNC	0x0040
 #define	PS_FST_FFLAG_SYNC	0x0080
 #define	PS_FST_FFLAG_NOFOLLOW	0x0100
 #define	PS_FST_FFLAG_CREAT	0x0200
 #define	PS_FST_FFLAG_TRUNC	0x0400
 #define	PS_FST_FFLAG_EXCL	0x0800
 #define	PS_FST_FFLAG_DIRECT	0x1000
 #define	PS_FST_FFLAG_EXEC	0x2000
 #define	PS_FST_FFLAG_HASLOCK	0x4000
 
 struct kinfo_kstack;
 struct kinfo_vmentry;
 struct procstat;
 struct ptrace_lwpinfo;
 struct rlimit;
 /*
  * Description of one open file of a process.  Records are chained through
  * `next' into a struct filestat_list (see STAILQ_HEAD below).
  */
 struct filestat {
 	int	fs_type;	/* Descriptor type. */
 	int	fs_flags;	/* filestat specific flags. */
 	int	fs_fflags;	/* Descriptor access flags. */
 	int	fs_uflags;	/* How this file is used. */
 	int	fs_fd;		/* File descriptor number. */
 	int	fs_ref_count;	/* Reference count. */
 	off_t	fs_offset;	/* Seek location. */
 	void	*fs_typedep;	/* Type dependent data. */
 	char	*fs_path;	/* NOTE(review): presumably the file's path -- confirm. */
 	STAILQ_ENTRY(filestat)	next;	/* Linkage within a filestat_list. */
 	cap_rights_t	fs_cap_rights;	/* Capability rights, if flag set. */
 };
 struct vnstat {	/* Vnode details; output of procstat_get_vnode_info(). */
 	uint64_t	vn_fileid;
 	uint64_t	vn_size;
+	uint64_t	vn_dev;
+	uint64_t	vn_fsid;
 	char		*vn_mntdir;
-	uint32_t	vn_dev;
-	uint32_t	vn_fsid;
 	int		vn_type;
 	uint16_t	vn_mode;
 	char		vn_devname[SPECNAMELEN + 1];
 };
 struct ptsstat {	/* Pseudo-terminal details; output of procstat_get_pts_info(). */
-	uint32_t	dev;
+	uint64_t	dev;
 	char		devname[SPECNAMELEN + 1];
 };
 /*
  * Pipe details; passed to procstat_get_pipe_info().
  * NOTE(review): addr/peer look like addresses of the two pipe ends -- confirm.
  */
 struct pipestat {
 	size_t		buffer_cnt;
 	uint64_t	addr;
 	uint64_t	peer;
 };
 /* Semaphore details; output of procstat_get_sem_info(). */
 struct semstat {
 	uint32_t	value;
 	uint16_t	mode;
 };
 /* Shared memory object details; output of procstat_get_shm_info(). */
 struct shmstat {
 	uint64_t	size;
 	uint16_t	mode;
 };
 /*
  * Socket details; passed to procstat_get_socket_info().
  * NOTE(review): the uint64_t members appear to carry kernel addresses of
  * the named socket/PCB structures -- confirm against the implementation.
  */
 struct sockstat {
 	uint64_t	inp_ppcb;
 	uint64_t	so_addr;
 	uint64_t	so_pcb;
 	uint64_t	unp_conn;
 	int		dom_family;
 	int		proto;
 	int		so_rcv_sb_state;
 	int		so_snd_sb_state;
 	struct sockaddr_storage	sa_local;	/* Socket address. */
 	struct sockaddr_storage	sa_peer;	/* Peer address. */
 	int		type;
 	char		dname[32];
 };
 
 STAILQ_HEAD(filestat_list, filestat);
 
 __BEGIN_DECLS
 void	procstat_close(struct procstat *procstat);
 void	procstat_freeargv(struct procstat *procstat);
 #ifndef ZFS
 void	procstat_freeauxv(struct procstat *procstat, Elf_Auxinfo *auxv);
 #endif
 void	procstat_freeenvv(struct procstat *procstat);
 void	procstat_freegroups(struct procstat *procstat, gid_t *groups);
 void	procstat_freekstack(struct procstat *procstat,
     struct kinfo_kstack *kkstp);
 void	procstat_freeprocs(struct procstat *procstat, struct kinfo_proc *p);
 void	procstat_freefiles(struct procstat *procstat,
     struct filestat_list *head);
 void	procstat_freeptlwpinfo(struct procstat *procstat,
     struct ptrace_lwpinfo *pl);
 void	procstat_freevmmap(struct procstat *procstat,
     struct kinfo_vmentry *vmmap);
 struct filestat_list	*procstat_getfiles(struct procstat *procstat,
     struct kinfo_proc *kp, int mmapped);
 struct kinfo_proc	*procstat_getprocs(struct procstat *procstat,
     int what, int arg, unsigned int *count);
 int	procstat_get_pipe_info(struct procstat *procstat, struct filestat *fst,
     struct pipestat *pipe, char *errbuf);
 int	procstat_get_pts_info(struct procstat *procstat, struct filestat *fst,
     struct ptsstat *pts, char *errbuf);
 int	procstat_get_sem_info(struct procstat *procstat, struct filestat *fst,
     struct semstat *sem, char *errbuf);
 int	procstat_get_shm_info(struct procstat *procstat, struct filestat *fst,
     struct shmstat *shm, char *errbuf);
 int	procstat_get_socket_info(struct procstat *procstat, struct filestat *fst,
     struct sockstat *sock, char *errbuf);
 int	procstat_get_vnode_info(struct procstat *procstat, struct filestat *fst,
     struct vnstat *vn, char *errbuf);
 char	**procstat_getargv(struct procstat *procstat, struct kinfo_proc *p,
     size_t nchr);
 #ifndef ZFS
 Elf_Auxinfo	*procstat_getauxv(struct procstat *procstat,
     struct kinfo_proc *kp, unsigned int *cntp);
 #endif
 struct ptrace_lwpinfo	*procstat_getptlwpinfo(struct procstat *procstat,
     unsigned int *cntp);
 char	**procstat_getenvv(struct procstat *procstat, struct kinfo_proc *p,
     size_t nchr);
 gid_t	*procstat_getgroups(struct procstat *procstat, struct kinfo_proc *kp,
     unsigned int *count);
 struct kinfo_kstack	*procstat_getkstack(struct procstat *procstat,
     struct kinfo_proc *kp, unsigned int *count);
 int	procstat_getosrel(struct procstat *procstat, struct kinfo_proc *kp,
     int *osrelp);
 int	procstat_getpathname(struct procstat *procstat, struct kinfo_proc *kp,
     char *pathname, size_t maxlen);
 int	procstat_getrlimit(struct procstat *procstat, struct kinfo_proc *kp,
     int which, struct rlimit* rlimit);
 int	procstat_getumask(struct procstat *procstat, struct kinfo_proc *kp,
     unsigned short* umask);
 struct kinfo_vmentry	*procstat_getvmmap(struct procstat *procstat,
     struct kinfo_proc *kp, unsigned int *count);
 struct procstat	*procstat_open_core(const char *filename);
 struct procstat	*procstat_open_sysctl(void);
 struct procstat	*procstat_open_kvm(const char *nlistf, const char *memf);
 __END_DECLS
 
 #endif	/* !_LIBPROCSTAT_H_ */
Index: head/lib/libprocstat/libprocstat_compat.c
===================================================================
--- head/lib/libprocstat/libprocstat_compat.c	(nonexistent)
+++ head/lib/libprocstat/libprocstat_compat.c	(revision 318736)
@@ -0,0 +1,144 @@
+/*-
+ * Copyright (c) 2014 Gleb Kurtsou <gleb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/user.h>
+#include <sys/socket.h>
+#include <string.h>
+
+#include "libprocstat.h"
+
+struct freebsd11_ptsstat {	/* Old struct ptsstat layout: 32-bit dev. */
+	uint32_t	dev;
+	char		devname[SPECNAMELEN + 1];
+};
+
+struct freebsd11_vnstat {	/* Old struct vnstat layout: 32-bit vn_dev/vn_fsid. */
+	uint64_t	vn_fileid;
+	uint64_t	vn_size;
+	char		*vn_mntdir;
+	uint32_t	vn_dev;
+	uint32_t	vn_fsid;
+	int		vn_type;
+	uint16_t	vn_mode;
+	char		vn_devname[SPECNAMELEN + 1];
+};
+struct freebsd11_semstat {	/* Same members as struct semstat. */
+	uint32_t	value;
+	uint16_t	mode;
+};
+struct freebsd11_shmstat {	/* Same members as struct shmstat. */
+	uint64_t	size;
+	uint16_t	mode;
+};
+
+int	freebsd11_procstat_get_pts_info(struct procstat *procstat,
+    struct filestat *fst, struct freebsd11_ptsstat *pts, char *errbuf);
+int	freebsd11_procstat_get_sem_info(struct procstat *procstat,
+    struct filestat *fst, struct freebsd11_semstat *sem, char *errbuf);
+int	freebsd11_procstat_get_shm_info(struct procstat *procstat,
+    struct filestat *fst, struct freebsd11_shmstat *shm, char *errbuf);
+int	freebsd11_procstat_get_vnode_info(struct procstat *procstat,
+    struct filestat *fst, struct freebsd11_vnstat *vn, char *errbuf);
+
+/*
+ * Compatibility entry point that reports pts information in the old
+ * freebsd11_ptsstat layout.  It calls procstat_get_pts_info() and copies
+ * the result, narrowing `dev' from 64 to 32 bits.  Returns that call's
+ * result; *pts_compat is written only when it is 0.
+ */
+int
+freebsd11_procstat_get_pts_info(struct procstat *procstat,
+    struct filestat *fst, struct freebsd11_ptsstat *pts_compat, char *errbuf)
+{
+	struct ptsstat cur;
+	int error;
+
+	error = procstat_get_pts_info(procstat, fst, &cur, errbuf);
+	if (error == 0) {
+		pts_compat->dev = cur.dev;
+		memcpy(pts_compat->devname, cur.devname,
+		    sizeof(pts_compat->devname));
+	}
+	return (error);
+}
+
+/*
+ * Compatibility entry point that reports semaphore information in the
+ * freebsd11_semstat layout.  It calls procstat_get_sem_info() and copies
+ * each member across.  Returns that call's result; *sem_compat is written
+ * only when it is 0.
+ */
+int
+freebsd11_procstat_get_sem_info(struct procstat *procstat,
+    struct filestat *fst, struct freebsd11_semstat *sem_compat, char *errbuf)
+{
+	struct semstat cur;
+	int error;
+
+	error = procstat_get_sem_info(procstat, fst, &cur, errbuf);
+	if (error == 0) {
+		sem_compat->value = cur.value;
+		sem_compat->mode = cur.mode;
+	}
+	return (error);
+}
+
+/*
+ * Compatibility entry point that reports shared memory information in the
+ * freebsd11_shmstat layout.  It calls procstat_get_shm_info() and copies
+ * each member across.  Returns that call's result; *shm_compat is written
+ * only when it is 0.
+ */
+int
+freebsd11_procstat_get_shm_info(struct procstat *procstat,
+    struct filestat *fst, struct freebsd11_shmstat *shm_compat, char *errbuf)
+{
+	struct shmstat cur;
+	int error;
+
+	error = procstat_get_shm_info(procstat, fst, &cur, errbuf);
+	if (error == 0) {
+		shm_compat->size = cur.size;
+		shm_compat->mode = cur.mode;
+	}
+	return (error);
+}
+
+/*
+ * Compatibility entry point that reports vnode information in the old
+ * freebsd11_vnstat layout.  It calls procstat_get_vnode_info() and copies
+ * the result member by member, narrowing vn_dev and vn_fsid from 64 to 32
+ * bits.  Returns that call's result; *vn_compat is written only when it
+ * is 0.
+ */
+int
+freebsd11_procstat_get_vnode_info(struct procstat *procstat,
+    struct filestat *fst, struct freebsd11_vnstat *vn_compat, char *errbuf)
+{
+	struct vnstat cur;
+	int error;
+
+	error = procstat_get_vnode_info(procstat, fst, &cur, errbuf);
+	if (error != 0)
+		return (error);
+
+	/* 64-bit members with unchanged width. */
+	vn_compat->vn_fileid = cur.vn_fileid;
+	vn_compat->vn_size = cur.vn_size;
+	vn_compat->vn_mntdir = cur.vn_mntdir;
+	/* Narrowed to the old 32-bit fields. */
+	vn_compat->vn_dev = cur.vn_dev;
+	vn_compat->vn_fsid = cur.vn_fsid;
+	vn_compat->vn_type = cur.vn_type;
+	vn_compat->vn_mode = cur.vn_mode;
+	memcpy(vn_compat->vn_devname, cur.vn_devname,
+	    sizeof(vn_compat->vn_devname));
+	return (0);
+}
+
+__sym_compat(procstat_get_pts_info, freebsd11_procstat_get_pts_info, FBSD_1.2);
+__sym_compat(procstat_get_vnode_info, freebsd11_procstat_get_vnode_info,
+    FBSD_1.2);
+__sym_compat(procstat_get_sem_info, freebsd11_procstat_get_sem_info, FBSD_1.3);
+__sym_compat(procstat_get_shm_info, freebsd11_procstat_get_shm_info, FBSD_1.3);

Property changes on: head/lib/libprocstat/libprocstat_compat.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/lib/libufs/libufs.h
===================================================================
--- head/lib/libufs/libufs.h	(revision 318735)
+++ head/lib/libufs/libufs.h	(revision 318736)
@@ -1,149 +1,149 @@
 /*
  * Copyright (c) 2002 Juli Mallett.  All rights reserved.
  *
  * This software was written by Juli Mallett <jmallett@FreeBSD.org> for the
  * FreeBSD project.  Redistribution and use in source and binary forms, with
  * or without modification, are permitted provided that the following
  * conditions are met:
  *
  * 1. Redistribution of source code must retain the above copyright notice,
  *    this list of conditions and the following disclaimer.
  * 2. Redistribution in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef	__LIBUFS_H__
 #define	__LIBUFS_H__
 
 /*
  * libufs structures.
  */
 
 /*
  * userland ufs disk.
  */
 struct uufsd {
 	const char *d_name;	/* disk name */
 	int d_ufs;		/* decimal UFS version */
 	int d_fd;		/* raw device file descriptor */
 	long d_bsize;		/* device bsize */
 	ufs2_daddr_t d_sblock;	/* superblock location */
 	struct csum *d_sbcsum;	/* Superblock summary info */
 	caddr_t d_inoblock;	/* inode block */
-	ino_t d_inomin;		/* low inode */
-	ino_t d_inomax;		/* high inode */
+	uint32_t d_inomin;	/* low inode (not ino_t for ABI compat) */
+	uint32_t d_inomax;	/* high inode (not ino_t for ABI compat) */
 	union {
 		struct fs d_fs;	/* filesystem information */
 		char d_sb[MAXBSIZE];
 				/* superblock as buffer */
 	} d_sbunion;
 	union {
 		struct cg d_cg;	/* cylinder group */
 		char d_buf[MAXBSIZE];
 				/* cylinder group storage */
 	} d_cgunion;
 	int d_ccg;		/* current cylinder group */
 	int d_lcg;		/* last cylinder group (in d_cg) */
 	const char *d_error;	/* human readable disk error */
 	int d_mine;		/* internal flags */
 #define	d_fs	d_sbunion.d_fs
 #define	d_sb	d_sbunion.d_sb
 #define	d_cg	d_cgunion.d_cg
 };
 
 /*
  * libufs macros (internal, non-exported).
  */
 #ifdef	_LIBUFS
 /*
  * Trace steps through libufs, to be used at entry and erroneous return.
  */
 /*
  * Record `str' as the current error of disk `u' (when u is not NULL).
  * With _LIBUFS_DEBUGGING defined, a non-NULL message is also printed to
  * stderr, followed by the strerror() text when errno is non-zero.
  */
 static inline void
 ERROR(struct uufsd *u, const char *str)
 {
 
 #ifdef	_LIBUFS_DEBUGGING
 	if (str != NULL) {
 		fprintf(stderr, "libufs: %s", str);
 		if (errno != 0)
 			fprintf(stderr, ": %s", strerror(errno));
 		fprintf(stderr, "\n");
 	}
 #endif
 	if (u == NULL)
 		return;
 	u->d_error = str;
 }
 #endif	/* _LIBUFS */
 
 __BEGIN_DECLS
 
 /*
  * libufs prototypes.
  */
 
 /*
  * block.c
  */
 ssize_t bread(struct uufsd *, ufs2_daddr_t, void *, size_t);
 ssize_t bwrite(struct uufsd *, ufs2_daddr_t, const void *, size_t);
 int berase(struct uufsd *, ufs2_daddr_t, ufs2_daddr_t);
 
 /*
  * cgroup.c
  */
 ufs2_daddr_t cgballoc(struct uufsd *);
 int cgbfree(struct uufsd *, ufs2_daddr_t, long);
 ino_t cgialloc(struct uufsd *);
 int cgread(struct uufsd *);
 int cgread1(struct uufsd *, int);
 int cgwrite(struct uufsd *);
 int cgwrite1(struct uufsd *, int);
 
 /*
  * inode.c
  */
 int getino(struct uufsd *, void **, ino_t, int *);
 int putino(struct uufsd *);
 
 /*
  * sblock.c
  */
 int sbread(struct uufsd *);
 int sbwrite(struct uufsd *, int);
 
 /*
  * type.c
  */
 int ufs_disk_close(struct uufsd *);
 int ufs_disk_fillout(struct uufsd *, const char *);
 int ufs_disk_fillout_blank(struct uufsd *, const char *);
 int ufs_disk_write(struct uufsd *);
 
 /*
  * ffs_subr.c
  */
 void	ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t);
 void	ffs_clusteracct(struct fs *, struct cg *, ufs1_daddr_t, int);
 void	ffs_fragacct(struct fs *, int, int32_t [], int);
 int	ffs_isblock(struct fs *, u_char *, ufs1_daddr_t);
 int	ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t);
 void	ffs_setblock(struct fs *, u_char *, ufs1_daddr_t);
 
 __END_DECLS
 
 #endif	/* __LIBUFS_H__ */
Index: head/sbin/badsect/badsect.c
===================================================================
--- head/sbin/badsect/badsect.c	(revision 318735)
+++ head/sbin/badsect/badsect.c	(revision 318736)
@@ -1,193 +1,182 @@
 /*
  * Copyright (c) 1981, 1983, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if 0
 #ifndef lint
 static const char copyright[] =
 "@(#) Copyright (c) 1981, 1983, 1993\n\
 	The Regents of the University of California.  All rights reserved.\n";
 #endif /* not lint */
 
 #ifndef lint
 static const char sccsid[] = "@(#)badsect.c	8.1 (Berkeley) 6/5/93";
 #endif
 #endif
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * badsect
  *
  * Badsect takes a list of file-system relative sector numbers
  * and makes files containing the blocks of which these sectors are a part.
  * It can be used to contain sectors which have problems if these sectors
  * are not part of the bad file for the pack (see bad144).  For instance,
  * this program can be used if the driver for the file system in question
  * does not support bad block forwarding.
  */
 #include <sys/param.h>
 #include <sys/stat.h>
 #include <sys/disklabel.h>
 
 #include <ufs/ufs/dinode.h>
 #include <ufs/ffs/fs.h>
 
 #include <err.h>
 #include <errno.h>
 #include <dirent.h>
 #include <fcntl.h>
 #include <libufs.h>
 #include <paths.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 
 #define sblock	disk.d_fs
 #define	acg	disk.d_cg
 static struct	uufsd disk;
 static struct	fs *fs = &sblock;
 static int	errs;
 
 int	chkuse(daddr_t, int);
 
 /*
  * Print the command synopsis on stderr and exit with status 1.
  */
 static void
 usage(void)
 {
 	fputs("usage: badsect bbdir blkno ...\n", stderr);
 	exit(1);
 }
 
 /*
  * badsect bbdir blkno ...
  *
  * Change into 'bbdir', find the character device in _PATH_DEV whose
  * st_rdev matches the st_dev of that directory, open it with libufs and
  * then, for every block-number argument that passes chkuse(), create a
  * node named after the argument with mknod(IFMT|0600).  The process exit
  * status is the accumulated error count.
  */
 int
 main(int argc, char *argv[])
 {
 	daddr_t diskbn;
 	daddr_t number;
 	struct stat stbuf, devstat;
 	struct dirent *dp;
 	DIR *dirp;
 	char name[2 * MAXPATHLEN];
 	char *name_dir_end;
 
 	if (argc < 3)
 		usage();
 	if (chdir(argv[1]) < 0 || stat(".", &stbuf) < 0)
 		err(2, "%s", argv[1]);
 	strcpy(name, _PATH_DEV);
 	if ((dirp = opendir(name)) == NULL)
 		err(3, "%s", name);
 	/* Directory entries are appended after the _PATH_DEV prefix. */
 	name_dir_end = name + strlen(name);
 	while ((dp = readdir(dirp)) != NULL) {
 		strcpy(name_dir_end, dp->d_name);
 		if (lstat(name, &devstat) < 0)
 			err(4, "%s", name);
 		if (stbuf.st_dev == devstat.st_rdev &&
 		    (devstat.st_mode & IFMT) == IFCHR)
 			break;
 	}
 	closedir(dirp);
 	if (dp == NULL) {
 		/* Report the device that was searched for: st_dev of ".". */
 		printf("Cannot find dev 0%lo corresponding to %s\n",
 		    (u_long)stbuf.st_dev, argv[1]);
 		exit(5);
 	}
 	if (ufs_disk_fillout(&disk, name) == -1) {
 		if (disk.d_error != NULL)
 			errx(6, "%s: %s", name, disk.d_error);
 		else
 			err(7, "%s", name);
 	}
 	for (argc -= 2, argv += 2; argc > 0; argc--, argv++) {
 		/*
 		 * strtol() leaves errno untouched on success, so clear any
 		 * stale value before testing it below.
 		 */
 		errno = 0;
 		number = strtol(*argv, NULL, 0);
 		if (errno == EINVAL || errno == ERANGE)
 			err(8, "%s", *argv);
 		if (chkuse(number, 1))
 			continue;
-		/*
-		 * Print a warning if converting the block number to a dev_t
-		 * will truncate it.  badsect was not very useful in versions
-		 * of BSD before 4.4 because dev_t was 16 bits and another
-		 * bit was lost by bogus sign extensions.
-		 */
 		diskbn = dbtofsb(fs, number);
-		if ((dev_t)diskbn != diskbn) {
-			printf("sector %ld cannot be represented as a dev_t\n",
-			    (long)number);
-			errs++;
-		}
-		else if (mknod(*argv, IFMT|0600, (dev_t)diskbn) < 0) {
+		if (mknod(*argv, IFMT|0600, (dev_t)diskbn) < 0) {
 			warn("%s", *argv);
 			errs++;
 		}
 	}
 	ufs_disk_close(&disk);
 	printf("Don't forget to run ``fsck %s''\n", name);
 	exit(errs);
 }
 
 /*
  * Validate that the 'cnt' fragments starting at disk block 'blkno' lie
  * inside the data area of a cylinder group of the open file system.
  *
  * Returns 0 when the block may be attached (a warning is printed if the
  * free map shows it as in use) and 1 when it must be skipped.  Failures
  * to read or validate the cylinder group also bump the global 'errs'.
  */
 int
 chkuse(daddr_t blkno, int cnt)
 {
 	int cg;
 	daddr_t fsbn, bn;
 
 	fsbn = dbtofsb(fs, blkno);
 	/*
 	 * Compare at full width: truncating the sum to 'unsigned' would let
 	 * large or slightly negative block numbers slip through.
 	 */
 	if (fsbn < 0 || fsbn + cnt > fs->fs_size) {
 		printf("block %ld out of range of file system\n", (long)blkno);
 		return (1);
 	}
 	cg = dtog(fs, fsbn);
 	if (fsbn < cgdmin(fs, cg)) {
 		/* Ahead of the data area: only the gap before the cg's
 		 * superblock copy (never in cg 0) is acceptable. */
 		if (cg == 0 || (fsbn+cnt) > cgsblock(fs, cg)) {
 			printf("block %ld in non-data area: cannot attach\n",
 			    (long)blkno);
 			return (1);
 		}
 	} else {
 		/* Must not run into the next cylinder group. */
 		if ((fsbn+cnt) > cgbase(fs, cg+1)) {
 			printf("block %ld in non-data area: cannot attach\n",
 			    (long)blkno);
 			return (1);
 		}
 	}
 	if (cgread1(&disk, cg) != 1) {
 		fprintf(stderr, "cg %d: could not be read\n", cg);
 		errs++;
 		return (1);
 	}
 	if (!cg_chkmagic(&acg)) {
 		fprintf(stderr, "cg %d: bad magic number\n", cg);
 		errs++;
 		return (1);
 	}
 	bn = dtogd(fs, fsbn);
 	/* A clear bit in the free map means the fragment is allocated. */
 	if (isclr(cg_blksfree(&acg), bn))
 		printf("Warning: sector %ld is in use\n", (long)blkno);
 	return (0);
 }
Index: head/sbin/fsck_ffs/suj.c
===================================================================
--- head/sbin/fsck_ffs/suj.c	(revision 318735)
+++ head/sbin/fsck_ffs/suj.c	(revision 318736)
@@ -1,2802 +1,2802 @@
 /*-
  * Copyright 2009, 2010 Jeffrey W. Roberson <jeff@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/disk.h>
 #include <sys/disklabel.h>
 #include <sys/mount.h>
 #include <sys/stat.h>
 
 #include <ufs/ufs/ufsmount.h>
 #include <ufs/ufs/dinode.h>
 #include <ufs/ufs/dir.h>
 #include <ufs/ffs/fs.h>
 
 #include <assert.h>
 #include <err.h>
 #include <setjmp.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <libufs.h>
 #include <string.h>
 #include <strings.h>
 #include <sysexits.h>
 #include <time.h>
 
 #include "fsck.h"
 
 #define	DOTDOT_OFFSET	DIRECTSIZ(1)
 #define	SUJ_HASHSIZE	2048
 #define	SUJ_HASHMASK	(SUJ_HASHSIZE - 1)
 #define	SUJ_HASH(x)	((x * 2654435761) & SUJ_HASHMASK)
 
 /*
  * One journal segment: list linkage, a segment record header and a
  * pointer to a byte buffer.
  * NOTE(review): presumably ss_blk is the raw block the header came from;
  * the code that fills it in is outside this view -- confirm.
  */
 struct suj_seg {
 	TAILQ_ENTRY(suj_seg) ss_next;
 	struct jsegrec	ss_rec;
 	uint8_t		*ss_blk;
 };
 
 /*
  * Queue node that points at a single journal record.
  */
 struct suj_rec {
 	TAILQ_ENTRY(suj_rec) sr_next;
 	union jrec	*sr_rec;
 };
 TAILQ_HEAD(srechd, suj_rec);
 
 /*
  * Per-inode tracking state, hashed by inode number inside its cylinder
  * group (see ino_lookup()).  The three record queues are initialised
  * empty when the entry is created.
  * NOTE(review): the meaning of the adjustment/link-count fields is defined
  * by code outside this view -- confirm before relying on the names.
  */
 struct suj_ino {
 	LIST_ENTRY(suj_ino)	si_next;
 	struct srechd		si_recs;
 	struct srechd		si_newrecs;
 	struct srechd		si_movs;
 	struct jtrncrec		*si_trunc;
 	ino_t			si_ino;
 	char			si_skipparent;
 	char			si_hasrecs;
 	char			si_blkadj;
 	char			si_linkadj;
 	int			si_mode;
 	nlink_t			si_nlinkadj;
 	nlink_t			si_nlink;
 	nlink_t			si_dotlinks;
 };
 LIST_HEAD(inohd, suj_ino);
 
 /*
  * Per-block tracking state keyed by sb_blk (see blk_lookup()).  The
  * records queued on sb_recs are interpreted as struct jblkrec by
  * blk_freemask() and blk_isindir().
  */
 struct suj_blk {
 	LIST_ENTRY(suj_blk)	sb_next;
 	struct srechd		sb_recs;
 	ufs2_daddr_t		sb_blk;
 };
 LIST_HEAD(blkhd, suj_blk);
 
 /*
  * Cached data block.  db_size is the number of bytes currently held in
  * db_buf (zero until dblk_read() loads it); db_dirty is set by
  * dblk_dirty() and consulted by dblk_write().
  */
 struct data_blk {
 	LIST_ENTRY(data_blk)	db_next;
 	uint8_t			*db_buf;
 	ufs2_daddr_t		db_blk;
 	int			db_size;
 	int			db_dirty;
 };
 
 /*
  * Cached inode block of fs_bsize bytes located at file system block
  * ib_blk (see ino_read()); ib_dirty marks it for iblk_write().
  */
 struct ino_blk {
 	LIST_ENTRY(ino_blk)	ib_next;
 	uint8_t			*ib_buf;
 	int			ib_dirty;
 	ufs2_daddr_t		ib_blk;
 };
 LIST_HEAD(iblkhd, ino_blk);
 
 /*
  * Per cylinder group state created by cg_lookup(): hashes of the blocks,
  * inodes and inode blocks tracked in the group, one-entry lookup caches,
  * and the in-memory copy of the group itself (sc_cgp aliases sc_cgbuf).
  * sc_dirty is set whenever the bitmaps or summary are modified.
  */
 struct suj_cg {
 	LIST_ENTRY(suj_cg)	sc_next;
 	struct blkhd		sc_blkhash[SUJ_HASHSIZE];
 	struct inohd		sc_inohash[SUJ_HASHSIZE];
 	struct iblkhd		sc_iblkhash[SUJ_HASHSIZE];
 	struct ino_blk		*sc_lastiblk;
 	struct suj_ino		*sc_lastino;
 	struct suj_blk		*sc_lastblk;
 	uint8_t			*sc_cgbuf;
 	struct cg		*sc_cgp;
 	int			sc_dirty;
 	int			sc_cgx;
 };
 
 static LIST_HEAD(cghd, suj_cg) cghash[SUJ_HASHSIZE];
 static LIST_HEAD(dblkhd, data_blk) dbhash[SUJ_HASHSIZE];
 static struct suj_cg *lastcg;
 static struct data_blk *lastblk;
 
 static TAILQ_HEAD(seghd, suj_seg) allsegs;
 static uint64_t oldseq;
 static struct uufsd *disk = NULL;
 static struct fs *fs = NULL;
 static ino_t sujino;
 
 /*
  * Summary statistics.
  */
 static uint64_t freefrags;
 static uint64_t freeblocks;
 static uint64_t freeinos;
 static uint64_t freedir;
 static uint64_t jbytes;
 static uint64_t jrecs;
 
 static jmp_buf	jmpbuf;
 
 typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int);
 static void err_suj(const char *, ...) __dead2;
 static void ino_trunc(ino_t, off_t);
 static void ino_decr(ino_t);
 static void ino_adjust(struct suj_ino *);
 static void ino_build(struct suj_ino *);
 static int blk_isfree(ufs2_daddr_t);
 static void initsuj(void);
 
 /*
  * Malloc() wrapper that never returns NULL: an allocation failure
  * terminates the program through err(EX_OSERR, ...).
  */
 static void *
 errmalloc(size_t n)
 {
 	void *mem = Malloc(n);
 
 	if (!mem)
 		err(EX_OSERR, "malloc(%zu)", n);
 	return (mem);
 }
 
 /*
  * Fatal error during the journal check: print the formatted message on
  * stdout (prefixed with the device name when preening) and unwind to the
  * setjmp() point, which offers to fall back to a normal fsck.
  */
 static void
 err_suj(const char * restrict fmt, ...)
 {
 	va_list args;
 
 	if (preen) {
 		fprintf(stdout, "%s: ", cdevname);
 	}
 	va_start(args, fmt);
 	vfprintf(stdout, fmt, args);
 	va_end(args);
 
 	longjmp(jmpbuf, -1);
 }
 
 /*
  * Open the given provider and load its superblock.  Does nothing when a
  * disk is already open.  On return 'disk' and 'fs' are valid; if
  * real_dev_bsize was still zero it is queried with DIOCGSECTORSIZE,
  * falling back to secsize when the ioctl fails.  Any failure to allocate
  * or open is fatal.
  */
 static void
 opendisk(const char *devnam)
 {
 	if (disk != NULL)
 		return;
 	disk = Malloc(sizeof(*disk));
 	if (disk == NULL)
 		err(EX_OSERR, "malloc(%zu)", sizeof(*disk));
 	if (ufs_disk_fillout(disk, devnam) == -1) {
 		/* d_error may be unset; never hand a NULL pointer to %s. */
 		err(EX_OSERR, "ufs_disk_fillout(%s) failed: %s", devnam,
 		    disk->d_error != NULL ? disk->d_error : "unknown error");
 	}
 	fs = &disk->d_fs;
 	if (real_dev_bsize == 0 && ioctl(disk->d_fd, DIOCGSECTORSIZE,
 	    &real_dev_bsize) == -1)
 		real_dev_bsize = secsize;
 	if (debug)
 		printf("dev_bsize %u\n", real_dev_bsize);
 }
 
 /*
  * Mark file system as clean, write the super-block back, close the disk.
  */
 static void
 closedisk(const char *devnam)
 {
 	struct csum *cgsum;
 	uint32_t i;
 
 	/*
 	 * Recompute the fs summary info from correct cs summaries.
 	 */
 	bzero(&fs->fs_cstotal, sizeof(struct csum_total));
 	for (i = 0; i < fs->fs_ncg; i++) {
 		cgsum = &fs->fs_cs(fs, i);
 		fs->fs_cstotal.cs_nffree += cgsum->cs_nffree;
 		fs->fs_cstotal.cs_nbfree += cgsum->cs_nbfree;
 		fs->fs_cstotal.cs_nifree += cgsum->cs_nifree;
 		fs->fs_cstotal.cs_ndir += cgsum->cs_ndir;
 	}
 	fs->fs_pendinginodes = 0;
 	fs->fs_pendingblocks = 0;
 	fs->fs_clean = 1;
 	fs->fs_time = time(NULL);
 	fs->fs_mtime = time(NULL);
 	if (sbwrite(disk, 0) == -1)
 		err(EX_OSERR, "sbwrite(%s)", devnam);
 	if (ufs_disk_close(disk) == -1)
 		err(EX_OSERR, "ufs_disk_close(%s)", devnam);
 	free(disk);
 	disk = NULL;
 	fs = NULL;
 }
 
 /*
  * Lookup a cg by number in the hash so we can keep track of which cgs
  * need stats rebuilt.
  */
 static struct suj_cg *
 cg_lookup(int cgx)
 {
 	struct cghd *hd;
 	struct suj_cg *sc;
 
 	if (cgx < 0 || cgx >= fs->fs_ncg)
 		err_suj("Bad cg number %d\n", cgx);
 	if (lastcg && lastcg->sc_cgx == cgx)
 		return (lastcg);
 	hd = &cghash[SUJ_HASH(cgx)];
 	LIST_FOREACH(sc, hd, sc_next)
 		if (sc->sc_cgx == cgx) {
 			lastcg = sc;
 			return (sc);
 		}
 	sc = errmalloc(sizeof(*sc));
 	bzero(sc, sizeof(*sc));
 	sc->sc_cgbuf = errmalloc(fs->fs_bsize);
 	sc->sc_cgp = (struct cg *)sc->sc_cgbuf;
 	sc->sc_cgx = cgx;
 	LIST_INSERT_HEAD(hd, sc, sc_next);
 	if (bread(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf,
 	    fs->fs_bsize) == -1)
 		err_suj("Unable to read cylinder group %d\n", sc->sc_cgx);
 
 	return (sc);
 }
 
 /*
  * Find the suj_ino for inode 'ino' in its cylinder group's hash.  When
  * no entry exists, a zeroed one with empty record queues is allocated if
  * 'creat' is non-zero; otherwise NULL is returned.
  */
 static struct suj_ino *
 ino_lookup(ino_t ino, int creat)
 {
 	struct suj_cg *cgrp;
 	struct inohd *bucket;
 	struct suj_ino *ent;
 
 	cgrp = cg_lookup(ino_to_cg(fs, ino));
 	if (cgrp->sc_lastino != NULL && cgrp->sc_lastino->si_ino == ino)
 		return (cgrp->sc_lastino);
 	bucket = &cgrp->sc_inohash[SUJ_HASH(ino)];
 	LIST_FOREACH(ent, bucket, si_next) {
 		if (ent->si_ino == ino)
 			return (ent);
 	}
 	if (!creat)
 		return (NULL);
 
 	ent = errmalloc(sizeof(*ent));
 	memset(ent, 0, sizeof(*ent));
 	ent->si_ino = ino;
 	TAILQ_INIT(&ent->si_recs);
 	TAILQ_INIT(&ent->si_newrecs);
 	TAILQ_INIT(&ent->si_movs);
 	LIST_INSERT_HEAD(bucket, ent, si_next);
 
 	return (ent);
 }
 
 /*
  * Find the suj_blk for block 'blk' in its cylinder group's hash (the
  * bucket is chosen from the block number, not the fragment address).
  * When no entry exists, a zeroed one is allocated if 'creat' is non-zero;
  * otherwise NULL is returned.
  */
 static struct suj_blk *
 blk_lookup(ufs2_daddr_t blk, int creat)
 {
 	struct suj_cg *cgrp;
 	struct blkhd *bucket;
 	struct suj_blk *ent;
 
 	cgrp = cg_lookup(dtog(fs, blk));
 	if (cgrp->sc_lastblk != NULL && cgrp->sc_lastblk->sb_blk == blk)
 		return (cgrp->sc_lastblk);
 	bucket = &cgrp->sc_blkhash[SUJ_HASH(fragstoblks(fs, blk))];
 	LIST_FOREACH(ent, bucket, sb_next) {
 		if (ent->sb_blk == blk)
 			return (ent);
 	}
 	if (!creat)
 		return (NULL);
 
 	ent = errmalloc(sizeof(*ent));
 	memset(ent, 0, sizeof(*ent));
 	ent->sb_blk = blk;
 	TAILQ_INIT(&ent->sb_recs);
 	LIST_INSERT_HEAD(bucket, ent, sb_next);
 
 	return (ent);
 }
 
 /*
  * Return the data_blk cache entry for block 'blk', allocating an empty
  * (zeroed, no buffer) entry when none exists yet.
  */
 static struct data_blk *
 dblk_lookup(ufs2_daddr_t blk)
 {
 	struct dblkhd *bucket;
 	struct data_blk *ent;
 
 	bucket = &dbhash[SUJ_HASH(fragstoblks(fs, blk))];
 	if (lastblk != NULL && lastblk->db_blk == blk)
 		return (lastblk);
 	LIST_FOREACH(ent, bucket, db_next) {
 		if (ent->db_blk == blk)
 			return (ent);
 	}
 	/* The data block wasn't located, allocate a new entry. */
 	ent = errmalloc(sizeof(*ent));
 	memset(ent, 0, sizeof(*ent));
 	LIST_INSERT_HEAD(bucket, ent, db_next);
 	ent->db_blk = blk;
 	return (ent);
 }
 
 /*
  * Return a buffer holding 'size' bytes of block 'blk'.  The block is read
  * from disk the first time, and again whenever it is requested with a
  * different size than the one currently cached.  A failed read aborts via
  * err_suj().
  */
 static uint8_t *
 dblk_read(ufs2_daddr_t blk, int size)
 {
 	struct data_blk *ent;
 
 	ent = dblk_lookup(blk);
 	if (size == ent->db_size)
 		return (ent->db_buf);
 	/*
 	 * Size mismatches are unlikely in practice but trivial to handle:
 	 * drop the old buffer and read the block again.
 	 */
 	free(ent->db_buf);
 	ent->db_buf = errmalloc(size);
 	ent->db_size = size;
 	if (bread(disk, fsbtodb(fs, blk), ent->db_buf, size) == -1) {
 		err_suj("Failed to read data block %jd\n", blk);
 	}
 	return (ent->db_buf);
 }
 
 /*
  * Flag the cache entry of data block 'blk' as modified so that
  * dblk_write() will write it back.
  */
 static void
 dblk_dirty(ufs2_daddr_t blk)
 {
 
 	dblk_lookup(blk)->db_dirty = 1;
 }
 
 /*
  * Write every dirty, loaded data block in the cache back to disk.  A
  * failed write aborts via err_suj().
  */
 static void
 dblk_write(void)
 {
 	struct data_blk *ent;
 	int bucket;
 
 	for (bucket = 0; bucket < SUJ_HASHSIZE; bucket++) {
 		LIST_FOREACH(ent, &dbhash[bucket], db_next) {
 			/* Skip clean entries and those never read in. */
 			if (ent->db_dirty != 0 && ent->db_size != 0) {
 				if (bwrite(disk, fsbtodb(fs, ent->db_blk),
 				    ent->db_buf, ent->db_size) == -1)
 					err_suj("Unable to write block %jd\n",
 					    ent->db_blk);
 			}
 		}
 	}
 }
 
 /*
  * Return a pointer to the on-disk inode 'ino' inside the cached copy of
  * its inode block, reading the block from disk on first use.  The block
  * found becomes the cylinder group's most-recent inode block.
  */
 static union dinode *
 ino_read(ino_t ino)
 {
 	struct suj_cg *cgrp;
 	struct iblkhd *bucket;
 	struct ino_blk *ib;
 	ufs2_daddr_t blk;
 	int idx;
 
 	blk = ino_to_fsba(fs, ino);
 	cgrp = cg_lookup(ino_to_cg(fs, ino));
 	ib = cgrp->sc_lastiblk;
 	if (ib == NULL || ib->ib_blk != blk) {
 		bucket = &cgrp->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))];
 		LIST_FOREACH(ib, bucket, ib_next) {
 			if (ib->ib_blk == blk)
 				break;
 		}
 		if (ib == NULL) {
 			/* The inode block wasn't located, allocate a new one. */
 			ib = errmalloc(sizeof(*ib));
 			memset(ib, 0, sizeof(*ib));
 			ib->ib_buf = errmalloc(fs->fs_bsize);
 			ib->ib_blk = blk;
 			LIST_INSERT_HEAD(bucket, ib, ib_next);
 			if (bread(disk, fsbtodb(fs, blk), ib->ib_buf,
 			    fs->fs_bsize) == -1)
 				err_suj("Failed to read inode block %jd\n", blk);
 		}
 	}
 	cgrp->sc_lastiblk = ib;
 	idx = ino_to_fsbo(fs, ino);
 	/* Index by the dinode layout that matches the file system format. */
 	if (fs->fs_magic == FS_UFS1_MAGIC) {
 		struct ufs1_dinode *dp1 = (struct ufs1_dinode *)ib->ib_buf;
 
 		return ((union dinode *)&dp1[idx]);
 	} else {
 		struct ufs2_dinode *dp2 = (struct ufs2_dinode *)ib->ib_buf;
 
 		return ((union dinode *)&dp2[idx]);
 	}
 }
 
 /*
  * Mark the cached inode block that contains inode 'ino' as dirty.  If
  * the block is not cached yet it is read in first and the lookup retried.
  */
 static void
 ino_dirty(ino_t ino)
 {
 	struct suj_cg *cgrp;
 	struct iblkhd *bucket;
 	struct ino_blk *ib;
 	ufs2_daddr_t blk;
 
 	blk = ino_to_fsba(fs, ino);
 	cgrp = cg_lookup(ino_to_cg(fs, ino));
 	ib = cgrp->sc_lastiblk;
 	if (ib == NULL || ib->ib_blk != blk) {
 		bucket = &cgrp->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))];
 		LIST_FOREACH(ib, bucket, ib_next) {
 			if (ib->ib_blk == blk)
 				break;
 		}
 	}
 	if (ib != NULL) {
 		ib->ib_dirty = 1;
 		return;
 	}
 	/* Not cached: load the block, then the retry finds it. */
 	ino_read(ino);
 	ino_dirty(ino);
 }
 
 /*
  * Write the inode block back to disk if it has been marked dirty.  A
  * failed write aborts via err_suj().
  */
 static void
 iblk_write(struct ino_blk *iblk)
 {
 
 	if (iblk->ib_dirty != 0) {
 		if (bwrite(disk, fsbtodb(fs, iblk->ib_blk), iblk->ib_buf,
 		    fs->fs_bsize) == -1)
 			err_suj("Failed to write inode block %jd\n",
 			    iblk->ib_blk);
 	}
 }
 
 /*
  * Returns 1 if the 'frags' fragments starting at 'start' intersect the
  * fragment range described by the journal block record, 0 otherwise.
  */
 static int
 blk_overlaps(struct jblkrec *brec, ufs2_daddr_t start, int frags)
 {
 	ufs2_daddr_t rec_first, rec_end, req_end;
 
 	rec_first = brec->jb_blkno + brec->jb_oldfrags;
 	rec_end = rec_first + brec->jb_frags;
 	req_end = start + frags;
 
 	return ((start < rec_end && req_end > rec_first) ? 1 : 0);
 }
 
 /*
  * Returns 1 if the journal block record belongs to the same inode and
  * lbn, starts at 'start' and covers at least 'frags' fragments; 0
  * otherwise.
  */
 static int
 blk_equals(struct jblkrec *brec, ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t start,
     int frags)
 {
 
 	if (brec->jb_ino == ino && brec->jb_lbn == lbn &&
 	    brec->jb_blkno + brec->jb_oldfrags == start &&
 	    brec->jb_frags >= frags)
 		return (1);
 	return (0);
 }
 
 /*
  * Set in '*mask' one bit for every fragment position the journal block
  * record covers: bits jb_oldfrags .. jb_oldfrags + jb_frags - 1.
  */
 static void
 blk_setmask(struct jblkrec *brec, int *mask)
 {
 	int bit;
 
 	bit = brec->jb_oldfrags;
 	while (bit < brec->jb_oldfrags + brec->jb_frags) {
 		*mask |= 1 << bit;
 		bit++;
 	}
 }
 
 /*
  * Determine whether a given block has been reallocated to a new location.
  * Returns a mask of overlapping bits if any frags have been reused, or
  * zero if the block has not been re-used and its contents can be trusted.
  *
  * This is used to ensure that an orphaned pointer due to truncate is safe
  * to be freed.  The mask value can be used to free partial blocks.
  */
 static int
 blk_freemask(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags)
 {
 	struct suj_blk *sblk;
 	struct suj_rec *rec;
 	struct jblkrec *jb;
 	int reused;
 	int shift;
 
 	/*
 	 * To be certain we're not freeing a reallocated block, look the
 	 * block up in the blk hash and check for journal records that
 	 * overlap any fragment we're concerned with.  If any fragments
 	 * have been reallocated, the block has already been freed and
 	 * re-used for another purpose.
 	 */
 	sblk = blk_lookup(blknum(fs, blk), 0);
 	if (sblk == NULL)
 		return (0);
 	reused = 0;
 	shift = blk - sblk->sb_blk;
 	TAILQ_FOREACH(rec, &sblk->sb_recs, sr_next) {
 		jb = (struct jblkrec *)rec->sr_rec;
 		if (!blk_overlaps(jb, blk, frags))
 			continue;
 		/*
 		 * An exact match refers to the current location and
 		 * resets the mask; any other overlapping record marks
 		 * its fragments as reused.
 		 */
 		if (blk_equals(jb, ino, lbn, blk, frags) == 1)
 			reused = 0;
 		else
 			blk_setmask(jb, &reused);
 	}
 	if (debug) {
 		printf("blk_freemask: blk %jd sblk %jd off %d mask 0x%X\n",
 		    blk, sblk->sb_blk, shift, reused);
 	}
 	return (reused >> shift);
 }
 
 /*
  * Determine whether it is safe to follow an indirect.  It is not safe
  * if any part of the indirect has been reallocated or the last journal
  * entry was an allocation.  Just allocated indirects may not have valid
  * pointers yet and all of their children will have their own records.
  * It is also not safe to follow an indirect if the cg bitmap has been
  * cleared as a new allocation may write to the block prior to the journal
  * being written.
  *
  * Returns 1 if it's safe to follow the indirect and 0 otherwise.
  */
 static int
 blk_isindir(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn)
 {
 	struct suj_blk *sblk;
 	struct jblkrec *last;
 
 	/* No journal activity recorded for this block: trust it. */
 	sblk = blk_lookup(blk, 0);
 	if (sblk == NULL || TAILQ_EMPTY(&sblk->sb_recs))
 		return (1);
 	/* Only the most recent record decides. */
 	last = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec;
 	if (!blk_equals(last, ino, lbn, blk, fs->fs_frag))
 		return (0);
 	if (last->jb_op != JOP_FREEBLK)
 		return (0);
 	return (!blk_isfree(blk));
 }
 
 /*
  * Clear an inode from the cg bitmap.  If the inode was already clear
  * return 0 so the caller knows it does not have to check the inode
  * contents; otherwise update the group summary and return 1.
  */
 static int
 ino_free(ino_t ino, int mode)
 {
 	struct suj_cg *cgrp;
 	struct cg *cgp;
 	uint8_t *usedmap;
 	ino_t cgino;
 	int cg;
 
 	cg = ino_to_cg(fs, ino);
 	cgino = ino % fs->fs_ipg;	/* index within the group */
 	cgrp = cg_lookup(cg);
 	cgp = cgrp->sc_cgp;
 	usedmap = cg_inosused(cgp);
 	/*
 	 * The bitmap may never have made it to the disk so we have to
 	 * conditionally clear.  We can avoid writing the cg in this case.
 	 */
 	if (isclr(usedmap, cgino))
 		return (0);
 	clrbit(usedmap, cgino);
 	freeinos++;
 	if (cgino < cgp->cg_irotor)
 		cgp->cg_irotor = cgino;
 	cgp->cg_cs.cs_nifree++;
 	if ((mode & IFMT) == IFDIR) {
 		cgp->cg_cs.cs_ndir--;
 		freedir++;
 	}
 	cgrp->sc_dirty = 1;
 
 	return (1);
 }
 
 /*
  * Free 'frags' frags starting at filesystem block 'bno' skipping any frags
  * set in the mask.  The cylinder group is always marked dirty.
  */
 static void
 blk_free(ufs2_daddr_t bno, int mask, int frags)
 {
 	struct suj_cg *cgrp;
 	struct cg *cgp;
 	uint8_t *freemap;
 	ufs1_daddr_t cgbno, blkidx;
 	int cg, n;
 
 	if (debug) {
 		printf("Freeing %d frags at blk %jd mask 0x%x\n",
 		    frags, bno, mask);
 	}
 	cg = dtog(fs, bno);
 	cgrp = cg_lookup(cg);
 	cgp = cgrp->sc_cgp;
 	cgbno = dtogd(fs, bno);
 	freemap = cg_blksfree(cgp);
 
 	if (frags != fs->fs_frag || mask != 0) {
 		/*
 		 * Partial block: release each unmasked fragment that is
 		 * still marked allocated.
 		 */
 		for (n = 0; n < frags; n++) {
 			if ((mask & (1 << n)) != 0)
 				continue;
 			if (!isclr(freemap, cgbno + n))
 				continue;
 			freefrags++;
 			setbit(freemap, cgbno + n);
 		}
 	} else {
 		/*
 		 * Whole block.  If it's not allocated we only wrote the
 		 * journal entry and never the bitmaps, so the block is
 		 * marked free unconditionally and the cg summary is
 		 * resolved later.
 		 */
 		blkidx = fragstoblks(fs, cgbno);
 		ffs_setblock(fs, freemap, blkidx);
 		freeblocks++;
 	}
 	cgrp->sc_dirty = 1;
 }
 
 /*
  * Returns 1 if the whole block starting at 'bno' is marked free and 0
  * otherwise.
  */
 static int
 blk_isfree(ufs2_daddr_t bno)
 {
 	struct suj_cg *sc;
 	ufs1_daddr_t cgbno;
 
 	sc = cg_lookup(dtog(fs, bno));
 	cgbno = dtogd(fs, bno);
 	/*
 	 * The ffs_*block() helpers index the free map by block number, not
 	 * by fragment offset, so convert first (as blk_free() does before
 	 * calling ffs_setblock()).
 	 */
 	return (ffs_isblock(fs, cg_blksfree(sc->sc_cgp),
 	    fragstoblks(fs, cgbno)));
 }
 
 /*
  * Fetch an indirect block to find the block at a given lbn.  The lbn
  * may be negative to fetch a specific indirect block pointer or positive
  * to fetch a specific block.
  *
  * 'blk' is the indirect block to read and 'cur' its (negative) lbn.
  * Returns the block address found, or 0 when 'blk' itself is 0.
  * Inconsistent lbns or indices abort via err_suj().
  */
 static ufs2_daddr_t
 indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn)
 {
 	ufs2_daddr_t *bap2;
 	ufs1_daddr_t *bap1;
 	ufs_lbn_t lbnadd;
 	ufs_lbn_t base;
 	int level;
 	int i;
 
 	if (blk == 0)
 		return (0);
 	level = lbn_level(cur);
 	if (level == -1)
 		err_suj("Invalid indir lbn %jd\n", lbn);
 	if (level == 0 && lbn < 0)
 		err_suj("Invalid lbn %jd\n", lbn);
 	/*
 	 * The same buffer is viewed through both pointer widths: UFS1
 	 * indirect blocks hold ufs1_daddr_t entries, UFS2 ones hold
 	 * ufs2_daddr_t entries.  Only the view matching fs_magic is used.
 	 */
 	bap2 = (void *)dblk_read(blk, fs->fs_bsize);
 	bap1 = (void *)bap2;
 	/* lbnadd: number of data blocks covered by one entry at this level. */
 	lbnadd = 1;
 	base = -(cur + level);
 	for (i = level; i > 0; i--)
 		lbnadd *= NINDIR(fs);
 	if (lbn > 0)
 		i = (lbn - base) / lbnadd;
 	else
 		i = (-lbn - base) / lbnadd;
 	if (i < 0 || i >= NINDIR(fs))
 		err_suj("Invalid indirect index %d produced by lbn %jd\n",
 		    i, lbn);
 	/* lbn of the entry selected: a data block at level 0, else the
 	 * next lower indirect block. */
 	if (level == 0)
 		cur = base + (i * lbnadd);
 	else
 		cur = -(base + (i * lbnadd)) - (level - 1);
 	if (fs->fs_magic == FS_UFS1_MAGIC)
 		blk = bap1[i];
 	else
 		blk = bap2[i];
 	if (cur == lbn)
 		return (blk);
 	if (level == 0)
 		err_suj("Invalid lbn %jd at level 0\n", lbn);
 	return indir_blkatoff(blk, ino, cur, lbn);
 }
 
 /*
  * Finds the disk block address at the specified lbn within the inode
  * specified by ip.  This follows the whole tree and honors di_size and
  * di_extsize so it is a true test of reachability.  The lbn may be
  * negative if an extattr or indirect block is requested.
  *
  * On success '*frags' receives the number of fragments of the block.
  * Returns 0 (leaving '*frags' untouched) for an extattr lbn beyond
  * di_extsize and for short symlinks, which keep their target in the
  * inode.  An lbn outside every tree aborts via err_suj().
  */
 static ufs2_daddr_t
 ino_blkatoff(union dinode *ip, ino_t ino, ufs_lbn_t lbn, int *frags)
 {
 	ufs_lbn_t tmpval;
 	ufs_lbn_t cur;
 	ufs_lbn_t next;
 	int i;
 
 	/*
 	 * Handle extattr blocks first.
 	 */
 	if (lbn < 0 && lbn >= -UFS_NXADDR) {
 		lbn = -1 - lbn;
 		if (lbn > lblkno(fs, ip->dp2.di_extsize - 1))
 			return (0);
 		*frags = numfrags(fs, sblksize(fs, ip->dp2.di_extsize, lbn));
 		return (ip->dp2.di_extb[lbn]);
 	}
 	/*
 	 * Now direct and indirect.  di_mode also carries permission bits,
 	 * so the file type has to be isolated with IFMT before comparing.
 	 */
 	if ((DIP(ip, di_mode) & IFMT) == IFLNK &&
 	    DIP(ip, di_size) < fs->fs_maxsymlinklen)
 		return (0);
 	if (lbn >= 0 && lbn < UFS_NDADDR) {
 		*frags = numfrags(fs, sblksize(fs, DIP(ip, di_size), lbn));
 		return (DIP(ip, di_db[lbn]));
 	}
 	*frags = fs->fs_frag;
 
 	/*
 	 * Walk the indirect trees: tree i covers lbns [cur, next) and its
 	 * root indirect block has lbn -cur - i.
 	 */
 	for (i = 0, tmpval = NINDIR(fs), cur = UFS_NDADDR; i < UFS_NIADDR; i++,
 	    tmpval *= NINDIR(fs), cur = next) {
 		next = cur + tmpval;
 		if (lbn == -cur - i)
 			return (DIP(ip, di_ib[i]));
 		/*
 		 * Determine whether the lbn in question is within this tree.
 		 */
 		if (lbn < 0 && -lbn >= next)
 			continue;
 		if (lbn > 0 && lbn >= next)
 			continue;
 		return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn);
 	}
 	err_suj("lbn %jd not in ino\n", lbn);
 	/* NOTREACHED */
 }
 
 /*
  * Determine whether a block exists at a particular lbn in an inode.
  * Returns 1 if found, 0 if not (including when the inode has a zero
  * link count or mode).  lbn may be negative for indirects or ext blocks.
  */
 static int
 blk_isat(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int *frags)
 {
 	union dinode *dip;
 
 	dip = ino_read(ino);
 	if (DIP(dip, di_nlink) == 0 || DIP(dip, di_mode) == 0)
 		return (0);
 
 	return (ino_blkatoff(dip, ino, lbn, frags) == blk);
 }
 
 /*
  * Clear the directory entry at diroff that should point to child.  Minimal
  * checking is done and it is assumed that this path was verified with isat.
  * The program exits via errx() if the entry names a different inode.
  */
 static void
 ino_clrat(ino_t parent, off_t diroff, ino_t child)
 {
 	union dinode *pip;
 	struct direct *ent;
 	uint8_t *buf;
 	ufs2_daddr_t blk;
 	ufs_lbn_t lbn;
 	int nfrags;
 	int nbytes;
 	int inblk;
 
 	if (debug) {
 		printf("Clearing inode %ju from parent %ju at offset %jd\n",
 		    (uintmax_t)child, (uintmax_t)parent, diroff);
 	}
 
 	/* Split the directory offset into block number and byte offset. */
 	lbn = lblkno(fs, diroff);
 	inblk = blkoff(fs, diroff);
 	pip = ino_read(parent);
 	blk = ino_blkatoff(pip, parent, lbn, &nfrags);
 	nbytes = sblksize(fs, DIP(pip, di_size), lbn);
 	buf = dblk_read(blk, nbytes);
 	ent = (struct direct *)&buf[inblk];
 	if (ent->d_ino != child) {
 		errx(1, "Inode %ju does not exist in %ju at %jd",
 		    (uintmax_t)child, (uintmax_t)parent, diroff);
 	}
 	ent->d_ino = 0;
 	dblk_dirty(blk);
 	/*
 	 * The actual .. reference count will already have been removed
 	 * from the parent by the .. remref record.
 	 */
 }
 
 /*
  * Determines whether a pointer to an inode exists within a directory
  * at a specified offset.
  *
  * Returns 1 when the entry at 'diroff' in 'parent' names 'child'; '*mode'
  * is then the type of the entry (from d_type) and '*isdot' is set for "."
  * and ".." links.  Returns 0 otherwise; '*mode' then holds the parent's
  * di_mode and '*isdot' is 0.
  */
 static int
 ino_isat(ino_t parent, off_t diroff, ino_t child, int *mode, int *isdot)
 {
 	union dinode *dip;
 	struct direct *dp;
 	ufs2_daddr_t blk;
 	uint8_t *block;
 	ufs_lbn_t lbn;
 	int blksize;
 	int frags;
 	int dpoff;
 	int doff;
 
 	*isdot = 0;
 	dip = ino_read(parent);
 	*mode = DIP(dip, di_mode);
 	if ((*mode & IFMT) != IFDIR) {
 		if (debug) {
 			/*
 			 * This can happen if the parent inode
 			 * was reallocated.
 			 */
 			if (*mode != 0)
 				printf("Directory %ju has bad mode %o\n",
 				    (uintmax_t)parent, *mode);
 			else
 				printf("Directory %ju has zero mode\n",
 				    (uintmax_t)parent);
 		}
 		return (0);
 	}
 	/* Split the directory offset into block number and byte offset. */
 	lbn = lblkno(fs, diroff);
 	doff = blkoff(fs, diroff);
 	blksize = sblksize(fs, DIP(dip, di_size), lbn);
 	/* The entry must lie within both the file size and its block. */
 	if (diroff + DIRECTSIZ(1) > DIP(dip, di_size) || doff >= blksize) {
 		if (debug)
 			printf("ino %ju absent from %ju due to offset %jd"
 			    " exceeding size %jd\n",
 			    (uintmax_t)child, (uintmax_t)parent, diroff,
 			    DIP(dip, di_size));
 		return (0);
 	}
 	blk = ino_blkatoff(dip, parent, lbn, &frags);
 	if (blk <= 0) {
 		if (debug)
 			printf("Sparse directory %ju", (uintmax_t)parent);
 		return (0);
 	}
 	block = dblk_read(blk, blksize);
 	/*
 	 * Walk through the records from the start of the block to be
 	 * certain we hit a valid record and not some junk in the middle
 	 * of a file name.  Stop when we reach or pass the expected offset.
 	 */
 	dpoff = rounddown(doff, DIRBLKSIZ);
 	do {
 		dp = (struct direct *)&block[dpoff];
 		if (dpoff == doff)
 			break;
 		/* A zero record length would never advance; give up. */
 		if (dp->d_reclen == 0)
 			break;
 		dpoff += dp->d_reclen;
 	} while (dpoff <= doff);
 	if (dpoff > fs->fs_bsize)
 		err_suj("Corrupt directory block in dir ino %ju\n",
 		    (uintmax_t)parent);
 	/* Not found. */
 	if (dpoff != doff) {
 		if (debug)
 			printf("ino %ju not found in %ju, lbn %jd, dpoff %d\n",
 			    (uintmax_t)child, (uintmax_t)parent, lbn, dpoff);
 		return (0);
 	}
 	/*
 	 * We found the item in question.  Record the mode and whether it's
 	 * a . or .. link for the caller.
 	 */
 	if (dp->d_ino == child) {
 		if (child == parent)
 			*isdot = 1;
 		else if (dp->d_namlen == 2 &&
 		    dp->d_name[0] == '.' && dp->d_name[1] == '.')
 			*isdot = 1;
 		*mode = DTTOIF(dp->d_type);
 		return (1);
 	}
 	if (debug)
 		printf("ino %ju doesn't match dirent ino %ju in parent %ju\n",
 		    (uintmax_t)child, (uintmax_t)dp->d_ino, (uintmax_t)parent);
 	return (0);
 }
 
 #define	VISIT_INDIR	0x0001
 #define	VISIT_EXT	0x0002
 #define	VISIT_ROOT	0x0004	/* Operation came via root & valid pointers. */
 
 /*
  * Read an indirect level which may or may not be linked into an inode.
  *
  * Calls 'visitor' for every non-zero pointer reachable from indirect
  * block 'blk' (whose lbn is 'lbn'), recursing through lower levels, and
  * adds fs_frag to '*frags' for each block visited.  With VISIT_INDIR the
  * indirect block itself is visited too, after its children.  A zero 'blk'
  * is ignored; an lbn that is not an indirect lbn aborts via err_suj().
  */
 static void
 indir_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, uint64_t *frags,
     ino_visitor visitor, int flags)
 {
 	ufs2_daddr_t *bap2;
 	ufs1_daddr_t *bap1;
 	ufs_lbn_t lbnadd;
 	ufs2_daddr_t nblk;
 	ufs_lbn_t nlbn;
 	int level;
 	int i;
 
 	/*
 	 * Don't visit indirect blocks with contents we can't trust.  This
 	 * should only happen when indir_visit() is called to complete a
 	 * truncate that never finished and not when a pointer is found via
 	 * an inode.
 	 */
 	if (blk == 0)
 		return;
 	level = lbn_level(lbn);
 	if (level == -1)
 		err_suj("Invalid level for lbn %jd\n", lbn);
 	/*
 	 * Without VISIT_ROOT the block's contents are only followed when
 	 * blk_isindir() vouches for them; otherwise skip straight to the
 	 * optional visit of the block itself.
 	 */
 	if ((flags & VISIT_ROOT) == 0 && blk_isindir(blk, ino, lbn) == 0) {
 		if (debug)
 			printf("blk %jd ino %ju lbn %jd(%d) is not indir.\n",
 			    blk, (uintmax_t)ino, lbn, level);
 		goto out;
 	}
 	/* lbnadd: number of data blocks covered by one entry at this level. */
 	lbnadd = 1;
 	for (i = level; i > 0; i--)
 		lbnadd *= NINDIR(fs);
 	/* One buffer, two views: 32-bit entries on UFS1, 64-bit on UFS2. */
 	bap1 = (void *)dblk_read(blk, fs->fs_bsize);
 	bap2 = (void *)bap1;
 	for (i = 0; i < NINDIR(fs); i++) {
 		if (fs->fs_magic == FS_UFS1_MAGIC)
 			nblk = *bap1++;
 		else
 			nblk = *bap2++;
 		if (nblk == 0)
 			continue;
 		if (level == 0) {
 			/* Entries of a level 0 indirect are data blocks. */
 			nlbn = -lbn + i * lbnadd;
 			(*frags) += fs->fs_frag;
 			visitor(ino, nlbn, nblk, fs->fs_frag);
 		} else {
 			/* Entries are lower-level indirect blocks. */
 			nlbn = (lbn + 1) - (i * lbnadd);
 			indir_visit(ino, nlbn, nblk, frags, visitor, flags);
 		}
 	}
 out:
 	if (flags & VISIT_INDIR) {
 		(*frags) += fs->fs_frag;
 		visitor(ino, lbn, blk, fs->fs_frag);
 	}
 }
 
 /*
  * Visit each block in an inode as specified by 'flags' and call a
  * callback function.  The callback may inspect or free blocks.  The
  * count of frags found according to the size in the file is returned.
  * This is not valid for sparse files but may be used to determine
  * the correct di_blocks for a file.
  *
  * VISIT_EXT additionally visits the UFS2 extended attribute blocks (with
  * lbns -1 - i); VISIT_INDIR is passed on to indir_visit() so indirect
  * blocks themselves are visited as well.
  */
 static uint64_t
 ino_visit(union dinode *ip, ino_t ino, ino_visitor visitor, int flags)
 {
 	ufs_lbn_t nextlbn;
 	ufs_lbn_t tmpval;
 	ufs_lbn_t lbn;
 	uint64_t size;
 	uint64_t fragcnt;
 	int mode;
 	int frags;
 	int i;
 
 	size = DIP(ip, di_size);
 	mode = DIP(ip, di_mode) & IFMT;
 	fragcnt = 0;
 	if ((flags & VISIT_EXT) &&
 	    fs->fs_magic == FS_UFS2_MAGIC && ip->dp2.di_extsize) {
 		for (i = 0; i < UFS_NXADDR; i++) {
 			if (ip->dp2.di_extb[i] == 0)
 				continue;
 			/* Size in bytes first, then converted to frags. */
 			frags = sblksize(fs, ip->dp2.di_extsize, i);
 			frags = numfrags(fs, frags);
 			fragcnt += frags;
 			visitor(ino, -1 - i, ip->dp2.di_extb[i], frags);
 		}
 	}
 	/* Skip datablocks for short links and devices. */
 	if (mode == IFBLK || mode == IFCHR ||
 	    (mode == IFLNK && size < fs->fs_maxsymlinklen))
 		return (fragcnt);
 	/* Direct blocks. */
 	for (i = 0; i < UFS_NDADDR; i++) {
 		if (DIP(ip, di_db[i]) == 0)
 			continue;
 		frags = sblksize(fs, size, i);
 		frags = numfrags(fs, frags);
 		fragcnt += frags;
 		visitor(ino, i, DIP(ip, di_db[i]), frags);
 	}
 	/*
 	 * We know the following indirects are real as we're following
 	 * real pointers to them.
 	 */
 	flags |= VISIT_ROOT;
 	/*
 	 * Tree i starts at data lbn 'lbn' and spans 'tmpval' blocks; its
 	 * root indirect block is addressed as lbn -lbn - i.
 	 */
 	for (i = 0, tmpval = NINDIR(fs), lbn = UFS_NDADDR; i < UFS_NIADDR; i++,
 	    lbn = nextlbn) {
 		nextlbn = lbn + tmpval;
 		tmpval *= NINDIR(fs);
 		if (DIP(ip, di_ib[i]) == 0)
 			continue;
 		indir_visit(ino, -lbn - i, DIP(ip, di_ib[i]), &fragcnt, visitor,
 		    flags);
 	}
 	return (fragcnt);
 }
 
 /*
  * Visitor that frees nothing.  The caller's ino_visit() does the block
  * counting; this callback only remembers, in 'visitlbn', the most recent
  * positive lbn it was handed.
  */
 ufs_lbn_t visitlbn;
 static void
 null_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
 {
 	/* Zero and negative lbns never update visitlbn. */
 	if (lbn <= 0)
 		return;
 	visitlbn = lbn;
 }
 
 /*
  * Recalculate di_blocks when we discover that a block allocation or
  * free was not successfully completed.  The kernel does not roll this back
  * because it would be too expensive to compute which indirects were
  * reachable at the time the inode was written.
  *
  * Inodes whose last visited lbn lies at or beyond UFS_NDADDR are handed
  * to ino_trunc() instead of having di_blocks rewritten here.
  */
 static void
 ino_adjblks(struct suj_ino *sino)
 {
 	union dinode *ip;
 	uint64_t blocks;
 	uint64_t frags;
 	off_t isize;
 	off_t size;
 	ino_t ino;
 
 	ino = sino->si_ino;
 	ip = ino_read(ino);
 	/* No need to adjust zero'd inodes. */
 	if (DIP(ip, di_mode) == 0)
 		return;
 	/*
 	 * Visit all blocks and count them as well as recording the last
 	 * valid lbn in the file.  If the file size doesn't agree with the
 	 * last lbn we need to truncate to fix it.  Otherwise just adjust
 	 * the blocks count.
 	 */
 	visitlbn = 0;
 	frags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT);
 	blocks = fsbtodb(fs, frags);
 	/*
 	 * We assume the size and direct block list is kept coherent by
 	 * softdep.  For files that have extended into indirects we truncate
 	 * to the size in the inode or the maximum size permitted by
 	 * populated indirects.
 	 */
 	if (visitlbn >= UFS_NDADDR) {
 		isize = DIP(ip, di_size);
 		size = lblktosize(fs, visitlbn + 1);
 		if (isize > size)
 			isize = size;
 		/* Always truncate to free any unpopulated indirects. */
 		ino_trunc(sino->si_ino, isize);
 		return;
 	}
 	if (blocks == DIP(ip, di_blocks))
 		return;
 	/*
 	 * Both counts are unsigned 64-bit quantities; print them with %ju
 	 * and an explicit uintmax_t cast so the conversion matches the
 	 * argument type.
 	 */
 	if (debug)
 		printf("ino %ju adjusting block count from %ju to %ju\n",
 		    (uintmax_t)ino, (uintmax_t)DIP(ip, di_blocks),
 		    (uintmax_t)blocks);
 	DIP_SET(ip, di_blocks, blocks);
 	ino_dirty(ino);
 }
 
 /*
  * ino_visitor callback that frees every block it is handed, passing
  * blk_free() the mask computed by blk_freemask() for that block.
  */
 static void
 blk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
 {
 	int freemask;
 
 	freemask = blk_freemask(blk, ino, lbn, frags);
 	blk_free(blk, freemask, frags);
 }
 
 /*
  * Release the block, or the whole tree of blocks, that used to hang off
  * 'ino' at 'lbn'.  The tree is walked and freed through indir_visit() only
  * when 'lbn' is in the indirect range (<= -UFS_NDADDR), 'follow' is set and
  * blk_freemask() returned 0; in every other case just the single block is
  * passed to blk_free().
  */
 static void
 blk_free_lbn(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags, int follow)
 {
 	uint64_t visited;
 	int freemask;
 
 	freemask = blk_freemask(blk, ino, lbn, frags);
 	if (lbn > -UFS_NDADDR || !follow || freemask != 0) {
 		blk_free(blk, freemask, frags);
 		return;
 	}
 	/* The frag count accumulated by the walk is not used here. */
 	visited = 0;
 	indir_visit(ino, lbn, blk, &visited, blk_free_visit, VISIT_INDIR);
 }
 
 /*
  * Set si_skipparent on 'sino' when ino_isat() finds 'parent' at
  * DOTDOT_OFFSET in the directory sino->si_ino.
  */
 static void
 ino_setskip(struct suj_ino *sino, ino_t parent)
 {
 	int entmode;
 	int entisdot;
 
 	if (ino_isat(sino->si_ino, DOTDOT_OFFSET, parent, &entmode,
 	    &entisdot) == 0)
 		return;
 	sino->si_skipparent = 1;
 }
 
 /*
  * Drop the reference that directory 'parent' held on 'child' through the
  * entry at 'diroff'.  'isdotdot' is non-zero when that entry was "..".
  */
 static void
 ino_remref(ino_t parent, ino_t child, uint64_t diroff, int isdotdot)
 {
 	struct suj_ino *csino;
 	struct suj_rec *rec;
 	struct jrefrec *ref;
 
 	/* See whether we are tracking this inode at all. */
 	csino = ino_lookup(child, 0);
 	if (csino == NULL) {
 		/* Nothing known about it: just drop the on-disk link. */
 		ino_decr(child);
 		return;
 	}
 	/*
 	 * Let a child directory know its parent link was already
 	 * accounted for so it is not adjusted down a second time.
 	 */
 	if (isdotdot == 0)
 		ino_setskip(csino, parent);
 	if (csino->si_hasrecs == 0) {
 		/* Tracked, but without valid records: same as untracked. */
 		ino_decr(child);
 		return;
 	}
 	if (csino->si_linkadj) {
 		/*
 		 * ino_check() already ran for this child, so correct its
 		 * computed counts and let ino_adjust() apply them.  Losing
 		 * the last non-dot reference discards a directory.
 		 */
 		csino->si_nlink--;
 		if (isdotdot)
 			csino->si_dotlinks--;
 		ino_adjust(csino);
 		return;
 	}
 	/*
 	 * Not processed yet.  If a journal record already covers this
 	 * exact name the later pass will discover the lost path itself;
 	 * otherwise remember the removal in si_nlinkadj.
 	 */
 	TAILQ_FOREACH(rec, &csino->si_recs, sr_next) {
 		ref = (struct jrefrec *)rec->sr_rec;
 		if (ref->jr_parent != parent || ref->jr_diroff != diroff)
 			continue;
 		return;
 	}
 	csino->si_nlinkadj++;
 }
 
 /*
  * Free the children of a directory when the directory is discarded.
  *
  * ino_visitor callback: reads the directory block 'blk' ('frags' frags
  * long, at logical block 'lbn' of directory 'ino') and calls ino_remref()
  * for each entry in it.  Entries whose inode is 0 or UFS_WINO, the "."
  * entry, and ".." when the directory's si_skipparent is 1 are left alone.
  */
 static void
 ino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
 {
 	struct suj_ino *sino;
 	struct direct *dp;
 	off_t diroff;
 	uint8_t *block;
 	int skipparent;
 	int isdotdot;
 	int dpoff;
 	int size;
 
 	/* The directory may have no suj_ino; then ".." is never skipped. */
 	sino = ino_lookup(ino, 0);
 	if (sino)
 		skipparent = sino->si_skipparent;
 	else
 		skipparent = 0;
 	size = lfragtosize(fs, frags);
 	block = dblk_read(blk, size);
 	dp = (struct direct *)&block[0];
 	/*
 	 * The loop test looks at the d_reclen of the entry handled on the
 	 * previous iteration, so a zero record length ends the walk after
 	 * that entry has been processed.
 	 */
 	for (dpoff = 0; dpoff < size && dp->d_reclen; dpoff += dp->d_reclen) {
 		dp = (struct direct *)&block[dpoff];
 		if (dp->d_ino == 0 || dp->d_ino == UFS_WINO)
 			continue;
 		if (dp->d_namlen == 1 && dp->d_name[0] == '.')
 			continue;
 		isdotdot = dp->d_namlen == 2 && dp->d_name[0] == '.' &&
 		    dp->d_name[1] == '.';
 		if (isdotdot && skipparent == 1)
 			continue;
 		if (debug)
 			printf("Directory %ju removing ino %ju name %s\n",
 			    (uintmax_t)ino, (uintmax_t)dp->d_ino, dp->d_name);
 		/* Byte offset of this entry within the whole directory. */
 		diroff = lblktosize(fs, lbn) + dpoff;
 		ino_remref(ino, dp->d_ino, diroff, isdotdot);
 	}
 }
 
 /*
  * Give an inode back to its cylinder group.  For a directory every child
  * first loses the reference the directory held; then all blocks are
  * freed, the on-disk inode is wiped except for its generation number, and
  * the inode is released with ino_free() using the caller's 'mode'.
  */
 static void
 ino_reclaim(union dinode *ip, ino_t ino, int mode)
 {
 	uint32_t savedgen;
 	size_t dinosize;
 
 	if (ino == UFS_ROOTINO)
 		err_suj("Attempting to free UFS_ROOTINO\n");
 	if (debug)
 		printf("Truncating and freeing ino %ju, nlink %d, mode %o\n",
 		    (uintmax_t)ino, DIP(ip, di_nlink), DIP(ip, di_mode));
 
 	/* Directories release their children before their own blocks. */
 	if ((DIP(ip, di_mode) & IFMT) == IFDIR)
 		ino_visit(ip, ino, ino_free_children, 0);
 	DIP_SET(ip, di_nlink, 0);
 	ino_visit(ip, ino, blk_free_visit, VISIT_EXT | VISIT_INDIR);
 
 	/* Zero the dinode, carrying only di_gen across the wipe. */
 	savedgen = DIP(ip, di_gen);
 	dinosize = (fs->fs_magic == FS_UFS1_MAGIC) ?
 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode);
 	bzero(ip, dinosize);
 	DIP_SET(ip, di_gen, savedgen);
 	ino_dirty(ino);
 	ino_free(ino, mode);
 }
 
 /*
  * Adjust an inode's link count down by one when a directory goes away.
  *
  * The on-disk di_nlink of 'ino' is decremented.  When the result drops
  * below what the inode needs to stay alive (2 for a directory, 1 for
  * anything else) the inode is reclaimed instead of being rewritten.  A
  * link count below 1 or a zero mode is reported through err_suj().
  */
 static void
 ino_decr(ino_t ino)
 {
 	union dinode *ip;
 	int reqlink;
 	int nlink;
 	int mode;
 
 	ip = ino_read(ino);
 	nlink = DIP(ip, di_nlink);
 	mode = DIP(ip, di_mode);
 	/*
 	 * ino_t is wider than int, so it is printed with %ju and an
 	 * explicit uintmax_t cast like every other inode number here.
 	 */
 	if (nlink < 1)
 		err_suj("Inode %ju link count %d invalid\n",
 		    (uintmax_t)ino, nlink);
 	if (mode == 0)
 		err_suj("Inode %ju has a link of %d with 0 mode\n",
 		    (uintmax_t)ino, nlink);
 	nlink--;
 	if ((mode & IFMT) == IFDIR)
 		reqlink = 2;
 	else
 		reqlink = 1;
 	if (nlink < reqlink) {
 		if (debug)
 			printf("ino %ju not enough links to live %d < %d\n",
 			    (uintmax_t)ino, nlink, reqlink);
 		ino_reclaim(ip, ino, mode);
 		return;
 	}
 	DIP_SET(ip, di_nlink, nlink);
 	ino_dirty(ino);
 }
 
 /*
  * Adjust the inode link count to 'nlink'.  If the count reaches zero
  * free it.
  *
  * The target count is sino->si_nlink.  Depending on what is found the
  * inode is reclaimed, only its bitmap entry is freed (on-disk mode is
  * already zero), or di_nlink is rewritten and the inode marked dirty.
  */
 static void
 ino_adjust(struct suj_ino *sino)
 {
 	struct jrefrec *rrec;
 	struct suj_rec *srec;
 	struct suj_ino *stmp;
 	union dinode *ip;
 	nlink_t nlink;
+	nlink_t reqlink;
 	int recmode;
-	int reqlink;
 	int isdot;
 	int mode;
 	ino_t ino;
 
 	nlink = sino->si_nlink;
 	ino = sino->si_ino;
 	mode = sino->si_mode & IFMT;
 	/*
 	 * If it's a directory with no dot links, it was truncated before
 	 * the name was cleared.  We need to clear the dirent that
 	 * points at it.
 	 */
 	if (mode == IFDIR && nlink == 1 && sino->si_dotlinks == 0) {
 		sino->si_nlink = nlink = 0;
 		/* Clear the first journaled name that is still on disk. */
 		TAILQ_FOREACH(srec, &sino->si_recs, sr_next) {
 			rrec = (struct jrefrec *)srec->sr_rec;
 			if (ino_isat(rrec->jr_parent, rrec->jr_diroff, ino,
 			    &recmode, &isdot) == 0)
 				continue;
 			ino_clrat(rrec->jr_parent, rrec->jr_diroff, ino);
 			break;
 		}
 		/* srec is NULL only when the loop ran off the end of the list. */
 		if (srec == NULL)
 			errx(1, "Directory %ju name not found", (uintmax_t)ino);
 	}
 	/*
 	 * If it's a directory with no real names pointing to it go ahead
 	 * and truncate it.  This will free any children.
 	 */
 	if (mode == IFDIR && nlink - sino->si_dotlinks == 0) {
 		sino->si_nlink = nlink = 0;
 		/*
 		 * Mark any .. links so they know not to free this inode
 		 * when they are removed.
 		 */
 		TAILQ_FOREACH(srec, &sino->si_recs, sr_next) {
 			rrec = (struct jrefrec *)srec->sr_rec;
 			if (rrec->jr_diroff == DOTDOT_OFFSET) {
 				stmp = ino_lookup(rrec->jr_parent, 0);
 				if (stmp)
 					ino_setskip(stmp, ino);
 			}
 		}
 	}
 	/* From here on 'mode' is the file type currently stored on disk. */
 	ip = ino_read(ino);
 	mode = DIP(ip, di_mode) & IFMT;
 	if (nlink > LINK_MAX)
 		err_suj("ino %ju nlink manipulation error, new %ju, old %d\n",
 		    (uintmax_t)ino, (uintmax_t)nlink, DIP(ip, di_nlink));
 	if (debug)
 	       printf("Adjusting ino %ju, nlink %ju, old link %d lastmode %o\n",
 		    (uintmax_t)ino, (uintmax_t)nlink, DIP(ip, di_nlink),
 		    sino->si_mode);
 	if (mode == 0) {
 		if (debug)
 			printf("ino %ju, zero inode freeing bitmap\n",
 			    (uintmax_t)ino);
 		ino_free(ino, sino->si_mode);
 		return;
 	}
 	/* XXX Should be an assert? */
 	if (mode != sino->si_mode && debug)
 		printf("ino %ju, mode %o != %o\n",
 		    (uintmax_t)ino, mode, sino->si_mode);
 	/* A directory needs two links to stay alive, anything else one. */
 	if ((mode & IFMT) == IFDIR)
 		reqlink = 2;
 	else
 		reqlink = 1;
 	/* If the inode doesn't have enough links to live, free it. */
 	if (nlink < reqlink) {
 		if (debug)
 			printf("ino %ju not enough links to live %ju < %ju\n",
 			    (uintmax_t)ino, (uintmax_t)nlink,
 			    (uintmax_t)reqlink);
 		ino_reclaim(ip, ino, mode);
 		return;
 	}
 	/* If required write the updated link count. */
 	if (DIP(ip, di_nlink) == nlink) {
 		if (debug)
 			printf("ino %ju, link matches, skipping.\n",
 			    (uintmax_t)ino);
 		return;
 	}
 	DIP_SET(ip, di_nlink, nlink);
 	ino_dirty(ino);
 }
 
 /*
  * Truncate some or all blocks in an indirect, freeing any that are required
  * and zeroing the indirect.
  *
  * 'blk' is the indirect block addressed as 'lbn' in 'ino'.  Data blocks
  * whose lbn is at or beyond 'lastlbn' are freed and their pointers set to
  * zero; child indirects are recursed into and freed as well when their
  * first lbn is at or beyond 'lastlbn'.  The indirect is marked dirty only
  * when at least one pointer was cleared.  The block 'blk' itself is never
  * freed here.
  */
 static void
 indir_trunc(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, ufs_lbn_t lastlbn)
 {
 	ufs2_daddr_t *bap2;
 	ufs1_daddr_t *bap1;
 	ufs_lbn_t lbnadd;
 	ufs2_daddr_t nblk;
 	ufs_lbn_t next;
 	ufs_lbn_t nlbn;
 	int dirty;
 	int level;
 	int i;
 
 	if (blk == 0)
 		return;
 	dirty = 0;
 	level = lbn_level(lbn);
 	if (level == -1)
 		err_suj("Invalid level for lbn %jd\n", lbn);
 	/* Each pointer at this level spans NINDIR(fs)^level logical blocks. */
 	lbnadd = 1;
 	for (i = level; i > 0; i--)
 		lbnadd *= NINDIR(fs);
 	/* One buffer, read as UFS1 or UFS2 sized pointers per fs_magic. */
 	bap1 = (void *)dblk_read(blk, fs->fs_bsize);
 	bap2 = (void *)bap1;
 	for (i = 0; i < NINDIR(fs); i++) {
 		if (fs->fs_magic == FS_UFS1_MAGIC)
 			nblk = *bap1++;
 		else
 			nblk = *bap2++;
 		if (nblk == 0)
 			continue;
 		if (level != 0) {
 			nlbn = (lbn + 1) - (i * lbnadd);
 			/*
 			 * Calculate the lbn of the next indirect to
 			 * determine if any of this indirect must be
 			 * reclaimed.
 			 */
 			next = -(lbn + level) + ((i+1) * lbnadd);
 			if (next <= lastlbn)
 				continue;
 			indir_trunc(ino, nlbn, nblk, lastlbn);
 			/* If all of this indirect was reclaimed, free it. */
 			nlbn = next - lbnadd;
 			if (nlbn < lastlbn)
 				continue;
 		} else {
 			/* Data block: keep it when it lies before lastlbn. */
 			nlbn = -lbn + i * lbnadd;
 			if (nlbn < lastlbn)
 				continue;
 		}
 		dirty = 1;
 		blk_free(nblk, 0, fs->fs_frag);
 		/* bap1/bap2 were already advanced; clear the slot just read. */
 		if (fs->fs_magic == FS_UFS1_MAGIC)
 			*(bap1 - 1) = 0;
 		else
 			*(bap2 - 1) = 0;
 	}
 	if (dirty)
 		dblk_dirty(blk);
 }
 
 /*
  * Truncate an inode to the minimum of the given size or the last populated
  * block after any over size have been discarded.  The kernel would allocate
  * the last block in the file but fsck does not and neither do we.  This
  * code never extends files, only shrinks them.
  *
  * 'ino' is the inode to truncate and 'size' the requested new length in
  * bytes.  Inodes with a zero mode, block and character devices, and
  * symlinks shorter than fs_maxsymlinklen are left untouched.  Otherwise
  * whole blocks past the new end are freed, di_blocks and di_size are
  * rewritten, and the tail of a partially used last block is zeroed.
  */
 static void
 ino_trunc(ino_t ino, off_t size)
 {
 	union dinode *ip;
 	ufs2_daddr_t bn;
 	uint64_t totalfrags;
 	ufs_lbn_t nextlbn;
 	ufs_lbn_t lastlbn;
 	ufs_lbn_t tmpval;
 	ufs_lbn_t lbn;
 	ufs_lbn_t i;
 	int frags;
 	off_t cursize;
 	off_t off;
 	int mode;
 
 	ip = ino_read(ino);
 	mode = DIP(ip, di_mode) & IFMT;
 	cursize = DIP(ip, di_size);
 	if (debug)
 		printf("Truncating ino %ju, mode %o to size %jd from size %jd\n",
 		    (uintmax_t)ino, mode, size, cursize);
 
 	/* Skip datablocks for short links and devices. */
 	if (mode == 0 || mode == IFBLK || mode == IFCHR ||
 	    (mode == IFLNK && cursize < fs->fs_maxsymlinklen))
 		return;
 	/* Don't extend. */
 	if (size > cursize)
 		size = cursize;
 	/* First logical block that lies entirely past the new size. */
 	lastlbn = lblkno(fs, blkroundup(fs, size));
 	for (i = lastlbn; i < UFS_NDADDR; i++) {
 		if (DIP(ip, di_db[i]) == 0)
 			continue;
 		/* Direct blocks may be partial; size them from the old length. */
 		frags = sblksize(fs, cursize, i);
 		frags = numfrags(fs, frags);
 		blk_free(DIP(ip, di_db[i]), 0, frags);
 		DIP_SET(ip, di_db[i], 0);
 	}
 	/*
 	 * Follow indirect blocks, freeing anything required.
 	 */
 	for (i = 0, tmpval = NINDIR(fs), lbn = UFS_NDADDR; i < UFS_NIADDR; i++,
 	    lbn = nextlbn) {
 		nextlbn = lbn + tmpval;
 		tmpval *= NINDIR(fs);
 		/* If we're not freeing any in this indirect range skip it. */
 		if (lastlbn >= nextlbn)
 			continue;
 		if (DIP(ip, di_ib[i]) == 0)
 			continue;
 		indir_trunc(ino, -lbn - i, DIP(ip, di_ib[i]), lastlbn);
 		/* If we freed everything in this indirect free the indir. */
 		if (lastlbn > lbn)
 			continue;
 		/*
 		 * An indirect block always occupies a full block, so free
 		 * fs_frag frags.  'frags' must not be used here: it is
 		 * unset when no direct block was freed above and otherwise
 		 * holds the size of the last direct block.
 		 */
 		blk_free(DIP(ip, di_ib[i]), 0, fs->fs_frag);
 		DIP_SET(ip, di_ib[i], 0);
 	}
 	ino_dirty(ino);
 	/*
 	 * Now that we've freed any whole blocks that exceed the desired
 	 * truncation size, figure out how many blocks remain and what the
 	 * last populated lbn is.  We will set the size to this last lbn
 	 * rather than worrying about allocating the final lbn as the kernel
 	 * would've done.  This is consistent with normal fsck behavior.
 	 */
 	visitlbn = 0;
 	totalfrags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT);
 	if (size > lblktosize(fs, visitlbn + 1))
 		size = lblktosize(fs, visitlbn + 1);
 	/*
 	 * If we're truncating direct blocks we have to adjust frags
 	 * accordingly.
 	 */
 	if (visitlbn < UFS_NDADDR && totalfrags) {
 		long oldspace, newspace;
 
 		bn = DIP(ip, di_db[visitlbn]);
 		if (bn == 0)
 			err_suj("Bad blk at ino %ju lbn %jd\n",
 			    (uintmax_t)ino, visitlbn);
 		oldspace = sblksize(fs, cursize, visitlbn);
 		newspace = sblksize(fs, size, visitlbn);
 		if (oldspace != newspace) {
 			/* Free only the frags past the new end of the block. */
 			bn += numfrags(fs, newspace);
 			frags = numfrags(fs, oldspace - newspace);
 			blk_free(bn, 0, frags);
 			totalfrags -= frags;
 		}
 	}
 	DIP_SET(ip, di_blocks, fsbtodb(fs, totalfrags));
 	DIP_SET(ip, di_size, size);
 	/*
 	 * If we've truncated into the middle of a block or frag we have
 	 * to zero it here.  Otherwise the file could extend into
 	 * uninitialized space later.
 	 *
 	 * NOTE(review): this compares the unmasked di_mode against IFDIR,
 	 * so it is true for any directory that has permission bits set;
 	 * confirm whether 'mode' (already masked with IFMT) was intended.
 	 */
 	off = blkoff(fs, size);
 	if (off && DIP(ip, di_mode) != IFDIR) {
 		uint8_t *buf;
 		long clrsize;
 
 		bn = ino_blkatoff(ip, ino, visitlbn, &frags);
 		if (bn == 0)
 			err_suj("Block missing from ino %ju at lbn %jd\n",
 			    (uintmax_t)ino, visitlbn);
 		/* Clear from the new end of data to the end of the frags. */
 		clrsize = frags * fs->fs_fsize;
 		buf = dblk_read(bn, clrsize);
 		clrsize -= off;
 		buf += off;
 		bzero(buf, clrsize);
 		dblk_dirty(bn);
 	}
 }
 
 /*
  * Process records available for one inode and determine whether the
  * link count is correct or needs adjusting.
  *
  * Starting from the jr_nlink of the first record, the count is raised by
  * one for every journaled name ino_isat() still finds on disk and lowered
  * by the number of removes (JOP_REMREF records plus si_nlinkadj).  The
  * result is stored in the suj_ino and applied through ino_adjust().
  * Inodes without records are ignored.
  */
 static void
 ino_check(struct suj_ino *sino)
 {
 	struct suj_rec *srec;
 	struct jrefrec *rrec;
 	nlink_t dotlinks;
-	int newlinks;
-	int removes;
-	int nlink;
+	nlink_t newlinks;
+	nlink_t removes;
+	nlink_t nlink;
 	ino_t ino;
 	int isdot;
 	int isat;
 	int mode;
 
 	if (sino->si_hasrecs == 0)
 		return;
 	ino = sino->si_ino;
 	/* The first record on the list supplies the starting link count. */
 	rrec = (struct jrefrec *)TAILQ_FIRST(&sino->si_recs)->sr_rec;
 	nlink = rrec->jr_nlink;
 	newlinks = 0;
 	dotlinks = 0;
 	/* Removals already noted by ino_remref() count as removes too. */
 	removes = sino->si_nlinkadj;
 	TAILQ_FOREACH(srec, &sino->si_recs, sr_next) {
 		rrec = (struct jrefrec *)srec->sr_rec;
 		isat = ino_isat(rrec->jr_parent, rrec->jr_diroff,
 		    rrec->jr_ino, &mode, &isdot);
 		if (isat && (mode & IFMT) != (rrec->jr_mode & IFMT))
 			err_suj("Inode mode/directory type mismatch %o != %o\n",
 			    mode, rrec->jr_mode);
 		if (debug)
 			printf("jrefrec: op %d ino %ju, nlink %ju, parent %ju, "
 			    "diroff %jd, mode %o, isat %d, isdot %d\n",
 			    rrec->jr_op, (uintmax_t)rrec->jr_ino,
 			    (uintmax_t)rrec->jr_nlink,
 			    (uintmax_t)rrec->jr_parent,
 			    (uintmax_t)rrec->jr_diroff,
 			    rrec->jr_mode, isat, isdot);
 		/* The mode kept for si_mode is the last record's journaled type. */
 		mode = rrec->jr_mode & IFMT;
 		if (rrec->jr_op == JOP_REMREF)
 			removes++;
 		newlinks += isat;
 		if (isdot)
 			dotlinks += isat;
 	}
 	/*
 	 * The number of links that remain are the starting link count
 	 * subtracted by the total number of removes with the total
 	 * links discovered back in.  An incomplete remove thus
 	 * makes no change to the link count but an add increases
 	 * by one.
 	 */
 	if (debug)
 		printf(
 		    "ino %ju nlink %ju newlinks %ju removes %ju dotlinks %ju\n",
 		    (uintmax_t)ino, (uintmax_t)nlink, (uintmax_t)newlinks,
 		    (uintmax_t)removes, (uintmax_t)dotlinks);
 	nlink += newlinks;
 	nlink -= removes;
 	/* si_linkadj tells ino_remref() that this inode has been processed. */
 	sino->si_linkadj = 1;
 	sino->si_nlink = nlink;
 	sino->si_dotlinks = dotlinks;
 	sino->si_mode = mode;
 	ino_adjust(sino);
 }
 
 /*
  * Process records available for one block and determine whether it is
  * still allocated and whether the owning inode needs to be updated or
  * a free completed.
  *
  * Every inode named by a record gets si_blkadj set so its block count is
  * recomputed later.  A record whose block is still found at its lbn only
  * has surplus trailing frags freed; otherwise the block is handed to
  * blk_free_lbn().
  */
 static void
 blk_check(struct suj_blk *sblk)
 {
 	struct suj_rec *srec;
 	struct jblkrec *brec;
 	struct suj_ino *sino;
 	ufs2_daddr_t blk;
 	int mask;
 	int frags;
 	int isat;
 
 	/*
 	 * Each suj_blk actually contains records for any fragments in that
 	 * block.  As a result we must evaluate each record individually.
 	 */
 	sino = NULL;
 	TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) {
 		brec = (struct jblkrec *)srec->sr_rec;
 		frags = brec->jb_frags;
 		/* blk_build() stored the frag offset within the block in jb_oldfrags. */
 		blk = brec->jb_blkno + brec->jb_oldfrags;
 		/* blk_isat() may rewrite 'frags'; it is compared to jb_frags below. */
 		isat = blk_isat(brec->jb_ino, brec->jb_lbn, blk, &frags);
 		/* Look the inode up again only when it differs from the last record's. */
 		if (sino == NULL || sino->si_ino != brec->jb_ino) {
 			sino = ino_lookup(brec->jb_ino, 1);
 			sino->si_blkadj = 1;
 		}
 		if (debug)
 			printf("op %d blk %jd ino %ju lbn %jd frags %d isat %d (%d)\n",
 			    brec->jb_op, blk, (uintmax_t)brec->jb_ino,
 			    brec->jb_lbn, brec->jb_frags, isat, frags);
 		/*
 		 * If we found the block at this address we still have to
 		 * determine if we need to free the tail end that was
 		 * added by adding contiguous fragments from the same block.
 		 */
 		if (isat == 1) {
 			if (frags == brec->jb_frags)
 				continue;
 			mask = blk_freemask(blk, brec->jb_ino, brec->jb_lbn,
 			    brec->jb_frags);
 			/* Skip the leading 'frags' frags and free the remainder. */
 			mask >>= frags;
 			blk += frags;
 			frags = brec->jb_frags - frags;
 			blk_free(blk, mask, frags);
 			continue;
 		}
 		/*
 		 * The block wasn't found, attempt to free it.  It won't be
 		 * freed if it was actually reallocated.  If this was an
 		 * allocation we don't want to follow indirects as they
 		 * may not be written yet.  Any children of the indirect will
 		 * have their own records.  If it's a free we need to
 		 * recursively free children.
 		 */
 		blk_free_lbn(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags,
 		    brec->jb_op == JOP_FREEBLK);
 	}
 }
 
 /*
  * Run ino_build() on every inode recorded for this cylinder group so that
  * moved and duplicate references are resolved once the whole journal has
  * been read.
  */
 static void
 cg_build(struct suj_cg *sc)
 {
 	struct suj_ino *cur;
 	int bucket;
 
 	bucket = 0;
 	while (bucket < SUJ_HASHSIZE) {
 		LIST_FOREACH(cur, &sc->sc_inohash[bucket], si_next) {
 			ino_build(cur);
 		}
 		bucket++;
 	}
 }
 
 /*
  * Carry out pending truncations for this cylinder group.  This has to
  * happen before any inode is looked up in a directory.  An inode with a
  * truncate record is truncated and needs no further block adjustment;
  * any other inode flagged si_blkadj has its block count recomputed.
  */
 static void
 cg_trunc(struct suj_cg *sc)
 {
 	struct suj_ino *cur;
 	int bucket;
 
 	for (bucket = 0; bucket < SUJ_HASHSIZE; bucket++) {
 		LIST_FOREACH(cur, &sc->sc_inohash[bucket], si_next) {
 			if (cur->si_trunc != NULL) {
 				ino_trunc(cur->si_ino, cur->si_trunc->jt_size);
 				cur->si_blkadj = 0;
 				cur->si_trunc = NULL;
 				continue;
 			}
 			if (cur->si_blkadj)
 				ino_adjblks(cur);
 		}
 	}
 }
 
 /*
  * Recompute the block count of every inode in this cylinder group that
  * has si_blkadj set.
  */
 static void
 cg_adj_blk(struct suj_cg *sc)
 {
 	struct suj_ino *cur;
 	int bucket;
 
 	for (bucket = 0; bucket < SUJ_HASHSIZE; bucket++) {
 		LIST_FOREACH(cur, &sc->sc_inohash[bucket], si_next) {
 			if (!cur->si_blkadj)
 				continue;
 			ino_adjblks(cur);
 		}
 	}
 }
 
 /*
  * Run blk_check() on every journaled block of this cylinder group, which
  * frees partially allocated blocks and flags the owning inodes for a
  * block count update.
  */
 static void
 cg_check_blk(struct suj_cg *sc)
 {
 	struct suj_blk *cur;
 	int bucket;
 
 	bucket = 0;
 	while (bucket < SUJ_HASHSIZE) {
 		LIST_FOREACH(cur, &sc->sc_blkhash[bucket], sb_next) {
 			blk_check(cur);
 		}
 		bucket++;
 	}
 }
 
 /*
  * Run ino_check() on every inode recorded for this cylinder group to
  * recover link count changes that were incomplete at the time of the
  * crash.
  */
 static void
 cg_check_ino(struct suj_cg *sc)
 {
 	struct suj_ino *cur;
 	int bucket;
 
 	bucket = 0;
 	while (bucket < SUJ_HASHSIZE) {
 		LIST_FOREACH(cur, &sc->sc_inohash[bucket], si_next) {
 			ino_check(cur);
 		}
 		bucket++;
 	}
 }
 
 /*
  * Write a potentially dirty cg.  Recalculate the summary information and
  * update the superblock summary.
  *
  * Nothing is done for a cylinder group whose sc_dirty is zero.  Otherwise
  * the free block count, free frag count, frag summary and (when
  * fs_contigsumsize is non-zero) the cluster summary are rebuilt from the
  * free-block bitmap, the result is copied into the superblock summary,
  * and the group is written with bwrite().  A failed write is reported
  * through err_suj().
  */
 static void
 cg_write(struct suj_cg *sc)
 {
 	ufs1_daddr_t fragno, cgbno, maxbno;
 	u_int8_t *blksfree;
 	struct cg *cgp;
 	int blk;
 	int i;
 
 	if (sc->sc_dirty == 0)
 		return;
 	/*
 	 * Fix the frag and cluster summary.
 	 */
 	cgp = sc->sc_cgp;
 	cgp->cg_cs.cs_nbfree = 0;
 	cgp->cg_cs.cs_nffree = 0;
 	bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum));
 	maxbno = fragstoblks(fs, fs->fs_fpg);
 	if (fs->fs_contigsumsize > 0) {
 		for (i = 1; i <= fs->fs_contigsumsize; i++)
 			cg_clustersum(cgp)[i] = 0;
 		bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT));
 	}
 	blksfree = cg_blksfree(cgp);
 	for (cgbno = 0; cgbno < maxbno; cgbno++) {
 		/* Blocks for which ffs_isfreeblock() is true add nothing. */
 		if (ffs_isfreeblock(fs, blksfree, cgbno))
 			continue;
 		/* Blocks for which ffs_isblock() is true count as a free block. */
 		if (ffs_isblock(fs, blksfree, cgbno)) {
 			ffs_clusteracct(fs, cgp, cgbno, 1);
 			cgp->cg_cs.cs_nbfree++;
 			continue;
 		}
 		/* Remaining blocks: account each set bit as a free frag. */
 		fragno = blkstofrags(fs, cgbno);
 		blk = blkmap(fs, blksfree, fragno);
 		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
 		for (i = 0; i < fs->fs_frag; i++)
 			if (isset(blksfree, fragno + i))
 				cgp->cg_cs.cs_nffree++;
 	}
 	/*
 	 * Update the superblock cg summary from our now correct values
 	 * before writing the block.
 	 */
 	fs->fs_cs(fs, sc->sc_cgx) = cgp->cg_cs;
 	if (bwrite(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf,
 	    fs->fs_bsize) == -1)
 		err_suj("Unable to write cylinder group %d\n", sc->sc_cgx);
 }
 
 /*
  * Flush every inode block of this cylinder group whose ib_dirty flag is
  * set.
  */
 static void
 cg_write_inos(struct suj_cg *sc)
 {
 	struct ino_blk *cur;
 	int bucket;
 
 	for (bucket = 0; bucket < SUJ_HASHSIZE; bucket++) {
 		LIST_FOREACH(cur, &sc->sc_iblkhash[bucket], ib_next) {
 			if (!cur->ib_dirty)
 				continue;
 			iblk_write(cur);
 		}
 	}
 }
 
 /*
  * Call 'apply' once for every cylinder group present in cghash.
  */
 static void
 cg_apply(void (*apply)(struct suj_cg *))
 {
 	struct suj_cg *cur;
 	int bucket;
 
 	bucket = 0;
 	while (bucket < SUJ_HASHSIZE) {
 		LIST_FOREACH(cur, &cghash[bucket], sc_next) {
 			apply(cur);
 		}
 		bucket++;
 	}
 }
 
 /*
  * Walk the list of unlinked but referenced inodes that starts at
  * fs_sujfree and is chained through di_freelink.  The list head and each
  * link are cleared on the way; inodes whose link count is zero are
  * reclaimed, the others are left in place.
  */
 static void
 ino_unlinked(void)
 {
 	union dinode *ip;
 	uint16_t mode;
 	ino_t following;
 	ino_t cur;
 
 	cur = fs->fs_sujfree;
 	fs->fs_sujfree = 0;
 	for (; cur != 0; cur = following) {
 		ip = ino_read(cur);
 		mode = DIP(ip, di_mode) & IFMT;
 		/* Remember the successor before the link is wiped. */
 		following = DIP(ip, di_freelink);
 		DIP_SET(ip, di_freelink, 0);
 		/*
 		 * XXX Should this be an errx?
 		 */
 		if (DIP(ip, di_nlink) != 0) {
 			if (debug)
 				printf("Skipping ino %ju mode %o with link %d\n",
 				    (uintmax_t)cur, mode, DIP(ip, di_nlink));
 			continue;
 		}
 		if (debug)
 			printf("Freeing unlinked ino %ju mode %o\n",
 			    (uintmax_t)cur, mode);
 		ino_reclaim(ip, cur, mode);
 	}
 }
 
 /*
  * Append a new record to the list of records requiring processing.
  *
  * 'rec' is queued, wrapped in a freshly allocated suj_rec, on the
  * si_newrecs list of the inode it names, and that inode is flagged
  * si_hasrecs.  The record itself is referenced, not copied.
  */
 static void
 ino_append(union jrec *rec)
 {
 	struct jrefrec *refrec;
 	struct jmvrec *mvrec;
 	struct suj_ino *sino;
 	struct suj_rec *srec;
 
 	/* Two views of the same record, used only to print the right fields. */
 	mvrec = &rec->rec_jmvrec;
 	refrec = &rec->rec_jrefrec;
 	if (debug && mvrec->jm_op == JOP_MVREF)
 		printf("ino move: ino %ju, parent %ju, "
 		    "diroff %jd, oldoff %jd\n",
 		    (uintmax_t)mvrec->jm_ino, (uintmax_t)mvrec->jm_parent,
 		    (uintmax_t)mvrec->jm_newoff, (uintmax_t)mvrec->jm_oldoff);
 	else if (debug &&
 	    (refrec->jr_op == JOP_ADDREF || refrec->jr_op == JOP_REMREF))
 		printf("ino ref: op %d, ino %ju, nlink %ju, "
 		    "parent %ju, diroff %jd\n",
 		    refrec->jr_op, (uintmax_t)refrec->jr_ino,
 		    (uintmax_t)refrec->jr_nlink,
 		    (uintmax_t)refrec->jr_parent, (uintmax_t)refrec->jr_diroff);
 	/*
 	 * The inode number is read through the jrefrec view whatever the
 	 * record type is.
 	 * NOTE(review): presumably jmvrec keeps its inode at the same
 	 * offset -- confirm against the journal record definitions.
 	 */
 	sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1);
 	sino->si_hasrecs = 1;
 	srec = errmalloc(sizeof(*srec));
 	srec->sr_rec = rec;
 	TAILQ_INSERT_TAIL(&sino->si_newrecs, srec, sr_next);
 }
 
 /*
  * Add a reference adjustment to the sino list and eliminate dups.  The
  * primary loop in ino_build_ref() checks for dups but new ones may be
  * created as a result of offset adjustments.
  *
  * 'srec' is appended to sino->si_recs unless a record for the same parent
  * and directory offset is already there.  In that case 'srec' is not
  * linked in; either the existing record only takes over the new mode, or
  * the existing list entry is redirected to srec's journal record.
  */
 static void
 ino_add_ref(struct suj_ino *sino, struct suj_rec *srec)
 {
 	struct jrefrec *refrec;
 	struct suj_rec *srn;
 	struct jrefrec *rrn;
 
 	refrec = (struct jrefrec *)srec->sr_rec;
 	/*
 	 * We walk backwards so that the oldest link count is preserved.  If
 	 * an add record conflicts with a remove keep the remove.  Redundant
 	 * removes are eliminated in ino_build_ref.  Otherwise we keep the
 	 * oldest record at a given location.
 	 */
 	for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn;
 	    srn = TAILQ_PREV(srn, srechd, sr_next)) {
 		rrn = (struct jrefrec *)srn->sr_rec;
 		if (rrn->jr_parent != refrec->jr_parent ||
 		    rrn->jr_diroff != refrec->jr_diroff)
 			continue;
 		/* Existing remove, or new add: keep the old record, update its mode. */
 		if (rrn->jr_op == JOP_REMREF || refrec->jr_op == JOP_ADDREF) {
 			rrn->jr_mode = refrec->jr_mode;
 			return;
 		}
 		/*
 		 * Adding a remove.
 		 *
 		 * Replace the record in place with the old nlink in case
 		 * we replace the head of the list.  Abandon srec as a dup.
 		 */
 		refrec->jr_nlink = rrn->jr_nlink;
 		srn->sr_rec = srec->sr_rec;
 		return;
 	}
 	TAILQ_INSERT_TAIL(&sino->si_recs, srec, sr_next);
 }
 
 /*
  * Record that the reference described by 'refrec' may also exist at the
  * earlier directory offset 'diroff': a copy of the record, turned into a
  * JOP_ADDREF at that offset, is handed to ino_add_ref().
  */
 static void
 ino_dup_ref(struct suj_ino *sino, struct jrefrec *refrec, off_t diroff)
 {
 	struct jrefrec *copy;
 	struct suj_rec *wrapper;
 
 	copy = errmalloc(sizeof(*copy));
 	*copy = *refrec;
 	copy->jr_diroff = diroff;
 	copy->jr_op = JOP_ADDREF;
 
 	wrapper = errmalloc(sizeof(*wrapper));
 	wrapper->sr_rec = (union jrec *)copy;
 	ino_add_ref(sino, wrapper);
 }
 
 /*
  * Add a reference to the list at all known locations.  We follow the offset
  * changes for a single instance and create duplicate add refs at each so
  * that we can tolerate any version of the directory block.  Eliminate
  * removes which collide with adds that are seen in the journal.  They should
  * not adjust the link count down.
  *
  * 'srec' has already been taken off sino->si_newrecs by the caller.
  * Matching move records are consumed from si_movs (earlier moves) and
  * from si_newrecs (later moves) while the reference is tracked.
  */
 static void
 ino_build_ref(struct suj_ino *sino, struct suj_rec *srec)
 {
 	struct jrefrec *refrec;
 	struct jmvrec *mvrec;
 	struct suj_rec *srp;
 	struct suj_rec *srn;
 	struct jrefrec *rrn;
 	off_t diroff;
 
 	refrec = (struct jrefrec *)srec->sr_rec;
 	/*
 	 * Search for a mvrec that matches this offset.  Whether it's an add
 	 * or a remove we can delete the mvref after creating a dup record in
 	 * the old location.
 	 */
 	if (!TAILQ_EMPTY(&sino->si_movs)) {
 		diroff = refrec->jr_diroff;
 		/* srp is fetched before srn may be removed and freed. */
 		for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn; srn = srp) {
 			srp = TAILQ_PREV(srn, srechd, sr_next);
 			mvrec = (struct jmvrec *)srn->sr_rec;
 			if (mvrec->jm_parent != refrec->jr_parent ||
 			    mvrec->jm_newoff != diroff)
 				continue;
 			/* Follow the chain of moves back to each older offset. */
 			diroff = mvrec->jm_oldoff;
 			TAILQ_REMOVE(&sino->si_movs, srn, sr_next);
 			free(srn);
 			ino_dup_ref(sino, refrec, diroff);
 		}
 	}
 	/*
 	 * If a remove wasn't eliminated by an earlier add just append it to
 	 * the list.
 	 */
 	if (refrec->jr_op == JOP_REMREF) {
 		ino_add_ref(sino, srec);
 		return;
 	}
 	/*
 	 * Walk the list of records waiting to be added to the list.  We
 	 * must check for moves that apply to our current offset and remove
 	 * them from the list.  Remove any duplicates to eliminate removes
 	 * with corresponding adds.
 	 */
 	TAILQ_FOREACH_SAFE(srn, &sino->si_newrecs, sr_next, srp) {
 		switch (srn->sr_rec->rec_jrefrec.jr_op) {
 		case JOP_ADDREF:
 			/*
 			 * This should actually be an error we should
 			 * have a remove for every add journaled.
 			 */
 			rrn = (struct jrefrec *)srn->sr_rec;
 			if (rrn->jr_parent != refrec->jr_parent ||
 			    rrn->jr_diroff != refrec->jr_diroff)
 				break;
 			/* The unlinked suj_rec is not freed here. */
 			TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next);
 			break;
 		case JOP_REMREF:
 			/*
 			 * Once we remove the current iteration of the
 			 * record at this address we're done.
 			 */
 			rrn = (struct jrefrec *)srn->sr_rec;
 			if (rrn->jr_parent != refrec->jr_parent ||
 			    rrn->jr_diroff != refrec->jr_diroff)
 				break;
 			TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next);
 			ino_add_ref(sino, srec);
 			return;
 		case JOP_MVREF:
 			/*
 			 * Update our diroff based on any moves that match
 			 * and remove the move.
 			 */
 			mvrec = (struct jmvrec *)srn->sr_rec;
 			if (mvrec->jm_parent != refrec->jr_parent ||
 			    mvrec->jm_oldoff != refrec->jr_diroff)
 				break;
 			/* Keep an add at the old offset, then track the new one. */
 			ino_dup_ref(sino, refrec, mvrec->jm_oldoff);
 			refrec->jr_diroff = mvrec->jm_newoff;
 			TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next);
 			break;
 		default:
 			err_suj("ino_build_ref: Unknown op %d\n",
 			    srn->sr_rec->rec_jrefrec.jr_op);
 		}
 	}
 	ino_add_ref(sino, srec);
 }
 
 /*
  * Drain si_newrecs in order.  Add and remove records are resolved through
  * ino_build_ref(); move records are parked on si_movs so later references
  * can find them.  If nothing ends up on si_recs the inode is marked as
  * having no records.
  */
 static void
 ino_build(struct suj_ino *sino)
 {
 	struct suj_rec *head;
 
 	while (!TAILQ_EMPTY(&sino->si_newrecs)) {
 		head = TAILQ_FIRST(&sino->si_newrecs);
 		TAILQ_REMOVE(&sino->si_newrecs, head, sr_next);
 		switch (head->sr_rec->rec_jrefrec.jr_op) {
 		case JOP_MVREF:
 			/* Queue the move until a reference consumes it. */
 			TAILQ_INSERT_TAIL(&sino->si_movs, head, sr_next);
 			break;
 		case JOP_ADDREF:
 		case JOP_REMREF:
 			ino_build_ref(sino, head);
 			break;
 		default:
 			err_suj("ino_build: Unknown op %d\n",
 			    head->sr_rec->rec_jrefrec.jr_op);
 		}
 	}
 	if (!TAILQ_EMPTY(&sino->si_recs))
 		return;
 	sino->si_hasrecs = 0;
 }
 
 /*
  * Modify journal records so they refer to the base block number
  * and a start and end frag range.  This is to facilitate the discovery
  * of overlapping fragment allocations.
  *
  * 'blkrec' is rewritten in place: jb_blkno becomes the address of the
  * containing block and jb_oldfrags the frag offset within it.  The record
  * is then queued on the suj_blk for that block, replacing an identical
  * earlier record, or dropped when it is a free that duplicates an
  * existing record.
  */
 static void
 blk_build(struct jblkrec *blkrec)
 {
 	struct suj_rec *srec;
 	struct suj_blk *sblk;
 	struct jblkrec *blkrn;
 	ufs2_daddr_t blk;
 	int frag;
 
 	if (debug)
 		printf("blk_build: op %d blkno %jd frags %d oldfrags %d "
 		    "ino %ju lbn %jd\n",
 		    blkrec->jb_op, (uintmax_t)blkrec->jb_blkno,
 		    blkrec->jb_frags, blkrec->jb_oldfrags,
 		    (uintmax_t)blkrec->jb_ino, (uintmax_t)blkrec->jb_lbn);
 
 	/* Split the address into its block base and frag offset. */
 	blk = blknum(fs, blkrec->jb_blkno);
 	frag = fragnum(fs, blkrec->jb_blkno);
 	sblk = blk_lookup(blk, 1);
 	/*
 	 * Rewrite the record using oldfrags to indicate the offset into
 	 * the block.  Leave jb_frags as the actual allocated count.
 	 */
 	blkrec->jb_blkno -= frag;
 	blkrec->jb_oldfrags = frag;
 	/* The frag range must not run past the end of the block. */
 	if (blkrec->jb_oldfrags + blkrec->jb_frags > fs->fs_frag)
 		err_suj("Invalid fragment count %d oldfrags %d\n",
 		    blkrec->jb_frags, frag);
 	/*
 	 * Detect dups.  If we detect a dup we always discard the oldest
 	 * record as it is superseded by the new record.  This speeds up
 	 * later stages but also eliminates free records which are used
 	 * to indicate that the contents of indirects can be trusted.
 	 */
 	TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) {
 		blkrn = (struct jblkrec *)srec->sr_rec;
 		if (blkrn->jb_ino != blkrec->jb_ino ||
 		    blkrn->jb_lbn != blkrec->jb_lbn ||
 		    blkrn->jb_blkno != blkrec->jb_blkno ||
 		    blkrn->jb_frags != blkrec->jb_frags ||
 		    blkrn->jb_oldfrags != blkrec->jb_oldfrags)
 			continue;
 		if (debug)
 			printf("Removed dup.\n");
 		/* Discard the free which is a dup with an alloc. */
 		if (blkrec->jb_op == JOP_FREEBLK)
 			return;
 		/* Leaving the loop right after the removal keeps the walk safe. */
 		TAILQ_REMOVE(&sblk->sb_recs, srec, sr_next);
 		free(srec);
 		break;
 	}
 	srec = errmalloc(sizeof(*srec));
 	srec->sr_rec = (union jrec *)blkrec;
 	TAILQ_INSERT_TAIL(&sblk->sb_recs, srec, sr_next);
 }
 
 /*
  * Record a truncation journal entry against its inode.  A JOP_SYNC record
  * clears any remembered truncation; otherwise the record is remembered
  * when none is pending or when it truncates to a smaller size than the
  * one currently remembered.
  */
 static void
 ino_build_trunc(struct jtrncrec *rec)
 {
 	struct suj_ino *sino;
 
 	if (debug)
 		printf("ino_build_trunc: op %d ino %ju, size %jd\n",
 		    rec->jt_op, (uintmax_t)rec->jt_ino,
 		    (uintmax_t)rec->jt_size);
 	sino = ino_lookup(rec->jt_ino, 1);
 	if (rec->jt_op == JOP_SYNC)
 		sino->si_trunc = NULL;
 	else if (sino->si_trunc == NULL)
 		sino->si_trunc = rec;
 	else if (sino->si_trunc->jt_size > rec->jt_size)
 		sino->si_trunc = rec;
 }
 
 /*
  * Build up tables of the operations we need to recover.
  *
  * Every segment on allsegs is walked one JREC_SIZE slot at a time.  Slots
  * that fall on a real_dev_bsize boundary hold the segment header and are
  * skipped without being counted; all other slots are dispatched on their
  * operation code until jsr_cnt records have been consumed.
  */
 static void
 suj_build(void)
 {
 	struct suj_seg *seg;
 	union jrec *rec;
 	int off;
 	int done;
 
 	TAILQ_FOREACH(seg, &allsegs, ss_next) {
 		if (debug)
 			printf("seg %jd has %d records, oldseq %jd.\n",
 			    seg->ss_rec.jsr_seq, seg->ss_rec.jsr_cnt,
 			    seg->ss_rec.jsr_oldest);
 		rec = (union jrec *)seg->ss_blk;
 		off = 0;
 		done = 0;
 		while (done < seg->ss_rec.jsr_cnt) {
 			/* Anything not on a block boundary is a real record. */
 			if ((off % real_dev_bsize) != 0) {
 				switch (rec->rec_jrefrec.jr_op) {
 				case JOP_ADDREF:
 				case JOP_REMREF:
 				case JOP_MVREF:
 					ino_append(rec);
 					break;
 				case JOP_NEWBLK:
 				case JOP_FREEBLK:
 					blk_build((struct jblkrec *)rec);
 					break;
 				case JOP_TRUNC:
 				case JOP_SYNC:
 					ino_build_trunc((struct jtrncrec *)rec);
 					break;
 				default:
 					err_suj("Unknown journal operation %d (%d)\n",
 					    rec->rec_jrefrec.jr_op, off);
 				}
 				done++;
 			}
 			off += JREC_SIZE;
 			rec++;
 		}
 	}
 }
 
 /*
  * Prune the journal segments to those we care about based on the
  * oldest sequence in the newest segment.  Order the segment list
  * based on sequence number.
  *
  * Works on the global allsegs list in three passes: expired segments
  * (sequence below oldseq) are freed, the list is rotated so that it starts
  * at the lowest sequence, and finally everything following the first gap
  * in the sequence space is freed.  jrecs and jbytes are accumulated for
  * the segments that are kept.  Calls err_suj() if, after rotation, the
  * first segment's sequence is not oldseq.
  */
 static void
 suj_prune(void)
 {
 	struct suj_seg *seg;
 	struct suj_seg *segn;
 	uint64_t newseq;
 	int discard;
 
 	if (debug)
 		printf("Pruning up to %jd\n", oldseq);
 	/* First free the expired segments. */
 	TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) {
 		if (seg->ss_rec.jsr_seq >= oldseq)
 			continue;
 		TAILQ_REMOVE(&allsegs, seg, ss_next);
 		free(seg->ss_blk);
 		free(seg);
 	}
 	/* Next ensure that segments are ordered properly. */
 	seg = TAILQ_FIRST(&allsegs);
 	if (seg == NULL) {
 		if (debug)
 			printf("Empty journal\n");
 		return;
 	}
 	newseq = seg->ss_rec.jsr_seq;
 	/*
 	 * Rotate the list: while the tail segment has a lower sequence than
 	 * the current head, move it to the front.  newseq tracks the
 	 * sequence number of the segment now at the head.
 	 */
 	for (;;) {
 		seg = TAILQ_LAST(&allsegs, seghd);
 		if (seg->ss_rec.jsr_seq >= newseq)
 			break;
 		TAILQ_REMOVE(&allsegs, seg, ss_next);
 		TAILQ_INSERT_HEAD(&allsegs, seg, ss_next);
 		newseq = seg->ss_rec.jsr_seq;
 
 	}
 	/* The head must now be exactly the oldest sequence still needed. */
 	if (newseq != oldseq) {
 		TAILQ_FOREACH(seg, &allsegs, ss_next) {
 			printf("%jd, ", seg->ss_rec.jsr_seq);
 		}
 		printf("\n");
 		err_suj("Journal file sequence mismatch %jd != %jd\n",
 		    newseq, oldseq);
 	}
 	/*
 	 * The kernel may asynchronously write segments which can create
 	 * gaps in the sequence space.  Throw away any segments after the
 	 * gap as the kernel guarantees only those that are contiguously
 	 * reachable are marked as completed.
 	 */
 	discard = 0;
 	TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) {
 		/*
 		 * newseq is only post-incremented while no gap has been seen;
 		 * once discard is set the comparison is short-circuited and
 		 * every remaining segment is freed.
 		 */
 		if (!discard && newseq++ == seg->ss_rec.jsr_seq) {
 			jrecs += seg->ss_rec.jsr_cnt;
 			jbytes += seg->ss_rec.jsr_blocks * real_dev_bsize;
 			continue;
 		}
 		discard = 1;
 		if (debug)
 			printf("Journal order mismatch %jd != %jd pruning\n",
 			    newseq-1, seg->ss_rec.jsr_seq);
 		TAILQ_REMOVE(&allsegs, seg, ss_next);
 		free(seg->ss_blk);
 		free(seg);
 	}
 	if (debug)
 		printf("Processing journal segments from %jd to %jd\n",
 		    oldseq, newseq-1);
 }
 
 /*
  * Verify the journal inode before attempting to read records.
  *
  * The inode must have a link count of one, carry both SF_IMMUTABLE and
  * SF_NOUNLINK, be a regular file with mode IFREG | IREAD, be at least
  * SUJ_MIN bytes long, and have a di_modrev equal to the filesystem's
  * fs_mtime.  The first failing check is reported on stdout.
  *
  * Returns 0 when every check passes and -1 otherwise.
  */
 static int
 suj_verifyino(union dinode *ip)
 {
 	int status;
 
 	status = -1;
 	if (DIP(ip, di_nlink) != 1)
 		printf("Invalid link count %d for journal inode %ju\n",
 		    DIP(ip, di_nlink), (uintmax_t)sujino);
 	else if ((DIP(ip, di_flags) & (SF_IMMUTABLE | SF_NOUNLINK)) !=
 	    (SF_IMMUTABLE | SF_NOUNLINK))
 		printf("Invalid flags 0x%X for journal inode %ju\n",
 		    DIP(ip, di_flags), (uintmax_t)sujino);
 	else if (DIP(ip, di_mode) != (IFREG | IREAD))
 		printf("Invalid mode %o for journal inode %ju\n",
 		    DIP(ip, di_mode), (uintmax_t)sujino);
 	else if (DIP(ip, di_size) < SUJ_MIN)
 		printf("Invalid size %jd for journal inode %ju\n",
 		    DIP(ip, di_size), (uintmax_t)sujino);
 	else if (DIP(ip, di_modrev) != fs->fs_mtime)
 		printf("Journal timestamp does not match fs mount time\n");
 	else
 		status = 0;
 
 	return (status);
 }
 
 /*
  * Extent map of the journal file's disk blocks.  jb_extent is an array
  * with room for jb_avail entries; jb_used is the index of the last entry
  * filled in by jblocks_add().  jb_head and jb_off form the read cursor
  * that jblocks_next() and jblocks_advance() move through the extents.
  */
 struct jblocks {
 	struct jextent *jb_extent;	/* Extent array. */
 	int		jb_avail;	/* Available extents. */
 	int		jb_used;	/* Last used extent. */
 	int		jb_head;	/* Allocator head. */
 	int		jb_off;		/* Allocator extent offset. */
 };
 /* One contiguous run of disk blocks belonging to the journal file. */
 struct jextent {
 	ufs2_daddr_t	je_daddr;	/* Disk block address. */
 	int		je_blocks;	/* Disk block count. */
 };
 
 /* Extent map of the journal being read; created in suj_check(). */
 static struct jblocks *suj_jblocks;
 
 /*
  * Allocate an empty extent map with room for ten zeroed extents and the
  * read cursor positioned at the start.  The caller releases it with
  * jblocks_destroy().
  */
 static struct jblocks *
 jblocks_create(void)
 {
 	struct jblocks *jb;
 	int nbytes;
 
 	jb = errmalloc(sizeof(*jb));
 	jb->jb_off = 0;
 	jb->jb_head = 0;
 	jb->jb_used = 0;
 	jb->jb_avail = 10;
 	nbytes = sizeof(struct jextent) * jb->jb_avail;
 	jb->jb_extent = errmalloc(nbytes);
 	bzero(jb->jb_extent, nbytes);
 
 	return (jb);
 }
 
 /*
  * Return the disk address at the read cursor and, through *actual, how
  * many contiguous bytes (at most 'bytes') can be read from there.
  *
  * When the current extent has been fully consumed the cursor moves to the
  * start of the next extent; if there is no further extent 0 is returned
  * and *actual is left untouched.
  */
 static ufs2_daddr_t
 jblocks_next(struct jblocks *jblocks, int bytes, int *actual)
 {
 	struct jextent *ext;
 	int remaining;
 	int wanted;
 
 	wanted = bytes / disk->d_bsize;
 	ext = &jblocks->jb_extent[jblocks->jb_head];
 	remaining = ext->je_blocks - jblocks->jb_off;
 	if (remaining == 0) {
 		/* Current extent exhausted; step to the next one. */
 		jblocks->jb_off = 0;
 		jblocks->jb_head++;
 		if (jblocks->jb_head > jblocks->jb_used)
 			return (0);
 		ext = &jblocks->jb_extent[jblocks->jb_head];
 		remaining = ext->je_blocks;
 	}
 	/* Never hand out more than the caller asked for. */
 	remaining = (remaining > wanted) ? wanted : remaining;
 	*actual = remaining * disk->d_bsize;
 
 	return (ext->je_daddr + jblocks->jb_off);
 }
 
 /*
  * Move the read cursor forward within the current extent.  The byte count
  * is converted to disk blocks (bytes / disk->d_bsize) and added to the
  * extent offset; callers use this to consume one journal segment.
  */
 static void
 jblocks_advance(struct jblocks *jblocks, int bytes)
 {
 
 	jblocks->jb_off = jblocks->jb_off + bytes / disk->d_bsize;
 }
 
 /*
  * Release an extent map: first the extent array, then the map itself.
  */
 static void
 jblocks_destroy(struct jblocks *jblocks)
 {
 	struct jextent *extents;
 
 	extents = jblocks->jb_extent;
 	free(extents);
 	free(jblocks);
 }
 
 /*
  * Append 'blocks' disk blocks starting at 'daddr' to the extent map.
  *
  * An extent whose address is still zero is treated as unused and filled
  * in directly.  A run that begins exactly where the last extent ends is
  * merged into it.  Otherwise a new extent is started, doubling the array
  * when it is full.
  */
 static void
 jblocks_add(struct jblocks *jblocks, ufs2_daddr_t daddr, int blocks)
 {
 	struct jextent *last;
 	struct jextent *grown;
 	int nbytes;
 
 	last = &jblocks->jb_extent[jblocks->jb_used];
 	if (last->je_daddr != 0) {
 		/* Extending the last extent. */
 		if (last->je_daddr + last->je_blocks == daddr) {
 			last->je_blocks += blocks;
 			return;
 		}
 		/* Adding a new extent, growing the array if required. */
 		jblocks->jb_used++;
 		if (jblocks->jb_used == jblocks->jb_avail) {
 			jblocks->jb_avail *= 2;
 			nbytes = sizeof(struct jextent) * jblocks->jb_avail;
 			grown = errmalloc(nbytes);
 			bzero(grown, nbytes);
 			bcopy(jblocks->jb_extent, grown,
 			    sizeof(struct jextent) * jblocks->jb_used);
 			free(jblocks->jb_extent);
 			jblocks->jb_extent = grown;
 		}
 		last = &jblocks->jb_extent[jblocks->jb_used];
 	}
 	/* Either the very first extent or a freshly started one. */
 	last->je_daddr = daddr;
 	last->je_blocks = blocks;
 }
 
 /*
  * Add a file block from the journal to the extent map.  We can't read
  * each file block individually because the kernel treats it as a circular
  * buffer and segments may span multiple contiguous blocks.
  *
  * Both the block address and the fragment count are converted from
  * filesystem blocks to disk blocks before being recorded.  The ino and
  * lbn arguments are not used.
  */
 static void
 suj_add_block(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
 {
 	ufs2_daddr_t daddr;
 	int nblocks;
 
 	daddr = fsbtodb(fs, blk);
 	nblocks = fsbtodb(fs, frags);
 	jblocks_add(suj_jblocks, daddr, nblocks);
 }
 
 /*
  * Scan the journal's disk extents for valid segments.
  *
  * The extent map in suj_jblocks is read in chunks of up to 1MB.  Each
  * chunk is scanned one device block at a time for segment headers whose
  * timestamp matches the filesystem's fs_mtime.  Every complete segment is
  * copied and appended to allsegs, and oldseq is raised to the largest
  * jsr_oldest seen.  Headers with a zero record count or a zero block
  * count are skipped as invalid; a zero block count must be rejected
  * because it would yield a zero-length segment and the scan would never
  * move past it.
  */
 static void
 suj_read(void)
 {
 	uint8_t block[1 * 1024 * 1024];
 	struct suj_seg *seg;
 	struct jsegrec *recn;
 	struct jsegrec *rec;
 	ufs2_daddr_t blk;
 	int readsize;
 	int blocks;
 	int recsize;
 	int size;
 	int i;
 
 	/*
 	 * Read records until we exhaust the journal space.  If we find
 	 * an invalid record we start searching for a valid segment header
 	 * at the next block.  This is because we don't have a head/tail
 	 * pointer and must recover the information indirectly.  At the gap
 	 * between the head and tail we won't necessarily have a valid
 	 * segment.
 	 */
 restart:
 	for (;;) {
 		size = sizeof(block);
 		blk = jblocks_next(suj_jblocks, size, &readsize);
 		if (blk == 0)
 			return;
 		size = readsize;
 		/*
 		 * Read 1MB at a time and scan for records within this block.
 		 */
 		if (bread(disk, blk, &block, size) == -1) {
 			err_suj("Error reading journal block %jd\n",
 			    (intmax_t)blk);
 		}
 		for (rec = (void *)block; size; size -= recsize,
 		    rec = (struct jsegrec *)((uintptr_t)rec + recsize)) {
 			/* Until proven otherwise, skip a single device block. */
 			recsize = real_dev_bsize;
 			if (rec->jsr_time != fs->fs_mtime) {
 				if (debug)
 					printf("Rec time %jd != fs mtime %jd\n",
 					    rec->jsr_time, fs->fs_mtime);
 				jblocks_advance(suj_jblocks, recsize);
 				continue;
 			}
 			if (rec->jsr_cnt == 0) {
 				if (debug)
 					printf("Found illegal count %d\n",
 					    rec->jsr_cnt);
 				jblocks_advance(suj_jblocks, recsize);
 				continue;
 			}
 			/*
 			 * A segment always occupies at least the block that
 			 * holds its header.  A zero block count would make
 			 * recsize zero and stall the scan on this header.
 			 */
 			if (rec->jsr_blocks == 0) {
 				if (debug)
 					printf("Found illegal block count 0\n");
 				jblocks_advance(suj_jblocks, recsize);
 				continue;
 			}
 			blocks = rec->jsr_blocks;
 			recsize = blocks * real_dev_bsize;
 			if (recsize > size) {
 				/*
 				 * We may just have run out of buffer, restart
 				 * the loop to re-read from this spot.
 				 */
 				if (size < fs->fs_bsize &&
 				    size != readsize &&
 				    recsize <= fs->fs_bsize)
 					goto restart;
 				if (debug)
 					printf("Found invalid segsize %d > %d\n",
 					    recsize, size);
 				recsize = real_dev_bsize;
 				jblocks_advance(suj_jblocks, recsize);
 				continue;
 			}
 			/*
 			 * Verify that all blocks in the segment are present.
 			 * Each block of a segment starts with a header that
 			 * repeats the sequence number and timestamp.
 			 */
 			for (i = 1; i < blocks; i++) {
 				recn = (struct jsegrec *)((uintptr_t)rec +
 				    i * real_dev_bsize);
 				if (recn->jsr_seq == rec->jsr_seq &&
 				    recn->jsr_time == rec->jsr_time)
 					continue;
 				if (debug)
 					printf("Incomplete record %jd (%d)\n",
 					    rec->jsr_seq, i);
 				/* Skip the blocks that did match and rescan. */
 				recsize = i * real_dev_bsize;
 				jblocks_advance(suj_jblocks, recsize);
 				goto restart;
 			}
 			/* Keep a private copy of the whole segment. */
 			seg = errmalloc(sizeof(*seg));
 			seg->ss_blk = errmalloc(recsize);
 			seg->ss_rec = *rec;
 			bcopy((void *)rec, seg->ss_blk, recsize);
 			if (rec->jsr_oldest > oldseq)
 				oldseq = rec->jsr_oldest;
 			TAILQ_INSERT_TAIL(&allsegs, seg, ss_next);
 			jblocks_advance(suj_jblocks, recsize);
 		}
 	}
 }
 
 /*
  * Search a directory block for the SUJ_FILE.
  *
  * Block visitor that scans one block of directory entries for an entry
  * named SUJ_FILE and stores its inode number in sujino.  Does nothing if
  * sujino has already been found.  The ino and lbn arguments are not used.
  */
 static void
 suj_find(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
 {
 	char block[MAXBSIZE];
 	struct direct *dp;
 	int bytes;
 	int off;
 
 	if (sujino != 0)
 		return;
 	bytes = lfragtosize(fs, frags);
 	if (bread(disk, fsbtodb(fs, blk), block, bytes) <= 0)
 		err_suj("Failed to read UFS_ROOTINO directory block %jd\n",
 		    blk);
 	off = 0;
 	while (off < bytes) {
 		dp = (struct direct *)&block[off];
 		/* A zero record length would never advance; stop here. */
 		if (dp->d_reclen == 0)
 			break;
 		if (dp->d_ino != 0 &&
 		    dp->d_namlen == strlen(SUJ_FILE) &&
 		    bcmp(dp->d_name, SUJ_FILE, dp->d_namlen) == 0) {
 			sujino = dp->d_ino;
 			return;
 		}
 		off += dp->d_reclen;
 	}
 }
 
 /*
  * Orchestrate the verification of a filesystem via the softupdates journal.
  *
  * Locates and validates the journal inode, reads the journal segments,
  * and (in preen mode or after confirmation) replays them and writes the
  * result back.
  *
  * Returns 0 when the journal was processed, or when the operator declined
  * to write the changes.  Returns -1 when the journal cannot be used and
  * the caller should fall back to a full check.  If the operator refuses
  * the fallback after an inconsistency, the program exits with EEXIT.
  */
 int
 suj_check(const char *filesys)
 {
 	union dinode *jip;
 	union dinode *ip;
 	uint64_t blocks;
 	int retval;
 	struct suj_seg *seg;
 	struct suj_seg *segn;
 
 	initsuj();
 	opendisk(filesys);
 
 	/*
 	 * Set an exit point when SUJ check failed.  Presumably reached by a
 	 * longjmp() from err_suj(); only file-scope state is touched here.
 	 */
 	retval = setjmp(jmpbuf);
 	if (retval != 0) {
 		pwarn("UNEXPECTED SU+J INCONSISTENCY\n");
 		TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) {
 			TAILQ_REMOVE(&allsegs, seg, ss_next);
 			free(seg->ss_blk);
 			free(seg);
 		}
 		/* Don't leak the extent map if we failed while reading. */
 		if (suj_jblocks != NULL) {
 			jblocks_destroy(suj_jblocks);
 			suj_jblocks = NULL;
 		}
 		if (reply("FALLBACK TO FULL FSCK") == 0) {
 			ckfini(0);
 			exit(EEXIT);
 		} else
 			return (-1);
 	}
 
 	/*
 	 * Find the journal inode.
 	 */
 	ip = ino_read(UFS_ROOTINO);
 	sujino = 0;
 	ino_visit(ip, UFS_ROOTINO, suj_find, 0);
 	if (sujino == 0) {
 		printf("Journal inode removed.  Use tunefs to re-create.\n");
 		sblock.fs_flags &= ~FS_SUJ;
 		sblock.fs_sujfree = 0;
 		return (-1);
 	}
 	/*
 	 * Fetch the journal inode and verify it.
 	 */
 	jip = ino_read(sujino);
 	printf("** SU+J Recovering %s\n", filesys);
 	if (suj_verifyino(jip) != 0)
 		return (-1);
 	/*
 	 * Build a list of journal blocks in jblocks before parsing the
 	 * available journal blocks in with suj_read().
 	 */
 	printf("** Reading %jd byte journal from inode %ju.\n",
 	    DIP(jip, di_size), (uintmax_t)sujino);
 	suj_jblocks = jblocks_create();
 	blocks = ino_visit(jip, sujino, suj_add_block, 0);
 	if (blocks != numfrags(fs, DIP(jip, di_size))) {
 		printf("Sparse journal inode %ju.\n", (uintmax_t)sujino);
 		/* The extent map is of no further use; release it. */
 		jblocks_destroy(suj_jblocks);
 		suj_jblocks = NULL;
 		return (-1);
 	}
 	suj_read();
 	jblocks_destroy(suj_jblocks);
 	suj_jblocks = NULL;
 	if (preen || reply("RECOVER")) {
 		printf("** Building recovery table.\n");
 		suj_prune();
 		suj_build();
 		cg_apply(cg_build);
 		printf("** Resolving unreferenced inode list.\n");
 		ino_unlinked();
 		printf("** Processing journal entries.\n");
 		cg_apply(cg_trunc);
 		cg_apply(cg_check_blk);
 		cg_apply(cg_adj_blk);
 		cg_apply(cg_check_ino);
 	}
 	/* Interactive runs get a last chance to decline the write-back. */
 	if (preen == 0 && (jrecs > 0 || jbytes > 0) &&
 	    reply("WRITE CHANGES") == 0)
 		return (0);
 	/*
 	 * To remain idempotent with partial truncations the free bitmaps
 	 * must be written followed by indirect blocks and lastly inode
 	 * blocks.  This preserves access to the modified pointers until
 	 * they are freed.
 	 */
 	cg_apply(cg_write);
 	dblk_write();
 	cg_apply(cg_write_inos);
 	/* Write back superblock. */
 	closedisk(filesys);
 	if (jrecs > 0 || jbytes > 0) {
 		printf("** %jd journal records in %jd bytes for %.2f%% utilization\n",
 		    jrecs, jbytes, ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100);
 		printf("** Freed %jd inodes (%jd dirs) %jd blocks, and %jd frags.\n",
 		    freeinos, freedir, freeblocks, freefrags);
 	}
 
 	return (0);
 }
 
 /*
  * Reset all file-scope journal state so that a check starts from scratch:
  * the hash tables and lookup caches are emptied, the segment list is
  * initialized, and the sequence, inode and statistics counters are
  * cleared.
  */
 static void
 initsuj(void)
 {
 	int i;
 
 	/* Hash tables and their one-entry lookup caches. */
 	for (i = 0; i != SUJ_HASHSIZE; i++) {
 		LIST_INIT(&cghash[i]);
 		LIST_INIT(&dbhash[i]);
 	}
 	lastblk = NULL;
 	lastcg = NULL;
 
 	/* Journal segments and the handles used to read them. */
 	TAILQ_INIT(&allsegs);
 	suj_jblocks = NULL;
 	disk = NULL;
 	fs = NULL;
 	oldseq = 0;
 	sujino = 0;
 
 	/* Statistics reported at the end of the run. */
 	jrecs = 0;
 	jbytes = 0;
 	freedir = 0;
 	freeinos = 0;
 	freeblocks = 0;
 	freefrags = 0;
 }
Index: head/share/man/man5/acct.5
===================================================================
--- head/share/man/man5/acct.5	(revision 318735)
+++ head/share/man/man5/acct.5	(revision 318736)
@@ -1,127 +1,127 @@
 .\" Copyright (c) 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"     @(#)acct.5	8.1 (Berkeley) 6/5/93
 .\" $FreeBSD$
 .\"
-.Dd May 15, 2007
+.Dd February 13, 2017
 .Dt ACCT 5
 .Os
 .Sh NAME
 .Nm acct
 .Nd execution accounting file
 .Sh SYNOPSIS
 .In sys/types.h
 .In sys/acct.h
 .Sh DESCRIPTION
 The kernel maintains the following
 .Fa acct
 information structure for all
 processes.
 If a process terminates, and accounting is enabled,
 the kernel calls the
 .Xr acct 2
 function call to prepare and append the record
 to the accounting file.
 .Bd -literal
 #define AC_COMM_LEN 16
 
 /*
- * Accounting structure version 2 (current).
+ * Accounting structure version 3 (current).
  * The first byte is always zero.
  * Time units are microseconds.
  */
 
-struct acctv2 {
+struct acctv3 {
 	uint8_t  ac_zero;		/* zero identifies new version */
 	uint8_t  ac_version;		/* record version number */
 	uint16_t ac_len;		/* record length */
 
 	char	  ac_comm[AC_COMM_LEN];	/* command name */
 	float	  ac_utime;		/* user time */
 	float	  ac_stime;		/* system time */
 	float	  ac_etime;		/* elapsed time */
 	time_t	  ac_btime;		/* starting time */
 	uid_t	  ac_uid;		/* user id */
 	gid_t	  ac_gid;		/* group id */
 	float	  ac_mem;		/* average memory usage */
 	float	  ac_io;		/* count of IO blocks */
 	__dev_t   ac_tty;		/* controlling tty */
 
 	uint16_t ac_len2;		/* record length */
 	union {
-		__dev_t	  ac_align;	/* force v1 compatible alignment */
+		uint32_t  ac_align;	/* force v1 compatible alignment */
 
 #define	AFORK	0x01			/* forked but not exec'ed */
 /* ASU is no longer supported */
 #define	ASU	0x02			/* used super-user permissions */
 #define	ACOMPAT	0x04			/* used compatibility mode */
 #define	ACORE	0x08			/* dumped core */
 #define	AXSIG	0x10			/* killed by a signal */
 #define ANVER	0x20			/* new record version */
 
 		uint8_t  ac_flag;	/* accounting flags */
 	} ac_trailer;
 
 #define ac_flagx ac_trailer.ac_flag
 };
 .Ed
 .Pp
 If a terminated process was created by an
 .Xr execve 2 ,
 the name of the executed file (at most ten characters of it)
 is saved in the field
 .Fa ac_comm
 and its status is saved by setting one or more of the following flags in
 .Fa ac_flag :
 .Dv AFORK ,
 .Dv ACOMPAT ,
 .Dv ACORE
 and
 .Dv AXSIG .
 .Dv ASU
 is no longer supported.
 .Dv ANVER
 is always set in the above structure.
 .Sh SEE ALSO
 .Xr lastcomm 1 ,
 .Xr acct 2 ,
 .Xr execve 2 ,
 .Xr sa 8
 .Sh HISTORY
 A
 .Nm
 file format appeared in
 .At v7 .
 The current record format was introduced in May 2007.
 It is backwards compatible with the previous format,
 which is still documented in
 .In sys/acct.h
 and supported by
 .Xr lastcomm 1
 and
 .Xr sa 8 .
Index: head/share/man/man5/dir.5
===================================================================
--- head/share/man/man5/dir.5	(revision 318735)
+++ head/share/man/man5/dir.5	(revision 318736)
@@ -1,156 +1,167 @@
 .\" Copyright (c) 1983, 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"     @(#)dir.5	8.3 (Berkeley) 4/19/94
 .\" $FreeBSD$
 .\"
-.Dd April 19, 1994
+.Dd February 13, 2017
 .Dt DIR 5
 .Os
 .Sh NAME
 .Nm dir ,
 .Nm dirent
 .Nd directory file format
 .Sh SYNOPSIS
 .In dirent.h
 .Sh DESCRIPTION
 Directories provide a convenient hierarchical method of grouping
 files while obscuring the underlying details of the storage medium.
 A directory file is differentiated from a plain file
 by a flag in its
 .Xr inode 5
 entry.
 It consists of records (directory entries) each of which contains
 information about a file and a pointer to the file itself.
 Directory entries may contain other directories
 as well as plain files; such nested directories are referred to as
 subdirectories.
 A hierarchy of directories and files is formed in this manner
 and is called a file system (or referred to as a file system tree).
 .\" An entry in this tree,
 .\" nested or not nested,
 .\" is a pathname.
 .Pp
 Each directory file contains two special directory entries; one is a pointer
 to the directory itself
 called dot
 .Ql .\&
 and the other a pointer to its parent directory called dot-dot
 .Ql \&.. .
 Dot and dot-dot
 are valid pathnames, however,
 the system root directory
 .Ql / ,
 has no parent and dot-dot points to itself like dot.
 .Pp
 File system nodes are ordinary directory files onto which a file
 system object has been grafted, such as a physical disk or a
 partitioned area of such a disk.
 (See
 .Xr mount 2
 and
 .Xr mount 8 . )
 .Pp
 The directory entry format is defined in the file
 .In sys/dirent.h
 (which should not be included directly by applications):
 .Bd -literal
 #ifndef	_SYS_DIRENT_H_
 #define	_SYS_DIRENT_H_
 
 #include <machine/ansi.h>
 
 /*
  * The dirent structure defines the format of directory entries returned by
  * the getdirentries(2) system call.
  *
  * A directory entry has a struct dirent at the front of it, containing its
  * inode number, the length of the entry, and the length of the name
  * contained in the entry.  These are followed by the name padded to a 4
  * byte boundary with null bytes.  All names are guaranteed null terminated.
  * The maximum length of a name in a directory is MAXNAMLEN.
+ * Explicit pad is added between the last member of the header and
+ * d_name, to avoid having the ABI padding in the end of dirent on
+ * LP64 arches.  There is code depending on d_name being last.  Also,
+ * keeping this pad for ILP32 architectures simplifies compat32 layer.
  */
 
 struct dirent {
-	__uint32_t d_fileno;		/* file number of entry */
+	ino_t      d_fileno;		/* file number of entry */
+	off_t      d_off;		/* directory offset of entry */
 	__uint16_t d_reclen;		/* length of this record */
-	__uint8_t  d_type; 		/* file type, see below */
+	__uint8_t  d_type;		/* file type, see below */
 	__uint8_t  d_namlen;		/* length of string in d_name */
-#ifdef _POSIX_SOURCE
-	char	d_name[255 + 1];	/* name must be no longer than this */
-#else
+	__uint32_t d_pad0;
+#if __BSD_VISIBLE
 #define	MAXNAMLEN	255
 	char	d_name[MAXNAMLEN + 1];	/* name must be no longer than this */
+#else
+	char	d_name[255 + 1];	/* name must be no longer than this */
 #endif
 };
 
 /*
  * File types
  */
 #define	DT_UNKNOWN	 0
 #define	DT_FIFO		 1
 #define	DT_CHR		 2
 #define	DT_DIR		 4
 #define	DT_BLK		 6
 #define	DT_REG		 8
 #define	DT_LNK		10
 #define	DT_SOCK		12
 #define	DT_WHT		14
 
 /*
  * Convert between stat structure types and directory types.
  */
 #define	IFTODT(mode)	(((mode) & 0170000) >> 12)
 #define	DTTOIF(dirtype)	((dirtype) << 12)
 
 /*
  * The _GENERIC_DIRSIZ macro gives the minimum record length which will hold
- * the directory entry.  This requires the amount of space in struct direct
+ * the directory entry.  This returns the amount of space in struct direct
  * without the d_name field, plus enough space for the name with a terminating
- * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary.
+ * null byte (dp->d_namlen+1), rounded up to a 8 byte boundary.
+ *
+ * XXX although this macro is in the implementation namespace, it requires
+ * a manifest constant that is not.
  */
-#define	_GENERIC_DIRSIZ(dp) \
-    ((sizeof (struct dirent) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3))
+#define	_GENERIC_DIRLEN(namlen)					\
+	((__offsetof(struct dirent, d_name) + (namlen) + 1 + 7) & ~7)
+#define	_GENERIC_DIRSIZ(dp)	_GENERIC_DIRLEN((dp)->d_namlen)
+#endif /* __BSD_VISIBLE */
 
 #ifdef _KERNEL
 #define	GENERIC_DIRSIZ(dp)	_GENERIC_DIRSIZ(dp)
 #endif
 
 #endif /* !_SYS_DIRENT_H_ */
 .Ed
 .Sh SEE ALSO
 .Xr fs 5 ,
 .Xr inode 5
 .Sh HISTORY
 A
 .Nm
 file format appeared in
 .At v7 .
 .Sh BUGS
 The usage of the member d_type of struct dirent is unportable as it is
 .Fx Ns -specific .
 It also may fail on certain file systems, for example the cd9660 file system.
Index: head/sys/bsm/audit.h
===================================================================
--- head/sys/bsm/audit.h	(revision 318735)
+++ head/sys/bsm/audit.h	(revision 318736)
@@ -1,345 +1,345 @@
 /*-
  * Copyright (c) 2005-2009 Apple Inc.
  * Copyright (c) 2016 Robert N. M. Watson
  * All rights reserved.
  *
  * Portions of this software were developed by BAE Systems, the University of
  * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL
  * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent
  * Computing (TC) research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1.  Redistributions of source code must retain the above copyright
  *     notice, this list of conditions and the following disclaimer.
  * 2.  Redistributions in binary form must reproduce the above copyright
  *     notice, this list of conditions and the following disclaimer in the
  *     documentation and/or other materials provided with the distribution.
  * 3.  Neither the name of Apple Inc. ("Apple") nor the names of
  *     its contributors may be used to endorse or promote products derived
  *     from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef	_BSM_AUDIT_H
 #define	_BSM_AUDIT_H
 
 #include <sys/param.h>
 #include <sys/types.h>
 
 #define	AUDIT_RECORD_MAGIC	0x828a0f1b
 #define	MAX_AUDIT_RECORDS	20
 #define	MAXAUDITDATA		(0x8000 - 1)
 #define	MAX_AUDIT_RECORD_SIZE	MAXAUDITDATA
 #define	MIN_AUDIT_FILE_SIZE	(512 * 1024)
 
 /*
  * Minimum noumber of free blocks on the filesystem containing the audit
  * log necessary to avoid a hard log rotation. DO NOT SET THIS VALUE TO 0
  * as the kernel does an unsigned compare, plus we want to leave a few blocks
  * free so userspace can terminate the log, etc.
  */
 #define	AUDIT_HARD_LIMIT_FREE_BLOCKS	4
 
 /*
  * Triggers for the audit daemon.
  */
 #define	AUDIT_TRIGGER_MIN		1
 #define	AUDIT_TRIGGER_LOW_SPACE		1	/* Below low watermark. */
 #define	AUDIT_TRIGGER_ROTATE_KERNEL	2	/* Kernel requests rotate. */
 #define	AUDIT_TRIGGER_READ_FILE		3	/* Re-read config file. */
 #define	AUDIT_TRIGGER_CLOSE_AND_DIE	4	/* Terminate audit. */
 #define	AUDIT_TRIGGER_NO_SPACE		5	/* Below min free space. */
 #define	AUDIT_TRIGGER_ROTATE_USER	6	/* User requests rotate. */
 #define	AUDIT_TRIGGER_INITIALIZE	7	/* User initialize of auditd. */
 #define	AUDIT_TRIGGER_EXPIRE_TRAILS	8	/* User expiration of trails. */
 #define	AUDIT_TRIGGER_MAX		8
 
 /*
  * The special device filename (FreeBSD).
  */
 #define	AUDITDEV_FILENAME	"audit"
 #define	AUDIT_TRIGGER_FILE	("/dev/" AUDITDEV_FILENAME)
 
 /*
  * Pre-defined audit IDs
  */
 #define	AU_DEFAUDITID	(uid_t)(-1)
 #define	AU_DEFAUDITSID	 0
 #define	AU_ASSIGN_ASID	-1
 
 /*
  * IPC types.
  */
 #define	AT_IPC_MSG	((u_char)1)	/* Message IPC id. */
 #define	AT_IPC_SEM	((u_char)2)	/* Semaphore IPC id. */
 #define	AT_IPC_SHM	((u_char)3)	/* Shared mem IPC id. */
 
 /*
  * Audit conditions.
  */
 #define	AUC_UNSET		0
 #define	AUC_AUDITING		1
 #define	AUC_NOAUDIT		2
 #define	AUC_DISABLED		-1
 
 /*
  * auditon(2) commands.
  */
 #define	A_OLDGETPOLICY	2
 #define	A_OLDSETPOLICY	3
 #define	A_GETKMASK	4
 #define	A_SETKMASK	5
 #define	A_OLDGETQCTRL	6
 #define	A_OLDSETQCTRL	7
 #define	A_GETCWD	8
 #define	A_GETCAR	9
 #define	A_GETSTAT	12
 #define	A_SETSTAT	13
 #define	A_SETUMASK	14
 #define	A_SETSMASK	15
 #define	A_OLDGETCOND	20
 #define	A_OLDSETCOND	21
 #define	A_GETCLASS	22
 #define	A_SETCLASS	23
 #define	A_GETPINFO	24
 #define	A_SETPMASK	25
 #define	A_SETFSIZE	26
 #define	A_GETFSIZE	27
 #define	A_GETPINFO_ADDR	28
 #define	A_GETKAUDIT	29
 #define	A_SETKAUDIT	30
 #define	A_SENDTRIGGER	31
 #define	A_GETSINFO_ADDR	32
 #define	A_GETPOLICY	33
 #define	A_SETPOLICY	34
 #define	A_GETQCTRL	35
 #define	A_SETQCTRL	36
 #define	A_GETCOND	37
 #define	A_SETCOND	38
 #define	A_GETEVENT	39	/* Get audit event-to-name mapping. */
 #define	A_SETEVENT	40	/* Set audit event-to-name mapping. */
 
 /*
  * Audit policy controls.
  */
 #define	AUDIT_CNT	0x0001
 #define	AUDIT_AHLT	0x0002
 #define	AUDIT_ARGV	0x0004
 #define	AUDIT_ARGE	0x0008
 #define	AUDIT_SEQ	0x0010
 #define	AUDIT_WINDATA	0x0020
 #define	AUDIT_USER	0x0040
 #define	AUDIT_GROUP	0x0080
 #define	AUDIT_TRAIL	0x0100
 #define	AUDIT_PATH	0x0200
 #define	AUDIT_SCNT	0x0400
 #define	AUDIT_PUBLIC	0x0800
 #define	AUDIT_ZONENAME	0x1000
 #define	AUDIT_PERZONE	0x2000
 
 /*
  * Default audit queue control parameters.
  */
 #define	AQ_HIWATER	100
 #define	AQ_MAXHIGH	10000
 #define	AQ_LOWATER	10
 #define	AQ_BUFSZ	MAXAUDITDATA
 #define	AQ_MAXBUFSZ	1048576
 
 /*
  * Default minimum percentage free space on file system.
  */
 #define	AU_FS_MINFREE	20
 
 /*
  * Type definitions used indicating the length of variable length addresses
  * in tokens containing addresses, such as header fields.
  */
 #define	AU_IPv4		4
 #define	AU_IPv6		16
 
 __BEGIN_DECLS
 
 typedef	uid_t		au_id_t;
 typedef	pid_t		au_asid_t;
 typedef	u_int16_t	au_event_t;
 typedef	u_int16_t	au_emod_t;
 typedef	u_int32_t	au_class_t;
 typedef	u_int64_t	au_asflgs_t __attribute__ ((aligned (8)));
 
 struct au_tid {
-	dev_t		port;
+	u_int32_t	port;		/* XXX dev_t compatibility */
 	u_int32_t	machine;
 };
 typedef	struct au_tid	au_tid_t;
 
 struct au_tid_addr {
-	dev_t		at_port;
+	u_int32_t	at_port;	/* XXX dev_t compatibility */
 	u_int32_t	at_type;
 	u_int32_t	at_addr[4];
 };
 typedef	struct au_tid_addr	au_tid_addr_t;
 
 struct au_mask {
 	unsigned int    am_success;     /* Success bits. */
 	unsigned int    am_failure;     /* Failure bits. */
 };
 typedef	struct au_mask	au_mask_t;
 
 struct auditinfo {
 	au_id_t		ai_auid;	/* Audit user ID. */
 	au_mask_t	ai_mask;	/* Audit masks. */
 	au_tid_t	ai_termid;	/* Terminal ID. */
 	au_asid_t	ai_asid;	/* Audit session ID. */
 };
 typedef	struct auditinfo	auditinfo_t;
 
 struct auditinfo_addr {
 	au_id_t		ai_auid;	/* Audit user ID. */
 	au_mask_t	ai_mask;	/* Audit masks. */
 	au_tid_addr_t	ai_termid;	/* Terminal ID. */
 	au_asid_t	ai_asid;	/* Audit session ID. */
 	au_asflgs_t	ai_flags;	/* Audit session flags. */
 };
 typedef	struct auditinfo_addr	auditinfo_addr_t;
 
 struct auditpinfo {
 	pid_t		ap_pid;		/* ID of target process. */
 	au_id_t		ap_auid;	/* Audit user ID. */
 	au_mask_t	ap_mask;	/* Audit masks. */
 	au_tid_t	ap_termid;	/* Terminal ID. */
 	au_asid_t	ap_asid;	/* Audit session ID. */
 };
 typedef	struct auditpinfo	auditpinfo_t;
 
 struct auditpinfo_addr {
 	pid_t		ap_pid;		/* ID of target process. */
 	au_id_t		ap_auid;	/* Audit user ID. */
 	au_mask_t	ap_mask;	/* Audit masks. */
 	au_tid_addr_t	ap_termid;	/* Terminal ID. */
 	au_asid_t	ap_asid;	/* Audit session ID. */
 	au_asflgs_t	ap_flags;	/* Audit session flags. */
 };
 typedef	struct auditpinfo_addr	auditpinfo_addr_t;
 
 struct au_session {
 	auditinfo_addr_t	*as_aia_p;	/* Ptr to full audit info. */
 	au_mask_t		 as_mask;	/* Process Audit Masks. */
 };
 typedef struct au_session       au_session_t;
 
 /*
  * Contents of token_t are opaque outside of libbsm.
  */
 typedef	struct au_token	token_t;
 
 /*
  * Kernel audit queue control parameters:
  * 			Default:		Maximum:
  * 	aq_hiwater:	AQ_HIWATER (100)	AQ_MAXHIGH (10000) 
  * 	aq_lowater:	AQ_LOWATER (10)		<aq_hiwater
  * 	aq_bufsz:	AQ_BUFSZ (32767)	AQ_MAXBUFSZ (1048576)
  * 	aq_delay:	20			20000 (not used) 
  */
 struct au_qctrl {
 	int	aq_hiwater;	/* Max # of audit recs in queue when */
 				/* threads with new ARs get blocked. */ 
 
 	int	aq_lowater;	/* # of audit recs in queue when */
 				/* blocked threads get unblocked. */
 
 	int	aq_bufsz;	/* Max size of audit record for audit(2). */
 	int	aq_delay;	/* Queue delay (not used). */
 	int	aq_minfree;	/* Minimum filesystem percent free space. */
 };
 typedef	struct au_qctrl	au_qctrl_t;
 
 /*
  * Structure for the audit statistics.
  */
 struct audit_stat {
 	unsigned int	as_version;
 	unsigned int	as_numevent;
 	int		as_generated;
 	int		as_nonattrib;
 	int		as_kernel;
 	int		as_audit;
 	int		as_auditctl;
 	int		as_enqueue;
 	int		as_written;
 	int		as_wblocked;
 	int		as_rblocked;
 	int		as_dropped;
 	int		as_totalsize;
 	unsigned int	as_memused;
 };
 typedef	struct audit_stat	au_stat_t;
 
 /*
  * Structure for the audit file statistics.
  */
 struct audit_fstat {
 	u_int64_t	af_filesz;
 	u_int64_t	af_currsz;
 };
 typedef	struct audit_fstat	au_fstat_t;
 
 /*
  * Audit to event class mapping.
  */
 struct au_evclass_map {
 	au_event_t	ec_number;
 	au_class_t	ec_class;
 };
 typedef	struct au_evclass_map	au_evclass_map_t;
 
 /*
  * Event-to-name mapping.
  */
 #define	EVNAMEMAP_NAME_SIZE	64
 struct au_evname_map {
 	au_event_t	en_number;
 	char		en_name[EVNAMEMAP_NAME_SIZE];
 };
 typedef struct au_evname_map	au_evname_map_t;
 
 /*
  * Audit system calls.
  */
 #if !defined(_KERNEL) && !defined(KERNEL)
 int	audit(const void *, int);
 int	auditon(int, void *, int);
 int	auditctl(const char *);
 int	getauid(au_id_t *);
 int	setauid(const au_id_t *);
 int	getaudit(struct auditinfo *);
 int	setaudit(const struct auditinfo *);
 int	getaudit_addr(struct auditinfo_addr *, int);
 int	setaudit_addr(const struct auditinfo_addr *, int);
 
 #ifdef __APPLE_API_PRIVATE
 #include <mach/port.h>
 mach_port_name_t audit_session_self(void);
 au_asid_t	 audit_session_join(mach_port_name_t port);
 #endif /* __APPLE_API_PRIVATE */
 
 #endif /* defined(_KERNEL) || defined(KERNEL) */
 
 __END_DECLS
 
 #endif /* !_BSM_AUDIT_H */
Index: head/sys/cddl/compat/opensolaris/sys/dirent.h
===================================================================
--- head/sys/cddl/compat/opensolaris/sys/dirent.h	(revision 318735)
+++ head/sys/cddl/compat/opensolaris/sys/dirent.h	(revision 318736)
@@ -1,47 +1,45 @@
 /*-
  * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _OPENSOLARIS_SYS_DIRENT_H_
 #define	_OPENSOLARIS_SYS_DIRENT_H_
 
 #include <sys/types.h>
 
 #include_next <sys/dirent.h>
 
 typedef	struct dirent	dirent64_t;
 typedef ino_t		ino64_t;
 
 #define	dirent64	dirent
 
 #define	d_ino	d_fileno
 
-#define	DIRENT64_RECLEN(len)	((sizeof(struct dirent) -		\
-				 sizeof(((struct dirent *)NULL)->d_name) + \
-				 (len) + 1 + 3) & ~3)
+#define	DIRENT64_RECLEN(len)	_GENERIC_DIRLEN(len)
 
 #endif	/* !_OPENSOLARIS_SYS_DIRENT_H_ */
Index: head/sys/compat/freebsd32/capabilities.conf
===================================================================
--- head/sys/compat/freebsd32/capabilities.conf	(revision 318735)
+++ head/sys/compat/freebsd32/capabilities.conf	(revision 318736)
@@ -1,283 +1,288 @@
 ##
 ## Copyright (c) 2008-2010 Robert N. M. Watson
 ## Copyright (c) 2016 The FreeBSD Foundation
 ## All rights reserved.
 ##
 ## This software was developed at the University of Cambridge Computer
 ## Laboratory with support from a grant from Google, Inc.
 ##
 ## Portions of this software were developed by Konstantin Belousov
 ## under sponsorship from the FreeBSD Foundation.
 ##
 ## Redistribution and use in source and binary forms, with or without
 ## modification, are permitted provided that the following conditions
 ## are met:
 ## 1. Redistributions of source code must retain the above copyright
 ##    notice, this list of conditions and the following disclaimer.
 ## 2. Redistributions in binary form must reproduce the above copyright
 ##    notice, this list of conditions and the following disclaimer in the
 ##    documentation and/or other materials provided with the distribution.
 ##
 ## THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 ## ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 ## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ## ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 ## FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 ## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 ## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 ## LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 ## OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 ## SUCH DAMAGE.
 ##
 ## List of system calls enabled in freebsd32 capability mode, one name
 ## per line.  See the original list in the sys/kern/capabilities.conf.
 ## Position of the compat syscall in this file must be identical to
 ## the master, to facilitate comparision and diagnostic.
 ##
 ## $FreeBSD$
 ##
 
 __acl_aclcheck_fd
 __acl_delete_fd
 __acl_get_fd
 __acl_set_fd
 __mac_get_fd
 #__mac_get_pid
 __mac_get_proc
 __mac_set_fd
 __mac_set_proc
 freebsd32_sysctl
 freebsd32_umtx_op
 abort2
 accept
 accept4
 aio_cancel
 freebsd32_aio_error
 aio_fsync
 freebsd32_aio_read
 freebsd32_aio_return
 freebsd32_aio_suspend
 freebsd32_aio_waitcomplete
 freebsd32_aio_write
 #audit
 bindat
 cap_enter
 cap_fcntls_get
 cap_fcntls_limit
 cap_getmode
 freebsd32_cap_ioctls_get
 freebsd32_cap_ioctls_limit
 __cap_rights_get
 cap_rights_limit
 freebsd32_clock_getres
 freebsd32_clock_gettime
 close
 closefrom
 connectat
 #cpuset
 #freebsd32_cpuset_getaffinity
 #freebsd32_cpuset_getid
 #freebsd32_cpuset_setaffinity
 #freebsd32_cpuset_setid
 dup
 dup2
 extattr_delete_fd
 extattr_get_fd
 extattr_list_fd
 extattr_set_fd
 fchflags
 fchmod
 fchown
 freebsd32_fcntl
 freebsd32_fexecve
 flock
 fork
 fpathconf
+freebsd11_freebsd32_fstat
+freebsd11_freebsd32_fstatat
+freebsd11_freebsd32_getdirentries
+freebsd11_freebsd32_fstatfs
+freebsd11_freebsd32_mknodat
 freebsd6_freebsd32_ftruncate
 freebsd6_freebsd32_lseek
 freebsd6_freebsd32_mmap
 freebsd6_freebsd32_pread
 freebsd6_freebsd32_pwrite
 freebsd32_fstat
 fstatfs
 fsync
 ftruncate
 freebsd32_futimens
 freebsd32_futimes
 getaudit
 getaudit_addr
 getauid
 freebsd32_getcontext
 getdents
 freebsd32_getdirentries
 getdomainname
 getdtablesize
 getegid
 geteuid
 gethostid
 gethostname
 freebsd32_getitimer
 getgid
 getgroups
 getlogin
 freebsd32_getpagesize
 getpeername
 getpgid
 getpgrp
 getpid
 getppid
 getpriority
 getresgid
 getresuid
 getrlimit
 freebsd32_getrusage
 getsid
 getsockname
 getsockopt
 freebsd32_gettimeofday
 getuid
 freebsd32_ioctl
 issetugid
 freebsd32_kevent
 kill
 freebsd32_kmq_notify
 freebsd32_kmq_setattr
 freebsd32_kmq_timedreceive
 freebsd32_kmq_timedsend
 kqueue
 freebsd32_ktimer_create
 ktimer_delete
 ktimer_getoverrun
 freebsd32_ktimer_gettime
 freebsd32_ktimer_settime
 #ktrace
 freebsd32_lio_listio
 listen
 freebsd32_lseek
 madvise
 mincore
 minherit
 mlock
 mlockall
 freebsd32_mmap
 freebsd32_mprotect
 msync
 munlock
 munlockall
 munmap
 freebsd32_nanosleep
 ntp_gettime
 freebsd6_freebsd32_aio_read
 freebsd6_freebsd32_aio_write
 obreak
 freebsd6_freebsd32_lio_listio
 chflagsat
 faccessat
 fchmodat
 fchownat
 freebsd32_fstatat
 freebsd32_futimesat
 linkat
 mkdirat
 mkfifoat
 mknodat
 openat
 readlinkat
 renameat
 symlinkat
 unlinkat
 freebsd32_utimensat
 pdfork
 pdgetpid
 pdkill
 #pdwait4	# not yet implemented
 freebsd32_pipe
 pipe2
 poll
 freebsd32_pread
 freebsd32_preadv
 profil
 #ptrace
 freebsd32_pwrite
 freebsd32_pwritev
 read
 freebsd32_readv
 freebsd6_freebsd32_recv
 freebsd32_recvfrom
 freebsd32_recvmsg
 rtprio
 rtprio_thread
 sbrk
 sched_get_priority_max
 sched_get_priority_min
 sched_getparam
 sched_getscheduler
 sched_rr_get_interval
 sched_setparam
 sched_setscheduler
 sched_yield
 sctp_generic_recvmsg
 sctp_generic_sendmsg
 sctp_generic_sendmsg_iov
 sctp_peeloff
 freebsd32_pselect
 freebsd32_select
 freebsd6_freebsd32_send
 freebsd32_sendfile
 freebsd32_sendmsg
 sendto
 setaudit
 setaudit_addr
 setauid
 freebsd32_setcontext
 setegid
 seteuid
 setgid
 freebsd32_setitimer
 setpriority
 setregid
 setresgid
 setresuid
 setreuid
 setrlimit
 setsid
 setsockopt
 setuid
 shm_open
 shutdown
 freebsd32_sigaction
 freebsd32_sigaltstack
 freebsd32_sigblock
 freebsd32_sigpending
 sigprocmask
 sigqueue
 freebsd32_sigreturn
 freebsd32_sigsetmask
 ofreebsd32_sigstack
 sigsuspend
 freebsd32_sigtimedwait
 freebsd32_sigvec
 freebsd32_sigwaitinfo
 sigwait
 socket
 socketpair
 sstk
 sync
 sys_exit
 freebsd32_sysarch
 thr_create
 thr_exit
 thr_kill
 #thr_kill2
 freebsd32_thr_new
 thr_self
 thr_set_name
 freebsd32_thr_suspend
 thr_wake
 umask
 utrace
 uuidgen
 write
 freebsd32_writev
 yield
Index: head/sys/compat/freebsd32/freebsd32.h
===================================================================
--- head/sys/compat/freebsd32/freebsd32.h	(revision 318735)
+++ head/sys/compat/freebsd32/freebsd32.h	(revision 318736)
@@ -1,374 +1,413 @@
 /*-
  * Copyright (c) 2001 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _COMPAT_FREEBSD32_FREEBSD32_H_
 #define _COMPAT_FREEBSD32_FREEBSD32_H_
 
 #include <sys/procfs.h>
 #include <sys/socket.h>
 #include <sys/user.h>
 
 #define PTRIN(v)	(void *)(uintptr_t) (v)
 #define PTROUT(v)	(u_int32_t)(uintptr_t) (v)
 
 #define CP(src,dst,fld) do { (dst).fld = (src).fld; } while (0)
 #define PTRIN_CP(src,dst,fld) \
 	do { (dst).fld = PTRIN((src).fld); } while (0)
 #define PTROUT_CP(src,dst,fld) \
 	do { (dst).fld = PTROUT((src).fld); } while (0)
 
 /*
  * Being a newer port, 32-bit FreeBSD/MIPS uses 64-bit time_t.
  */
 #ifdef __mips__
 typedef	int64_t	time32_t;
 #else
 typedef	int32_t	time32_t;
 #endif
 
 struct timeval32 {
 	time32_t tv_sec;
 	int32_t tv_usec;
 };
 #define TV_CP(src,dst,fld) do {			\
 	CP((src).fld,(dst).fld,tv_sec);		\
 	CP((src).fld,(dst).fld,tv_usec);	\
 } while (0)
 
 struct timespec32 {
 	time32_t tv_sec;
 	int32_t tv_nsec;
 };
 #define TS_CP(src,dst,fld) do {			\
 	CP((src).fld,(dst).fld,tv_sec);		\
 	CP((src).fld,(dst).fld,tv_nsec);	\
 } while (0)
 
 struct itimerspec32 {
 	struct timespec32  it_interval;
 	struct timespec32  it_value;
 };
 #define ITS_CP(src, dst) do {			\
 	TS_CP((src), (dst), it_interval);	\
 	TS_CP((src), (dst), it_value);		\
 } while (0)
 
 struct rusage32 {
 	struct timeval32 ru_utime;
 	struct timeval32 ru_stime;
 	int32_t	ru_maxrss;
 	int32_t	ru_ixrss;
 	int32_t	ru_idrss;
 	int32_t	ru_isrss;
 	int32_t	ru_minflt;
 	int32_t	ru_majflt;
 	int32_t	ru_nswap;
 	int32_t	ru_inblock;
 	int32_t	ru_oublock;
 	int32_t	ru_msgsnd;
 	int32_t	ru_msgrcv;
 	int32_t	ru_nsignals;
 	int32_t	ru_nvcsw;
 	int32_t	ru_nivcsw;
 };
 
 struct wrusage32 {
 	struct rusage32	wru_self;
 	struct rusage32 wru_children;
 };
 
 struct itimerval32 {
 	struct timeval32 it_interval;
 	struct timeval32 it_value;
 };
 
-#define FREEBSD4_MNAMELEN        (88 - 2 * sizeof(int32_t)) /* size of on/from name bufs */
+#define FREEBSD4_MFSNAMELEN	16
+#define FREEBSD4_MNAMELEN	(88 - 2 * sizeof(int32_t))
 
 /* 4.x version */
 struct statfs32 {
 	int32_t	f_spare2;
 	int32_t	f_bsize;
 	int32_t	f_iosize;
 	int32_t	f_blocks;
 	int32_t	f_bfree;
 	int32_t	f_bavail;
 	int32_t	f_files;
 	int32_t	f_ffree;
 	fsid_t	f_fsid;
 	uid_t	f_owner;
 	int32_t	f_type;
 	int32_t	f_flags;
 	int32_t	f_syncwrites;
 	int32_t	f_asyncwrites;
-	char	f_fstypename[MFSNAMELEN];
+	char	f_fstypename[FREEBSD4_MFSNAMELEN];
 	char	f_mntonname[FREEBSD4_MNAMELEN];
 	int32_t	f_syncreads;
 	int32_t	f_asyncreads;
 	int16_t	f_spares1;
 	char	f_mntfromname[FREEBSD4_MNAMELEN];
 	int16_t	f_spares2 __packed;
 	int32_t f_spare[2];
 };
 
 struct kevent32 {
 	u_int32_t	ident;		/* identifier for this event */
 	short		filter;		/* filter for event */
 	u_short		flags;
 	u_int		fflags;
 	int32_t		data;
 	u_int32_t	udata;		/* opaque user data identifier */
 };
 
 struct iovec32 {
 	u_int32_t iov_base;
 	int	iov_len;
 };
 
 struct msghdr32 {
 	u_int32_t	 msg_name;
 	socklen_t	 msg_namelen;
 	u_int32_t	 msg_iov;
 	int		 msg_iovlen;
 	u_int32_t	 msg_control;
 	socklen_t	 msg_controllen;
 	int		 msg_flags;
 };
 
+#if defined(__amd64__)
+#define	__STAT32_TIME_T_EXT	1
+#endif
+
 struct stat32 {
-	dev_t	st_dev;
-	ino_t	st_ino;
+	dev_t st_dev;
+	ino_t st_ino;
+	nlink_t st_nlink;
 	mode_t	st_mode;
-	nlink_t	st_nlink;
+	u_int16_t st_padding0;
 	uid_t	st_uid;
 	gid_t	st_gid;
-	dev_t	st_rdev;
+	u_int32_t st_padding1;
+	dev_t st_rdev;
+#ifdef	__STAT32_TIME_T_EXT
+	__int32_t st_atim_ext;
+#endif
 	struct timespec32 st_atim;
+#ifdef	__STAT32_TIME_T_EXT
+	__int32_t st_mtim_ext;
+#endif
 	struct timespec32 st_mtim;
+#ifdef	__STAT32_TIME_T_EXT
+	__int32_t st_ctim_ext;
+#endif
 	struct timespec32 st_ctim;
+#ifdef	__STAT32_TIME_T_EXT
+	__int32_t st_btim_ext;
+#endif
+	struct timespec32 st_birthtim;
 	off_t	st_size;
 	int64_t	st_blocks;
 	u_int32_t st_blksize;
 	u_int32_t st_flags;
+	u_int64_t st_gen;
+	u_int64_t st_spare[10];
+};
+struct freebsd11_stat32 {
+	u_int32_t st_dev;
+	u_int32_t st_ino;
+	mode_t	st_mode;
+	u_int16_t st_nlink;
+	uid_t	st_uid;
+	gid_t	st_gid;
+	u_int32_t st_rdev;
+	struct timespec32 st_atim;
+	struct timespec32 st_mtim;
+	struct timespec32 st_ctim;
+	off_t	st_size;
+	int64_t	st_blocks;
+	u_int32_t st_blksize;
+	u_int32_t st_flags;
 	u_int32_t st_gen;
 	int32_t	st_lspare;
 	struct timespec32 st_birthtim;
 	unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec32));
 	unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec32));
 };
 
 struct ostat32 {
 	__uint16_t st_dev;
-	ino_t	st_ino;
+	__uint32_t st_ino;
 	mode_t	st_mode;
-	nlink_t	st_nlink;
+	__uint16_t st_nlink;
 	__uint16_t st_uid;
 	__uint16_t st_gid;
 	__uint16_t st_rdev;
 	__int32_t st_size;
 	struct timespec32 st_atim;
 	struct timespec32 st_mtim;
 	struct timespec32 st_ctim;
 	__int32_t st_blksize;
 	__int32_t st_blocks;
 	u_int32_t st_flags;
 	__uint32_t st_gen;
 };
 
 struct jail32_v0 {
 	u_int32_t	version;
 	uint32_t	path;
 	uint32_t	hostname;
 	u_int32_t	ip_number;
 };
 
 struct jail32 {
 	uint32_t	version;
 	uint32_t	path;
 	uint32_t	hostname;
 	uint32_t	jailname;
 	uint32_t	ip4s;
 	uint32_t	ip6s;
 	uint32_t	ip4;
 	uint32_t	ip6;
 };
 
 struct sigaction32 {
 	u_int32_t	sa_u;
 	int		sa_flags;
 	sigset_t	sa_mask;
 };
 
 struct thr_param32 {
 	uint32_t start_func;
 	uint32_t arg;
 	uint32_t stack_base;
 	uint32_t stack_size;
 	uint32_t tls_base;
 	uint32_t tls_size;
 	uint32_t child_tid;
 	uint32_t parent_tid;
 	int32_t	 flags;
 	uint32_t rtp;
 	uint32_t spare[3];
 };
 
 struct i386_ldt_args32 {
 	uint32_t start;
 	uint32_t descs;
 	uint32_t num;
 };
 
 struct mq_attr32 {
 	int	mq_flags;
 	int	mq_maxmsg;
 	int	mq_msgsize;
 	int	mq_curmsgs;
 	int	__reserved[4];
 };
 
 struct kinfo_proc32 {
 	int	ki_structsize;
 	int	ki_layout;
 	uint32_t ki_args;
 	uint32_t ki_paddr;
 	uint32_t ki_addr;
 	uint32_t ki_tracep;
 	uint32_t ki_textvp;
 	uint32_t ki_fd;
 	uint32_t ki_vmspace;
 	uint32_t ki_wchan;
 	pid_t	ki_pid;
 	pid_t	ki_ppid;
 	pid_t	ki_pgid;
 	pid_t	ki_tpgid;
 	pid_t	ki_sid;
 	pid_t	ki_tsid;
 	short	ki_jobc;
 	short	ki_spare_short1;
-	dev_t	ki_tdev;
+	uint32_t ki_tdev_freebsd11;
 	sigset_t ki_siglist;
 	sigset_t ki_sigmask;
 	sigset_t ki_sigignore;
 	sigset_t ki_sigcatch;
 	uid_t	ki_uid;
 	uid_t	ki_ruid;
 	uid_t	ki_svuid;
 	gid_t	ki_rgid;
 	gid_t	ki_svgid;
 	short	ki_ngroups;
 	short	ki_spare_short2;
 	gid_t 	ki_groups[KI_NGROUPS];
 	uint32_t ki_size;
 	int32_t ki_rssize;
 	int32_t ki_swrss;
 	int32_t ki_tsize;
 	int32_t ki_dsize;
 	int32_t ki_ssize;
 	u_short	ki_xstat;
 	u_short	ki_acflag;
 	fixpt_t	ki_pctcpu;
 	u_int	ki_estcpu;
 	u_int	ki_slptime;
 	u_int	ki_swtime;
 	u_int	ki_cow;
 	u_int64_t ki_runtime;
 	struct	timeval32 ki_start;
 	struct	timeval32 ki_childtime;
 	int	ki_flag;
 	int	ki_kiflag;
 	int	ki_traceflag;
 	char	ki_stat;
 	signed char ki_nice;
 	char	ki_lock;
 	char	ki_rqindex;
 	u_char	ki_oncpu_old;
 	u_char	ki_lastcpu_old;
 	char	ki_tdname[TDNAMLEN+1];
 	char	ki_wmesg[WMESGLEN+1];
 	char	ki_login[LOGNAMELEN+1];
 	char	ki_lockname[LOCKNAMELEN+1];
 	char	ki_comm[COMMLEN+1];
 	char	ki_emul[KI_EMULNAMELEN+1];
 	char	ki_loginclass[LOGINCLASSLEN+1];
 	char	ki_moretdname[MAXCOMLEN-TDNAMLEN+1];
 	char	ki_sparestrings[46];
 	int	ki_spareints[KI_NSPARE_INT];
+	uint64_t ki_tdev;
 	int	ki_oncpu;
 	int	ki_lastcpu;
 	int	ki_tracer;
 	int	ki_flag2;
 	int	ki_fibnum;
 	u_int	ki_cr_flags;
 	int	ki_jid;
 	int	ki_numthreads;
 	lwpid_t	ki_tid;
 	struct	priority ki_pri;
 	struct	rusage32 ki_rusage;
 	struct	rusage32 ki_rusage_ch;
 	uint32_t ki_pcb;
 	uint32_t ki_kstack;
 	uint32_t ki_udata;
 	uint32_t ki_tdaddr;
 	uint32_t ki_spareptrs[KI_NSPARE_PTR];	/* spare room for growth */
 	int	ki_sparelongs[KI_NSPARE_LONG];
 	int	ki_sflag;
 	int	ki_tdflags;
 };
 
 struct kinfo_sigtramp32 {
 	uint32_t ksigtramp_start;
 	uint32_t ksigtramp_end;
 	uint32_t ksigtramp_spare[4];
 };
 
 struct kld32_file_stat_1 {
 	int	version;	/* set to sizeof(struct kld_file_stat_1) */
 	char	name[MAXPATHLEN];
 	int	refs;
 	int	id;
 	uint32_t address;	/* load address */
 	uint32_t size;		/* size in bytes */
 };
 
 struct kld32_file_stat {
 	int	version;	/* set to sizeof(struct kld_file_stat) */
 	char	name[MAXPATHLEN];
 	int	refs;
 	int	id;
 	uint32_t address;	/* load address */
 	uint32_t size;		/* size in bytes */
 	char	pathname[MAXPATHLEN];
 };
 
 struct procctl_reaper_pids32 {
 	u_int	rp_count;
 	u_int	rp_pad0[15];
 	uint32_t rp_pids;
 };
 
 #endif /* !_COMPAT_FREEBSD32_FREEBSD32_H_ */
Index: head/sys/compat/freebsd32/freebsd32_misc.c
===================================================================
--- head/sys/compat/freebsd32/freebsd32_misc.c	(revision 318735)
+++ head/sys/compat/freebsd32/freebsd32_misc.c	(revision 318736)
@@ -1,3153 +1,3321 @@
 /*-
  * Copyright (c) 2002 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #define __ELF_WORD_SIZE 32
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/capsicum.h>
 #include <sys/clock.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/filedesc.h>
 #include <sys/imgact.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/file.h>		/* Must come after sys/malloc.h */
 #include <sys/imgact.h>
 #include <sys/mbuf.h>
 #include <sys/mman.h>
 #include <sys/module.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/procctl.h>
 #include <sys/reboot.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
 #include <sys/selinfo.h>
 #include <sys/eventvar.h>	/* Must come after sys/selinfo.h */
 #include <sys/pipe.h>		/* Must come after sys/selinfo.h */
 #include <sys/signal.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/systm.h>
 #include <sys/thr.h>
 #include <sys/unistd.h>
 #include <sys/ucontext.h>
 #include <sys/vnode.h>
 #include <sys/wait.h>
 #include <sys/ipc.h>
 #include <sys/msg.h>
 #include <sys/sem.h>
 #include <sys/shm.h>
 
 #ifdef INET
 #include <netinet/in.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 
 #include <machine/cpu.h>
 #include <machine/elf.h>
 
 #include <security/audit/audit.h>
 
 #include <compat/freebsd32/freebsd32_util.h>
 #include <compat/freebsd32/freebsd32.h>
 #include <compat/freebsd32/freebsd32_ipc.h>
 #include <compat/freebsd32/freebsd32_misc.h>
 #include <compat/freebsd32/freebsd32_signal.h>
 #include <compat/freebsd32/freebsd32_proto.h>
 
 FEATURE(compat_freebsd_32bit, "Compatible with 32-bit FreeBSD");
 
 /*
  * Compile-time checks on the sizes of the 32-bit compat structures.
  * Several of the checks are skipped on mips, and the struct stat32 size
  * is only asserted on amd64.
  */
 #ifndef __mips__
 CTASSERT(sizeof(struct timeval32) == 8);
 CTASSERT(sizeof(struct timespec32) == 8);
 CTASSERT(sizeof(struct itimerval32) == 16);
 #endif
 CTASSERT(sizeof(struct statfs32) == 256);
 #ifndef __mips__
 CTASSERT(sizeof(struct rusage32) == 72);
 #endif
 CTASSERT(sizeof(struct sigaltstack32) == 12);
 CTASSERT(sizeof(struct kevent32) == 20);
 CTASSERT(sizeof(struct iovec32) == 8);
 CTASSERT(sizeof(struct msghdr32) == 28);
+#ifdef __amd64__
+CTASSERT(sizeof(struct stat32) == 208);
+#endif
 #ifndef __mips__
-CTASSERT(sizeof(struct stat32) == 96);
+CTASSERT(sizeof(struct freebsd11_stat32) == 96);
 #endif
 CTASSERT(sizeof(struct sigaction32) == 24);
 
 static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count);
 static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count);
 static int freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id,
     int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp);
 
 /*
  * Convert a native struct rusage into its 32-bit counterpart.
  *
  * s	native resource usage (read only)
  * s32	destination; the two time values are copied with TV_CP() and each
  *	counter listed below with CP().
  */
 void
 freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32)
 {
 
 	/* User and system time. */
 	TV_CP(*s, *s32, ru_utime);
 	TV_CP(*s, *s32, ru_stime);
 	/* Counters, copied field by field. */
 	CP(*s, *s32, ru_maxrss);
 	CP(*s, *s32, ru_ixrss);
 	CP(*s, *s32, ru_idrss);
 	CP(*s, *s32, ru_isrss);
 	CP(*s, *s32, ru_minflt);
 	CP(*s, *s32, ru_majflt);
 	CP(*s, *s32, ru_nswap);
 	CP(*s, *s32, ru_inblock);
 	CP(*s, *s32, ru_oublock);
 	CP(*s, *s32, ru_msgsnd);
 	CP(*s, *s32, ru_msgrcv);
 	CP(*s, *s32, ru_nsignals);
 	CP(*s, *s32, ru_nvcsw);
 	CP(*s, *s32, ru_nivcsw);
 }
 
 int
 freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap)
 {
 	int error, status;
 	struct rusage32 ru32;
 	struct rusage ru, *rup;
 
 	if (uap->rusage != NULL)
 		rup = &ru;
 	else
 		rup = NULL;
 	error = kern_wait(td, uap->pid, &status, uap->options, rup);
 	if (error)
 		return (error);
 	if (uap->status != NULL)
 		error = copyout(&status, uap->status, sizeof(status));
 	if (uap->rusage != NULL && error == 0) {
 		freebsd32_rusage_out(&ru, &ru32);
 		error = copyout(&ru32, uap->rusage, sizeof(ru32));
 	}
 	return (error);
 }
 
 int
 freebsd32_wait6(struct thread *td, struct freebsd32_wait6_args *uap)
 {
 	struct wrusage32 wru32;
 	struct __wrusage wru, *wrup;
 	struct siginfo32 si32;
 	struct __siginfo si, *sip;
 	int error, status;
 
 	if (uap->wrusage != NULL)
 		wrup = &wru;
 	else
 		wrup = NULL;
 	if (uap->info != NULL) {
 		sip = &si;
 		bzero(sip, sizeof(*sip));
 	} else
 		sip = NULL;
 	error = kern_wait6(td, uap->idtype, PAIR32TO64(id_t, uap->id),
 	    &status, uap->options, wrup, sip);
 	if (error != 0)
 		return (error);
 	if (uap->status != NULL)
 		error = copyout(&status, uap->status, sizeof(status));
 	if (uap->wrusage != NULL && error == 0) {
 		freebsd32_rusage_out(&wru.wru_self, &wru32.wru_self);
 		freebsd32_rusage_out(&wru.wru_children, &wru32.wru_children);
 		error = copyout(&wru32, uap->wrusage, sizeof(wru32));
 	}
 	if (uap->info != NULL && error == 0) {
 		siginfo_to_siginfo32 (&si, &si32);
 		error = copyout(&si32, uap->info, sizeof(si32));
 	}
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * Fill a struct statfs32 from a native struct statfs.
  *
  * The output is zeroed first.  Counters that are copied through MIN() are
  * clamped to INT32_MAX; the mount path strings are copied with strlcpy()
  * bounded by the smaller of MNAMELEN and FREEBSD4_MNAMELEN.
  * NOTE(review): 'in' is handed to statfs_scale_blocks() before anything is
  * copied, so the input is presumably modified -- confirm against that helper.
  */
 static void
 copy_statfs(struct statfs *in, struct statfs32 *out)
 {
 
 	statfs_scale_blocks(in, INT32_MAX);
 	bzero(out, sizeof(*out));
 	CP(*in, *out, f_bsize);
 	out->f_iosize = MIN(in->f_iosize, INT32_MAX);
 	CP(*in, *out, f_blocks);
 	CP(*in, *out, f_bfree);
 	CP(*in, *out, f_bavail);
 	out->f_files = MIN(in->f_files, INT32_MAX);
 	out->f_ffree = MIN(in->f_ffree, INT32_MAX);
 	CP(*in, *out, f_fsid);
 	CP(*in, *out, f_owner);
 	CP(*in, *out, f_type);
 	CP(*in, *out, f_flags);
 	out->f_syncwrites = MIN(in->f_syncwrites, INT32_MAX);
 	out->f_asyncwrites = MIN(in->f_asyncwrites, INT32_MAX);
 	strlcpy(out->f_fstypename,
 	      in->f_fstypename, MFSNAMELEN);
 	strlcpy(out->f_mntonname,
 	      in->f_mntonname, min(MNAMELEN, FREEBSD4_MNAMELEN));
 	out->f_syncreads = MIN(in->f_syncreads, INT32_MAX);
 	out->f_asyncreads = MIN(in->f_asyncreads, INT32_MAX);
 	strlcpy(out->f_mntfromname,
 	      in->f_mntfromname, min(MNAMELEN, FREEBSD4_MNAMELEN));
 }
 #endif
 
 #ifdef COMPAT_FREEBSD4
 /*
  * FreeBSD 4 compat getfsstat(2) for 32-bit callers.  The user buffer size
  * is expressed in struct statfs32 records; the native records returned by
  * kern_getfsstat() are converted one at a time with copy_statfs() and
  * copied out, advancing uap->buf as it goes.  On success the record count
  * is returned in td_retval[0].
  */
 int
 freebsd4_freebsd32_getfsstat(struct thread *td,
     struct freebsd4_freebsd32_getfsstat_args *uap)
 {
 	struct statfs32 one32;
 	struct statfs *native;
 	size_t i, nrec, nbytes;
 	int rv;
 
 	nrec = uap->bufsize / sizeof(struct statfs32);
 	nbytes = nrec * sizeof(struct statfs);
 	rv = kern_getfsstat(td, &native, nbytes, &nrec, UIO_SYSSPACE,
 	    uap->mode);
 	if (nbytes > 0) {
 		for (i = 0; i < nrec && rv == 0; i++) {
 			copy_statfs(&native[i], &one32);
 			rv = copyout(&one32, uap->buf, sizeof(one32));
 			uap->buf++;
 		}
 		free(native, M_STATFS);
 	}
 	if (rv == 0)
 		td->td_retval[0] = nrec;
 	return (rv);
 }
 #endif
 
 #ifdef COMPAT_FREEBSD10
 /*
  * FreeBSD 10 compat pipe(2) for 32-bit callers: the argument structure is
  * reinterpreted as the native one and handed to freebsd10_pipe().
  */
 int
 freebsd10_freebsd32_pipe(struct thread *td,
     struct freebsd10_freebsd32_pipe_args *uap)
 {
 	struct freebsd10_pipe_args *native_uap;
 
 	native_uap = (struct freebsd10_pipe_args *)uap;
 	return (freebsd10_pipe(td, native_uap));
 }
 #endif
 
 int
 freebsd32_sigaltstack(struct thread *td,
 		      struct freebsd32_sigaltstack_args *uap)
 {
 	struct sigaltstack32 s32;
 	struct sigaltstack ss, oss, *ssp;
 	int error;
 
 	if (uap->ss != NULL) {
 		error = copyin(uap->ss, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		PTRIN_CP(s32, ss, ss_sp);
 		CP(s32, ss, ss_size);
 		CP(s32, ss, ss_flags);
 		ssp = &ss;
 	} else
 		ssp = NULL;
 	error = kern_sigaltstack(td, ssp, &oss);
 	if (error == 0 && uap->oss != NULL) {
 		PTROUT_CP(oss, s32, ss_sp);
 		CP(oss, s32, ss_size);
 		CP(oss, s32, ss_flags);
 		error = copyout(&s32, uap->oss, sizeof(s32));
 	}
 	return (error);
 }
 
 /*
  * Custom version of exec_copyin_args() so that we can translate
  * the pointers.
  *
  * args		zeroed here, then filled with the file name, argument and
  *		environment strings and their counts
  * fname	optional file name; read with copystr() when segflg is
  *		UIO_SYSSPACE, otherwise with copyinstr()
  * argv, envv	user arrays of 32-bit string pointers, each terminated by
  *		a zero entry; envv may be NULL, argv may not
  *
  * Returns 0 on success.  Returns EFAULT if argv is NULL.  ENAMETOOLONG
  * from copying an argument or environment string is reported as E2BIG.
  * On any failure after the buffers are allocated they are released with
  * exec_free_args().
  */
 int
 freebsd32_exec_copyin_args(struct image_args *args, char *fname,
     enum uio_seg segflg, u_int32_t *argv, u_int32_t *envv)
 {
 	char *argp, *envp;
 	u_int32_t *p32, arg;
 	size_t length;
 	int error;
 
 	bzero(args, sizeof(*args));
 	if (argv == NULL)
 		return (EFAULT);
 
 	/*
 	 * Allocate demand-paged memory for the file name, argument, and
 	 * environment strings.
 	 */
 	error = exec_alloc_args(args);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Copy the file name.
 	 */
 	if (fname != NULL) {
 		args->fname = args->buf;
 		error = (segflg == UIO_SYSSPACE) ?
 		    copystr(fname, args->fname, PATH_MAX, &length) :
 		    copyinstr(fname, args->fname, PATH_MAX, &length);
 		if (error != 0)
 			goto err_exit;
 	} else
 		length = 0;
 
 	/* The argument strings start right after the file name, if any. */
 	args->begin_argv = args->buf + length;
 	args->endp = args->begin_argv;
 	args->stringspace = ARG_MAX;
 
 	/*
 	 * extract arguments first
 	 */
 	p32 = argv;
 	for (;;) {
 		/* Fetch the next 32-bit pointer; a zero entry ends the list. */
 		error = copyin(p32++, &arg, sizeof(arg));
 		if (error)
 			goto err_exit;
 		if (arg == 0)
 			break;
 		argp = PTRIN(arg);
 		error = copyinstr(argp, args->endp, args->stringspace, &length);
 		if (error) {
 			if (error == ENAMETOOLONG)
 				error = E2BIG;
 			goto err_exit;
 		}
 		/* Account for the string just appended at endp. */
 		args->stringspace -= length;
 		args->endp += length;
 		args->argc++;
 	}
 			
 	args->begin_envv = args->endp;
 
 	/*
 	 * extract environment strings
 	 */
 	if (envv) {
 		p32 = envv;
 		for (;;) {
 			error = copyin(p32++, &arg, sizeof(arg));
 			if (error)
 				goto err_exit;
 			if (arg == 0)
 				break;
 			envp = PTRIN(arg);
 			error = copyinstr(envp, args->endp, args->stringspace,
 			    &length);
 			if (error) {
 				if (error == ENAMETOOLONG)
 					error = E2BIG;
 				goto err_exit;
 			}
 			args->stringspace -= length;
 			args->endp += length;
 			args->envc++;
 		}
 	}
 
 	return (0);
 
 err_exit:
 	exec_free_args(args);
 	return (error);
 }
 
 int
 freebsd32_execve(struct thread *td, struct freebsd32_execve_args *uap)
 {
 	struct image_args eargs;
 	struct vmspace *oldvmspace;
 	int error;
 
 	error = pre_execve(td, &oldvmspace);
 	if (error != 0)
 		return (error);
 	error = freebsd32_exec_copyin_args(&eargs, uap->fname, UIO_USERSPACE,
 	    uap->argv, uap->envv);
 	if (error == 0)
 		error = kern_execve(td, &eargs, NULL);
 	post_execve(td, error, oldvmspace);
 	return (error);
 }
 
 int
 freebsd32_fexecve(struct thread *td, struct freebsd32_fexecve_args *uap)
 {
 	struct image_args eargs;
 	struct vmspace *oldvmspace;
 	int error;
 
 	error = pre_execve(td, &oldvmspace);
 	if (error != 0)
 		return (error);
 	error = freebsd32_exec_copyin_args(&eargs, NULL, UIO_SYSSPACE,
 	    uap->argv, uap->envv);
 	if (error == 0) {
 		eargs.fd = uap->fd;
 		error = kern_execve(td, &eargs, NULL);
 	}
 	post_execve(td, error, oldvmspace);
 	return (error);
 }
 
+#if defined(COMPAT_FREEBSD11)
 int
+freebsd11_freebsd32_mknod(struct thread *td,
+    struct freebsd11_freebsd32_mknod_args *uap)
+{
+	/* Path is resolved relative to AT_FDCWD; mode and dev pass through. */
+	return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode,
+	    uap->dev));
+}
+
+int
+freebsd11_freebsd32_mknodat(struct thread *td,
+    struct freebsd11_freebsd32_mknodat_args *uap)
+{
+	/* Same call as above, but relative to the caller-supplied uap->fd. */
+	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
+	    uap->dev));
+}
+#endif /* COMPAT_FREEBSD11 */
+
+int
 freebsd32_mprotect(struct thread *td, struct freebsd32_mprotect_args *uap)
 {
 	int effprot;
 
 	/*
 	 * 32-bit mprotect(2).  On amd64, when i386_read_exec is set, a
 	 * request for read access is widened to include execute access.
 	 */
 	effprot = uap->prot;
 #if defined(__amd64__)
 	if ((effprot & PROT_READ) != 0 && i386_read_exec)
 		effprot |= PROT_EXEC;
 #endif
 	return (kern_mprotect(td, (uintptr_t)PTRIN(uap->addr), uap->len,
 	    effprot));
 }
 
 /*
  * 32-bit mmap(2).  The file offset is reassembled with PAIR32TO64().  On
  * amd64, when i386_read_exec is set, a request for read access is widened
  * to include execute access.
  */
 int
 freebsd32_mmap(struct thread *td, struct freebsd32_mmap_args *uap)
 {
 	int effprot;
 
 	effprot = uap->prot;
 #if defined(__amd64__)
 	if ((effprot & PROT_READ) && i386_read_exec)
 		effprot |= PROT_EXEC;
 #endif
 
 	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, effprot,
 	    uap->flags, uap->fd, PAIR32TO64(off_t, uap->pos)));
 }
 
 #ifdef COMPAT_FREEBSD6
 /*
  * FreeBSD 6 compat mmap(2) for 32-bit callers.  Performs the same
  * protection adjustment and kern_mmap() call as freebsd32_mmap(), using
  * the FreeBSD 6 argument structure.
  */
 int
 freebsd6_freebsd32_mmap(struct thread *td,
     struct freebsd6_freebsd32_mmap_args *uap)
 {
 	int effprot;
 
 	effprot = uap->prot;
 #if defined(__amd64__)
 	if ((effprot & PROT_READ) && i386_read_exec)
 		effprot |= PROT_EXEC;
 #endif
 
 	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, effprot,
 	    uap->flags, uap->fd, PAIR32TO64(off_t, uap->pos)));
 }
 #endif
 
 int
 freebsd32_setitimer(struct thread *td, struct freebsd32_setitimer_args *uap)
 {
 	struct itimerval itv, oitv, *itvp;	
 	struct itimerval32 i32;
 	int error;
 
 	if (uap->itv != NULL) {
 		error = copyin(uap->itv, &i32, sizeof(i32));
 		if (error)
 			return (error);
 		TV_CP(i32, itv, it_interval);
 		TV_CP(i32, itv, it_value);
 		itvp = &itv;
 	} else
 		itvp = NULL;
 	error = kern_setitimer(td, uap->which, itvp, &oitv);
 	if (error || uap->oitv == NULL)
 		return (error);
 	TV_CP(oitv, i32, it_interval);
 	TV_CP(oitv, i32, it_value);
 	return (copyout(&i32, uap->oitv, sizeof(i32)));
 }
 
 int
 freebsd32_getitimer(struct thread *td, struct freebsd32_getitimer_args *uap)
 {
 	struct itimerval itv;
 	struct itimerval32 i32;
 	int error;
 
 	error = kern_getitimer(td, uap->which, &itv);
 	if (error || uap->itv == NULL)
 		return (error);
 	TV_CP(itv, i32, it_interval);
 	TV_CP(itv, i32, it_value);
 	return (copyout(&i32, uap->itv, sizeof(i32)));
 }
 
 int
 freebsd32_select(struct thread *td, struct freebsd32_select_args *uap)
 {
 	struct timeval32 tv32;
 	struct timeval tv, *tvp;
 	int error;
 
 	if (uap->tv != NULL) {
 		error = copyin(uap->tv, &tv32, sizeof(tv32));
 		if (error)
 			return (error);
 		CP(tv32, tv, tv_sec);
 		CP(tv32, tv, tv_usec);
 		tvp = &tv;
 	} else
 		tvp = NULL;
 	/*
 	 * XXX Do pointers need PTRIN()?
 	 */
 	return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp,
 	    sizeof(int32_t) * 8));
 }
 
 int
 freebsd32_pselect(struct thread *td, struct freebsd32_pselect_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts;
 	struct timeval tv, *tvp;
 	sigset_t set, *uset;
 	int error;
 
 	if (uap->ts != NULL) {
 		error = copyin(uap->ts, &ts32, sizeof(ts32));
 		if (error != 0)
 			return (error);
 		CP(ts32, ts, tv_sec);
 		CP(ts32, ts, tv_nsec);
 		TIMESPEC_TO_TIMEVAL(&tv, &ts);
 		tvp = &tv;
 	} else
 		tvp = NULL;
 	if (uap->sm != NULL) {
 		error = copyin(uap->sm, &set, sizeof(set));
 		if (error != 0)
 			return (error);
 		uset = &set;
 	} else
 		uset = NULL;
 	/*
 	 * XXX Do pointers need PTRIN()?
 	 */
 	error = kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp,
 	    uset, sizeof(int32_t) * 8);
 	return (error);
 }
 
 /*
  * kevent copy-out callback: narrow 'count' native events into struct
  * kevent32 records and write them to uap->eventlist, which is advanced
  * past the written records on success.
  */
 static int
 freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count)
 {
 	struct kevent32 out32[KQ_NEVENTS];
 	struct freebsd32_kevent_args *uap;
 	int n, rv;
 
 	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
 	uap = arg;
 
 	for (n = 0; n < count; n++) {
 		CP(kevp[n], out32[n], ident);
 		CP(kevp[n], out32[n], filter);
 		CP(kevp[n], out32[n], flags);
 		CP(kevp[n], out32[n], fflags);
 		CP(kevp[n], out32[n], data);
 		PTROUT_CP(kevp[n], out32[n], udata);
 	}
 
 	rv = copyout(out32, uap->eventlist, count * sizeof(out32[0]));
 	if (rv != 0)
 		return (rv);
 	uap->eventlist += count;
 	return (0);
 }
 
 /*
  * kevent copy-in callback: read 'count' struct kevent32 records from
  * uap->changelist, advance that pointer, and widen the records into the
  * native array kevp.
  */
 static int
 freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count)
 {
 	struct kevent32 in32[KQ_NEVENTS];
 	struct freebsd32_kevent_args *uap;
 	int n, rv;
 
 	KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count));
 	uap = arg;
 
 	rv = copyin(uap->changelist, in32, count * sizeof(in32[0]));
 	if (rv != 0)
 		return (rv);
 	uap->changelist += count;
 
 	for (n = 0; n < count; n++) {
 		CP(in32[n], kevp[n], ident);
 		CP(in32[n], kevp[n], filter);
 		CP(in32[n], kevp[n], flags);
 		CP(in32[n], kevp[n], fflags);
 		CP(in32[n], kevp[n], data);
 		PTRIN_CP(in32[n], kevp[n], udata);
 	}
 	return (0);
 }
 
 int
 freebsd32_kevent(struct thread *td, struct freebsd32_kevent_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts, *tsp;
 	struct kevent_copyops k_ops = {
 		.arg = uap,
 		.k_copyout = freebsd32_kevent_copyout,
 		.k_copyin = freebsd32_kevent_copyin,
 	};
 	int error;
 
 
 	if (uap->timeout) {
 		error = copyin(uap->timeout, &ts32, sizeof(ts32));
 		if (error)
 			return (error);
 		CP(ts32, ts, tv_sec);
 		CP(ts32, ts, tv_nsec);
 		tsp = &ts;
 	} else
 		tsp = NULL;
 	error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents,
 	    &k_ops, tsp);
 	return (error);
 }
 
 int
 freebsd32_gettimeofday(struct thread *td,
 		       struct freebsd32_gettimeofday_args *uap)
 {
 	struct timeval atv;
 	struct timeval32 atv32;
 	struct timezone rtz;
 	int error = 0;
 
 	if (uap->tp) {
 		microtime(&atv);
 		CP(atv, atv32, tv_sec);
 		CP(atv, atv32, tv_usec);
 		error = copyout(&atv32, uap->tp, sizeof (atv32));
 	}
 	if (error == 0 && uap->tzp != NULL) {
 		rtz.tz_minuteswest = tz_minuteswest;
 		rtz.tz_dsttime = tz_dsttime;
 		error = copyout(&rtz, uap->tzp, sizeof (rtz));
 	}
 	return (error);
 }
 
 int
 freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap)
 {
 	struct rusage32 s32;
 	struct rusage s;
 	int error;
 
 	error = kern_getrusage(td, uap->who, &s);
 	if (error)
 		return (error);
 	if (uap->rusage != NULL) {
 		freebsd32_rusage_out(&s, &s32);
 		error = copyout(&s32, uap->rusage, sizeof(s32));
 	}
 	return (error);
 }
 
 /*
  * Build a native struct uio from a user array of struct iovec32.
  *
  * The uio and its iovec array are carved from a single M_IOV allocation,
  * so the caller releases both by freeing *uiop.  *uiop is NULL on any
  * failure.  Returns EINVAL if iovcnt exceeds UIO_MAXIOV or if the summed
  * lengths would exceed INT_MAX, or the copyin() error for a bad entry.
  */
 static int
 freebsd32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop)
 {
 	struct iovec32 ent32;
 	struct iovec *vec;
 	struct uio *uio;
 	u_int veclen;
 	int error, i;
 
 	*uiop = NULL;
 	if (iovcnt > UIO_MAXIOV)
 		return (EINVAL);
 
 	veclen = iovcnt * sizeof(struct iovec);
 	uio = malloc(veclen + sizeof *uio, M_IOV, M_WAITOK);
 	/* The iovec array lives directly behind the uio header. */
 	vec = (struct iovec *)(uio + 1);
 
 	for (i = 0; i < iovcnt; i++) {
 		error = copyin(&iovp[i], &ent32, sizeof(struct iovec32));
 		if (error)
 			goto fail;
 		vec[i].iov_base = PTRIN(ent32.iov_base);
 		vec[i].iov_len = ent32.iov_len;
 	}
 
 	uio->uio_iov = vec;
 	uio->uio_iovcnt = iovcnt;
 	uio->uio_segflg = UIO_USERSPACE;
 	uio->uio_offset = -1;
 	uio->uio_resid = 0;
 	for (i = 0; i < iovcnt; i++) {
 		if (vec[i].iov_len > INT_MAX - uio->uio_resid) {
 			error = EINVAL;
 			goto fail;
 		}
 		uio->uio_resid += vec[i].iov_len;
 	}
 
 	*uiop = uio;
 	return (0);
 
 fail:
 	free(uio, M_IOV);
 	return (error);
 }
 
 /*
  * 32-bit readv(2): convert the iovec32 array to a uio, call kern_readv()
  * and free the uio.
  */
 int
 freebsd32_readv(struct thread *td, struct freebsd32_readv_args *uap)
 {
 	struct uio *uio;
 	int rv;
 
 	rv = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &uio);
 	if (rv != 0)
 		return (rv);
 
 	rv = kern_readv(td, uap->fd, uio);
 	free(uio, M_IOV);
 	return (rv);
 }
 
 /*
  * 32-bit writev(2): convert the iovec32 array to a uio, call kern_writev()
  * and free the uio.
  */
 int
 freebsd32_writev(struct thread *td, struct freebsd32_writev_args *uap)
 {
 	struct uio *uio;
 	int rv;
 
 	rv = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &uio);
 	if (rv != 0)
 		return (rv);
 
 	rv = kern_writev(td, uap->fd, uio);
 	free(uio, M_IOV);
 	return (rv);
 }
 
 /*
  * 32-bit preadv(2): convert the iovec32 array to a uio, call kern_preadv()
  * with the offset reassembled by PAIR32TO64(), and free the uio.
  */
 int
 freebsd32_preadv(struct thread *td, struct freebsd32_preadv_args *uap)
 {
 	struct uio *uio;
 	int rv;
 
 	rv = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &uio);
 	if (rv != 0)
 		return (rv);
 
 	rv = kern_preadv(td, uap->fd, uio, PAIR32TO64(off_t,uap->offset));
 	free(uio, M_IOV);
 	return (rv);
 }
 
 /*
  * 32-bit pwritev(2): convert the iovec32 array to a uio, call
  * kern_pwritev() with the offset reassembled by PAIR32TO64(), and free the
  * uio.
  */
 int
 freebsd32_pwritev(struct thread *td, struct freebsd32_pwritev_args *uap)
 {
 	struct uio *uio;
 	int rv;
 
 	rv = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &uio);
 	if (rv != 0)
 		return (rv);
 
 	rv = kern_pwritev(td, uap->fd, uio, PAIR32TO64(off_t,uap->offset));
 	free(uio, M_IOV);
 	return (rv);
 }
 
 /*
  * Copy in a user array of struct iovec32 and widen it into a freshly
  * allocated (M_IOV) array of struct iovec stored in *iovp.
  *
  * 'error' is the value to return when iovcnt exceeds UIO_MAXIOV.  On any
  * failure *iovp is NULL and nothing remains allocated.
  */
 int
 freebsd32_copyiniov(struct iovec32 *iovp32, u_int iovcnt, struct iovec **iovp,
     int error)
 {
 	struct iovec32 ent32;
 	struct iovec *vec;
 	int i;
 
 	*iovp = NULL;
 	if (iovcnt > UIO_MAXIOV)
 		return (error);
 
 	vec = malloc(iovcnt * sizeof(struct iovec), M_IOV, M_WAITOK);
 	for (i = 0; i < iovcnt; i++) {
 		error = copyin(&iovp32[i], &ent32, sizeof(struct iovec32));
 		if (error != 0) {
 			free(vec, M_IOV);
 			return (error);
 		}
 		vec[i].iov_base = PTRIN(ent32.iov_base);
 		vec[i].iov_len = ent32.iov_len;
 	}
 
 	*iovp = vec;
 	return (0);
 }
 
 static int
 freebsd32_copyinmsghdr(struct msghdr32 *msg32, struct msghdr *msg)
 {
 	struct msghdr32 m32;
 	int error;
 
 	error = copyin(msg32, &m32, sizeof(m32));
 	if (error)
 		return (error);
 	msg->msg_name = PTRIN(m32.msg_name);
 	msg->msg_namelen = m32.msg_namelen;
 	msg->msg_iov = PTRIN(m32.msg_iov);
 	msg->msg_iovlen = m32.msg_iovlen;
 	msg->msg_control = PTRIN(m32.msg_control);
 	msg->msg_controllen = m32.msg_controllen;
 	msg->msg_flags = m32.msg_flags;
 	return (0);
 }
 
 static int
 freebsd32_copyoutmsghdr(struct msghdr *msg, struct msghdr32 *msg32)
 {
 	struct msghdr32 m32;
 	int error;
 
 	m32.msg_name = PTROUT(msg->msg_name);
 	m32.msg_namelen = msg->msg_namelen;
 	m32.msg_iov = PTROUT(msg->msg_iov);
 	m32.msg_iovlen = msg->msg_iovlen;
 	m32.msg_control = PTROUT(msg->msg_control);
 	m32.msg_controllen = msg->msg_controllen;
 	m32.msg_flags = msg->msg_flags;
 	error = copyout(&m32, msg32, sizeof(m32));
 	return (error);
 }
 
 /*
  * Control-message layout helpers for 32-bit callers.
  *
  * FREEBSD32_ALIGN() rounds a value up to a multiple of sizeof(int), or of
  * sizeof(long) on mips.  FREEBSD32_CMSG_SPACE() is the aligned header size
  * plus the aligned payload size, and FREEBSD32_CMSG_DATA() is the address
  * just past the aligned header.
  */
 #ifndef __mips__
 #define FREEBSD32_ALIGNBYTES	(sizeof(int) - 1)
 #else
 #define FREEBSD32_ALIGNBYTES	(sizeof(long) - 1)
 #endif
 #define FREEBSD32_ALIGN(p)	\
 	(((u_long)(p) + FREEBSD32_ALIGNBYTES) & ~FREEBSD32_ALIGNBYTES)
 #define	FREEBSD32_CMSG_SPACE(l)	\
 	(FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + FREEBSD32_ALIGN(l))
 
 #define	FREEBSD32_CMSG_DATA(cmsg)	((unsigned char *)(cmsg) + \
 				 FREEBSD32_ALIGN(sizeof(struct cmsghdr)))
 /*
  * Copy the control messages held in the mbuf chain 'control' out to the
  * user buffer msg->msg_control, re-packing each message with
  * FREEBSD32_ALIGN() padding between header and payload.
  *
  * msg->msg_controllen supplies the space available on entry and is set to
  * 0 before any copying.  On a normal exit it is set to the number of bytes
  * laid out in the user buffer, or to the original length when the space
  * was used up.  MSG_CTRUNC is set in msg->msg_flags whenever a header or
  * payload had to be cut short.  A copyout() failure returns immediately,
  * leaving msg_controllen at 0.
  *
  * Each cmsg_len is rewritten in place in the mbuf data before the header
  * is copied out.
  *
  * NOTE(review): the EINVAL set for a malformed message only breaks out of
  * the inner loop; the outer loop then moves on to the next mbuf, where a
  * later copyout() result can overwrite the EINVAL.  Confirm whether that
  * is intended.
  */
 static int
 freebsd32_copy_msg_out(struct msghdr *msg, struct mbuf *control)
 {
 	struct cmsghdr *cm;
 	void *data;
 	socklen_t clen, datalen;
 	int error;
 	caddr_t ctlbuf;
 	int len, maxlen, copylen;
 	struct mbuf *m;
 	error = 0;
 
 	/* len counts down the remaining user space; maxlen keeps the total. */
 	len    = msg->msg_controllen;
 	maxlen = msg->msg_controllen;
 	msg->msg_controllen = 0;
 
 	m = control;
 	ctlbuf = msg->msg_control;
       
 	while (m && len > 0) {
 		cm = mtod(m, struct cmsghdr *);
 		clen = m->m_len;
 
 		while (cm != NULL) {
 
 			/* The header and the whole message must fit in this mbuf. */
 			if (sizeof(struct cmsghdr) > clen ||
 			    cm->cmsg_len > clen) {
 				error = EINVAL;
 				break;
 			}	
 
 			/* Payload location and size under the native layout. */
 			data   = CMSG_DATA(cm);
 			datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 
 			/* Adjust message length */
 			cm->cmsg_len = FREEBSD32_ALIGN(sizeof(struct cmsghdr)) +
 			    datalen;
 
 
 			/* Copy cmsghdr */
 			copylen = sizeof(struct cmsghdr);
 			if (len < copylen) {
 				msg->msg_flags |= MSG_CTRUNC;
 				copylen = len;
 			}
 
 			error = copyout(cm,ctlbuf,copylen);
 			if (error)
 				goto exit;
 
 			ctlbuf += FREEBSD32_ALIGN(copylen);
 			len    -= FREEBSD32_ALIGN(copylen);
 
 			if (len <= 0)
 				break;
 
 			/* Copy data */
 			copylen = datalen;
 			if (len < copylen) {
 				msg->msg_flags |= MSG_CTRUNC;
 				copylen = len;
 			}
 
 			error = copyout(data,ctlbuf,copylen);
 			if (error)
 				goto exit;
 
 			ctlbuf += FREEBSD32_ALIGN(copylen);
 			len    -= FREEBSD32_ALIGN(copylen);
 
 			/* Step to the next message in this mbuf, if there is one. */
 			if (CMSG_SPACE(datalen) < clen) {
 				clen -= CMSG_SPACE(datalen);
 				cm = (struct cmsghdr *)
 					((caddr_t)cm + CMSG_SPACE(datalen));
 			} else {
 				clen = 0;
 				cm = NULL;
 			}
 		}	
 		m = m->m_next;
 	}
 
 	msg->msg_controllen = (len <= 0) ? maxlen :  ctlbuf - (caddr_t)msg->msg_control;
 	
 exit:
 	return (error);
 
 }
 
 int
 freebsd32_recvmsg(td, uap)
 	struct thread *td;
 	struct freebsd32_recvmsg_args /* {
 		int	s;
 		struct	msghdr32 *msg;
 		int	flags;
 	} */ *uap;
 {
 	struct msghdr msg;
 	struct msghdr32 m32;
 	struct iovec *uiov, *iov;
 	struct mbuf *control = NULL;
 	struct mbuf **controlp;
 
 	int error;
 	error = copyin(uap->msg, &m32, sizeof(m32));
 	if (error)
 		return (error);
 	error = freebsd32_copyinmsghdr(uap->msg, &msg);
 	if (error)
 		return (error);
 	error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov,
 	    EMSGSIZE);
 	if (error)
 		return (error);
 	msg.msg_flags = uap->flags;
 	uiov = msg.msg_iov;
 	msg.msg_iov = iov;
 
 	controlp = (msg.msg_control != NULL) ?  &control : NULL;
 	error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, controlp);
 	if (error == 0) {
 		msg.msg_iov = uiov;
 		
 		if (control != NULL)
 			error = freebsd32_copy_msg_out(&msg, control);
 		else
 			msg.msg_controllen = 0;
 		
 		if (error == 0)
 			error = freebsd32_copyoutmsghdr(&msg, uap->msg);
 	}
 	free(iov, M_IOV);
 
 	if (control != NULL)
 		m_freem(control);
 
 	return (error);
 }
 
 /*
  * Copy-in the array of control messages constructed using alignment
  * and padding suitable for a 32-bit environment and construct an
  * mbuf using alignment and padding suitable for a 64-bit kernel.
  * The alignment and padding are defined indirectly by CMSG_DATA(),
  * CMSG_SPACE() and CMSG_LEN().
  *
  * The user buffer is fetched exactly once into a kernel snapshot and both
  * passes below work on that snapshot, so the message lengths used to size
  * the mbuf are the same lengths that drive the copy into it.  Length
  * checks are written so that they cannot wrap.
  *
  * mp		receives the new MT_CONTROL mbuf on success; untouched on error
  * buf		user address of the 32-bit control buffer
  * buflen	its length in bytes; at most MCLBYTES
  *
  * Returns 0 on success, EINVAL for an oversized or malformed buffer, or
  * the copyin() error.
  */
 static int
 freebsd32_copyin_control(struct mbuf **mp, caddr_t buf, u_int buflen)
 {
 	struct cmsghdr *cm;
 	struct mbuf *m;
 	char *in, *src, *md;
 	u_int idx, len, msglen, ulen;
 	int error;
 
 	/* Bound the length before rounding it up so the round-up cannot wrap. */
 	if (buflen > MCLBYTES)
 		return (EINVAL);
 	ulen = buflen;
 	buflen = FREEBSD32_ALIGN(buflen);
 
 	/*
 	 * Snapshot the user buffer.  Only the bytes the caller declared are
 	 * read; any alignment padding at the tail stays zeroed.
 	 */
 	in = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 	error = copyin(buf, in, ulen);
 	if (error != 0)
 		goto out;
 
 	/*
 	 * Iterate over the buffer and get the length of each message
 	 * in there. This has 32-bit alignment and padding. Use it to
 	 * determine the length of these messages when using 64-bit
 	 * alignment and padding.
 	 */
 	idx = 0;
 	len = 0;
 	while (idx < buflen) {
 		memcpy(&msglen, in + idx, sizeof(msglen));
 		/*
 		 * Compare against the space that is left rather than adding
 		 * to idx.  idx and buflen are both aligned, so a length that
 		 * fits here still fits after it is rounded up.
 		 */
 		if (msglen < sizeof(struct cmsghdr) ||
 		    msglen > buflen - idx) {
 			error = EINVAL;
 			goto out;
 		}
 		msglen = FREEBSD32_ALIGN(msglen);
 		idx += msglen;
 		msglen += CMSG_ALIGN(sizeof(struct cmsghdr)) -
 		    FREEBSD32_ALIGN(sizeof(struct cmsghdr));
 		len += CMSG_ALIGN(msglen);
 	}
 
 	if (len > MCLBYTES) {
 		error = EINVAL;
 		goto out;
 	}
 
 	m = m_get(M_WAITOK, MT_CONTROL);
 	if (len > MLEN)
 		MCLGET(m, M_WAITOK);
 	m->m_len = len;
 
 	/* Second pass: re-pack each message from the snapshot into the mbuf. */
 	md = mtod(m, char *);
 	src = in;
 	while (buflen > 0) {
 		cm = (struct cmsghdr *)md;
 		memcpy(cm, src, sizeof(*cm));
 		msglen = FREEBSD32_ALIGN(cm->cmsg_len);
 
 		/* Modify the message length to account for alignment. */
 		cm->cmsg_len = msglen + CMSG_ALIGN(sizeof(struct cmsghdr)) -
 		    FREEBSD32_ALIGN(sizeof(struct cmsghdr));
 
 		md += CMSG_ALIGN(sizeof(struct cmsghdr));
 		src += FREEBSD32_ALIGN(sizeof(struct cmsghdr));
 		buflen -= FREEBSD32_ALIGN(sizeof(struct cmsghdr));
 
 		msglen -= FREEBSD32_ALIGN(sizeof(struct cmsghdr));
 		if (msglen > 0) {
 			memcpy(md, src, msglen);
 			md += CMSG_ALIGN(msglen);
 			src += msglen;
 			buflen -= msglen;
 		}
 	}
 
 	*mp = m;
 out:
 	free(in, M_TEMP);
 	return (error);
 }
 
 int
 freebsd32_sendmsg(struct thread *td,
 		  struct freebsd32_sendmsg_args *uap)
 {
 	struct msghdr msg;
 	struct msghdr32 m32;
 	struct iovec *iov;
 	struct mbuf *control = NULL;
 	struct sockaddr *to = NULL;
 	int error;
 
 	error = copyin(uap->msg, &m32, sizeof(m32));
 	if (error)
 		return (error);
 	error = freebsd32_copyinmsghdr(uap->msg, &msg);
 	if (error)
 		return (error);
 	error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov,
 	    EMSGSIZE);
 	if (error)
 		return (error);
 	msg.msg_iov = iov;
 	if (msg.msg_name != NULL) {
 		error = getsockaddr(&to, msg.msg_name, msg.msg_namelen);
 		if (error) {
 			to = NULL;
 			goto out;
 		}
 		msg.msg_name = to;
 	}
 
 	if (msg.msg_control) {
 		if (msg.msg_controllen < sizeof(struct cmsghdr)) {
 			error = EINVAL;
 			goto out;
 		}
 
 		error = freebsd32_copyin_control(&control, msg.msg_control,
 		    msg.msg_controllen);
 		if (error)
 			goto out;
 
 		msg.msg_control = NULL;
 		msg.msg_controllen = 0;
 	}
 
 	error = kern_sendit(td, uap->s, &msg, uap->flags, control,
 	    UIO_USERSPACE);
 
 out:
 	free(iov, M_IOV);
 	if (to)
 		free(to, M_SONAME);
 	return (error);
 }
 
 int
 freebsd32_recvfrom(struct thread *td,
 		   struct freebsd32_recvfrom_args *uap)
 {
 	struct msghdr msg;
 	struct iovec aiov;
 	int error;
 
 	if (uap->fromlenaddr) {
 		error = copyin(PTRIN(uap->fromlenaddr), &msg.msg_namelen,
 		    sizeof(msg.msg_namelen));
 		if (error)
 			return (error);
 	} else {
 		msg.msg_namelen = 0;
 	}
 
 	msg.msg_name = PTRIN(uap->from);
 	msg.msg_iov = &aiov;
 	msg.msg_iovlen = 1;
 	aiov.iov_base = PTRIN(uap->buf);
 	aiov.iov_len = uap->len;
 	msg.msg_control = NULL;
 	msg.msg_flags = uap->flags;
 	error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, NULL);
 	if (error == 0 && uap->fromlenaddr)
 		error = copyout(&msg.msg_namelen, PTRIN(uap->fromlenaddr),
 		    sizeof (msg.msg_namelen));
 	return (error);
 }
 
 int
 freebsd32_settimeofday(struct thread *td,
 		       struct freebsd32_settimeofday_args *uap)
 {
 	struct timeval32 tv32;
 	struct timeval tv, *tvp;
 	struct timezone tz, *tzp;
 	int error;
 
 	if (uap->tv) {
 		error = copyin(uap->tv, &tv32, sizeof(tv32));
 		if (error)
 			return (error);
 		CP(tv32, tv, tv_sec);
 		CP(tv32, tv, tv_usec);
 		tvp = &tv;
 	} else
 		tvp = NULL;
 	if (uap->tzp) {
 		error = copyin(uap->tzp, &tz, sizeof(tz));
 		if (error)
 			return (error);
 		tzp = &tz;
 	} else
 		tzp = NULL;
 	return (kern_settimeofday(td, tvp, tzp));
 }
 
 /*
  * 32-bit utimes(2).  The optional pair of timeval32 values is copied in
  * and widened, then applied through kern_utimesat() relative to AT_FDCWD.
  */
 int
 freebsd32_utimes(struct thread *td, struct freebsd32_utimes_args *uap)
 {
 	struct timeval32 tv32[2];
 	struct timeval tv[2], *tvp;
 	int i, rv;
 
 	tvp = NULL;
 	if (uap->tptr != NULL) {
 		rv = copyin(uap->tptr, tv32, sizeof(tv32));
 		if (rv != 0)
 			return (rv);
 		for (i = 0; i < 2; i++) {
 			CP(tv32[i], tv[i], tv_sec);
 			CP(tv32[i], tv[i], tv_usec);
 		}
 		tvp = tv;
 	}
 	return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    tvp, UIO_SYSSPACE));
 }
 
 /*
  * 32-bit lutimes(2).  The optional pair of timeval32 values is copied in
  * and widened, then applied through kern_lutimes().
  */
 int
 freebsd32_lutimes(struct thread *td, struct freebsd32_lutimes_args *uap)
 {
 	struct timeval32 tv32[2];
 	struct timeval tv[2], *tvp;
 	int i, rv;
 
 	tvp = NULL;
 	if (uap->tptr != NULL) {
 		rv = copyin(uap->tptr, tv32, sizeof(tv32));
 		if (rv != 0)
 			return (rv);
 		for (i = 0; i < 2; i++) {
 			CP(tv32[i], tv[i], tv_sec);
 			CP(tv32[i], tv[i], tv_usec);
 		}
 		tvp = tv;
 	}
 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, tvp, UIO_SYSSPACE));
 }
 
 /*
  * 32-bit futimes(2).  The optional pair of timeval32 values is copied in
  * and widened, then applied to the descriptor through kern_futimes().
  */
 int
 freebsd32_futimes(struct thread *td, struct freebsd32_futimes_args *uap)
 {
 	struct timeval32 tv32[2];
 	struct timeval tv[2], *tvp;
 	int i, rv;
 
 	tvp = NULL;
 	if (uap->tptr != NULL) {
 		rv = copyin(uap->tptr, tv32, sizeof(tv32));
 		if (rv != 0)
 			return (rv);
 		for (i = 0; i < 2; i++) {
 			CP(tv32[i], tv[i], tv_sec);
 			CP(tv32[i], tv[i], tv_usec);
 		}
 		tvp = tv;
 	}
 	return (kern_futimes(td, uap->fd, tvp, UIO_SYSSPACE));
 }
 
 int
 freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap)
 {
 	struct timeval32 s32[2];
 	struct timeval s[2], *sp;
 	int error;
 
 	if (uap->times != NULL) {
 		error = copyin(uap->times, s32, sizeof(s32));
 		if (error)
 			return (error);
 		CP(s32[0], s[0], tv_sec);
 		CP(s32[0], s[0], tv_usec);
 		CP(s32[1], s[1], tv_sec);
 		CP(s32[1], s[1], tv_usec);
 		sp = s;
 	} else
 		sp = NULL;
 	return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 		sp, UIO_SYSSPACE));
 }
 
 int
 freebsd32_futimens(struct thread *td, struct freebsd32_futimens_args *uap)
 {
 	struct timespec32 ts32[2];
 	struct timespec ts[2], *tsp;
 	int error;
 
 	if (uap->times != NULL) {
 		error = copyin(uap->times, ts32, sizeof(ts32));
 		if (error)
 			return (error);
 		CP(ts32[0], ts[0], tv_sec);
 		CP(ts32[0], ts[0], tv_nsec);
 		CP(ts32[1], ts[1], tv_sec);
 		CP(ts32[1], ts[1], tv_nsec);
 		tsp = ts;
 	} else
 		tsp = NULL;
 	return (kern_futimens(td, uap->fd, tsp, UIO_SYSSPACE));
 }
 
 int
 freebsd32_utimensat(struct thread *td, struct freebsd32_utimensat_args *uap)
 {
 	struct timespec32 ts32[2];
 	struct timespec ts[2], *tsp;
 	int error;
 
 	if (uap->times != NULL) {
 		error = copyin(uap->times, ts32, sizeof(ts32));
 		if (error)
 			return (error);
 		CP(ts32[0], ts[0], tv_sec);
 		CP(ts32[0], ts[0], tv_nsec);
 		CP(ts32[1], ts[1], tv_sec);
 		CP(ts32[1], ts[1], tv_nsec);
 		tsp = ts;
 	} else
 		tsp = NULL;
 	return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    tsp, UIO_SYSSPACE, uap->flag));
 }
 
 int
 freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap)
 {
 	struct timeval32 tv32;
 	struct timeval delta, olddelta, *deltap;
 	int error;
 
 	if (uap->delta) {
 		error = copyin(uap->delta, &tv32, sizeof(tv32));
 		if (error)
 			return (error);
 		CP(tv32, delta, tv_sec);
 		CP(tv32, delta, tv_usec);
 		deltap = &delta;
 	} else
 		deltap = NULL;
 	error = kern_adjtime(td, deltap, &olddelta);
 	if (uap->olddelta && error == 0) {
 		CP(olddelta, tv32, tv_sec);
 		CP(olddelta, tv32, tv_usec);
 		error = copyout(&tv32, uap->olddelta, sizeof(tv32));
 	}
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 int
 freebsd4_freebsd32_statfs(struct thread *td, struct freebsd4_freebsd32_statfs_args *uap)
 {
 	struct statfs32 s32;
 	struct statfs *sp;
 	int error;
 
 	sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_statfs(td, uap->path, UIO_USERSPACE, sp);
 	if (error == 0) {
 		copy_statfs(sp, &s32);
 		error = copyout(&s32, uap->buf, sizeof(s32));
 	}
 	free(sp, M_STATFS);
 	return (error);
 }
 #endif
 
 #ifdef COMPAT_FREEBSD4
 int
 freebsd4_freebsd32_fstatfs(struct thread *td, struct freebsd4_freebsd32_fstatfs_args *uap)
 {
 	struct statfs32 s32;
 	struct statfs *sp;
 	int error;
 
 	sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fstatfs(td, uap->fd, sp);
 	if (error == 0) {
 		copy_statfs(sp, &s32);
 		error = copyout(&s32, uap->buf, sizeof(s32));
 	}
 	free(sp, M_STATFS);
 	return (error);
 }
 #endif
 
 #ifdef COMPAT_FREEBSD4
 int
 freebsd4_freebsd32_fhstatfs(struct thread *td, struct freebsd4_freebsd32_fhstatfs_args *uap)
 {
 	struct statfs32 s32;
 	struct statfs *sp;
 	fhandle_t fh;
 	int error;
 
 	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
 		return (error);
 	sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fhstatfs(td, fh, sp);
 	if (error == 0) {
 		copy_statfs(sp, &s32);
 		error = copyout(&s32, uap->buf, sizeof(s32));
 	}
 	free(sp, M_STATFS);
 	return (error);
 }
 #endif
 
 int
 freebsd32_pread(struct thread *td, struct freebsd32_pread_args *uap)
 {
 
 	return (kern_pread(td, uap->fd, uap->buf, uap->nbyte,
 	    PAIR32TO64(off_t, uap->offset)));
 }
 
 int
 freebsd32_pwrite(struct thread *td, struct freebsd32_pwrite_args *uap)
 {
 
 	return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte,
 	    PAIR32TO64(off_t, uap->offset)));
 }
 
 #ifdef COMPAT_43
 int
 ofreebsd32_lseek(struct thread *td, struct ofreebsd32_lseek_args *uap)
 {
 
 	return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 }
 #endif
 
 /*
  * lseek for 32-bit processes.  The 64-bit offset argument is rebuilt
  * with PAIR32TO64(); afterwards the off_t left in td_uretoff is split
  * into the low and high 32-bit return values.
  */
 int
 freebsd32_lseek(struct thread *td, struct freebsd32_lseek_args *uap)
 {
 	off_t newoff;
 	int error;
 
 	error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset),
 	    uap->whence);
 	/* Split the quad result into its two halves for eax and edx. */
 	newoff = td->td_uretoff.tdu_off;
 	td->td_retval[RETVAL_LO] = newoff & 0xffffffff;	/* %eax */
 	td->td_retval[RETVAL_HI] = newoff >> 32;		/* %edx */
 	return (error);
 }
 
 int
 freebsd32_truncate(struct thread *td, struct freebsd32_truncate_args *uap)
 {
 
 	return (kern_truncate(td, uap->path, UIO_USERSPACE,
 	    PAIR32TO64(off_t, uap->length)));
 }
 
 int
 freebsd32_ftruncate(struct thread *td, struct freebsd32_ftruncate_args *uap)
 {
 
 	return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length)));
 }
 
 #ifdef COMPAT_43
 int
 ofreebsd32_getdirentries(struct thread *td,
     struct ofreebsd32_getdirentries_args *uap)
 {
 	struct ogetdirentries_args ap;
 	int error;
 	long loff;
 	int32_t loff_cut;
 
 	ap.fd = uap->fd;
 	ap.buf = uap->buf;
 	ap.count = uap->count;
 	ap.basep = NULL;
 	error = kern_ogetdirentries(td, &ap, &loff);
 	if (error == 0) {
 		loff_cut = loff;
 		error = copyout(&loff_cut, uap->basep, sizeof(int32_t));
 	}
 	return (error);
 }
 #endif
 
+#if defined(COMPAT_FREEBSD11)
 int
+freebsd11_freebsd32_getdirentries(struct thread *td,
+    struct freebsd11_freebsd32_getdirentries_args *uap)
+{
+	long base;
+	int32_t base32;
+	int error;
+
+	error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count,
+	    &base, NULL);
+	if (error)
+		return (error);
+	if (uap->basep != NULL) {
+		base32 = base;
+		error = copyout(&base32, uap->basep, sizeof(int32_t));
+	}
+	return (error);
+}
+
+int
+freebsd11_freebsd32_getdents(struct thread *td,
+    struct freebsd11_freebsd32_getdents_args *uap)
+{
+	struct freebsd11_freebsd32_getdirentries_args ap;
+
+	ap.fd = uap->fd;
+	ap.buf = uap->buf;
+	ap.count = uap->count;
+	ap.basep = NULL;
+	return (freebsd11_freebsd32_getdirentries(td, &ap));
+}
+#endif /* COMPAT_FREEBSD11 */
+
+int
 freebsd32_getdirentries(struct thread *td,
     struct freebsd32_getdirentries_args *uap)
 {
 	long base;
 	int32_t base32;
 	int error;
 
 	error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 	    NULL, UIO_USERSPACE);
 	if (error)
 		return (error);
 	if (uap->basep != NULL) {
 		base32 = base;
 		error = copyout(&base32, uap->basep, sizeof(int32_t));
 	}
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD6
 /* versions with the 'int pad' argument */
 int
 freebsd6_freebsd32_pread(struct thread *td, struct freebsd6_freebsd32_pread_args *uap)
 {
 
 	return (kern_pread(td, uap->fd, uap->buf, uap->nbyte,
 	    PAIR32TO64(off_t, uap->offset)));
 }
 
 int
 freebsd6_freebsd32_pwrite(struct thread *td, struct freebsd6_freebsd32_pwrite_args *uap)
 {
 
 	return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte,
 	    PAIR32TO64(off_t, uap->offset)));
 }
 
 /*
  * FreeBSD 6 lseek for 32-bit processes.  The 64-bit offset argument is
  * rebuilt with PAIR32TO64(); afterwards the 64-bit result is split
  * into the low and high 32-bit return values.
  */
 int
 freebsd6_freebsd32_lseek(struct thread *td, struct freebsd6_freebsd32_lseek_args *uap)
 {
 	int error;
 	off_t pos;
 
 	error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset),
 	    uap->whence);
 	/*
 	 * Expand the quad return into two parts for eax and edx.  The
 	 * offset is read through the off_t member of td_uretoff, the same
 	 * way freebsd32_lseek() does, rather than by casting td_retval to
 	 * an off_t pointer.
 	 */
 	pos = td->td_uretoff.tdu_off;
 	td->td_retval[RETVAL_LO] = pos & 0xffffffff;	/* %eax */
 	td->td_retval[RETVAL_HI] = pos >> 32;		/* %edx */
 	return (error);
 }
 
 int
 freebsd6_freebsd32_truncate(struct thread *td, struct freebsd6_freebsd32_truncate_args *uap)
 {
 
 	return (kern_truncate(td, uap->path, UIO_USERSPACE,
 	    PAIR32TO64(off_t, uap->length)));
 }
 
 int
 freebsd6_freebsd32_ftruncate(struct thread *td, struct freebsd6_freebsd32_ftruncate_args *uap)
 {
 
 	return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length)));
 }
 #endif /* COMPAT_FREEBSD6 */
 
 /*
  * Header/trailer descriptor as copied in from a 32-bit process by
  * freebsd32_do_sendfile().  The two pointer members are carried as
  * 32-bit values and converted with PTRIN_CP()/PTRIN() by the consumer.
  */
 struct sf_hdtr32 {
 	uint32_t headers;	/* 32-bit user pointer to the header iovec32 array */
 	int hdr_cnt;		/* number of header iovecs */
 	uint32_t trailers;	/* 32-bit user pointer to the trailer iovec32 array */
 	int trl_cnt;		/* number of trailer iovecs */
 };
 
 /*
  * Common implementation of sendfile for 32-bit processes.
  *
  * The 64-bit offset is rebuilt from its two halves and rejected with
  * EINVAL when negative.  If uap->hdtr is supplied, the sf_hdtr32 is
  * copied in and its header/trailer iovec32 arrays are turned into uios
  * with freebsd32_copyinuio().  The file is looked up with CAP_PREAD
  * rights and the transfer is delegated to fo_sendfile().  The number of
  * bytes sent is copied out to uap->sbytes when that pointer is set.
  *
  * 'compat' selects the FreeBSD 4 accounting of nbytes (only compiled in
  * under COMPAT_FREEBSD4).  Returns 0 or an errno value.
  */
 static int
 freebsd32_do_sendfile(struct thread *td,
     struct freebsd32_sendfile_args *uap, int compat)
 {
 	struct sf_hdtr32 hdtr32;
 	struct sf_hdtr hdtr;
 	struct uio *hdr_uio, *trl_uio;
 	struct file *fp;
 	cap_rights_t rights;
 	struct iovec32 *iov32;
 	off_t offset, sbytes;
 	int error;
 
 	offset = PAIR32TO64(off_t, uap->offset);
 	if (offset < 0)
 		return (EINVAL);
 
 	/* Both start out NULL so the cleanup at 'out' can test them. */
 	hdr_uio = trl_uio = NULL;
 
 	if (uap->hdtr != NULL) {
 		error = copyin(uap->hdtr, &hdtr32, sizeof(hdtr32));
 		if (error)
 			goto out;
 		PTRIN_CP(hdtr32, hdtr, headers);
 		CP(hdtr32, hdtr, hdr_cnt);
 		PTRIN_CP(hdtr32, hdtr, trailers);
 		CP(hdtr32, hdtr, trl_cnt);
 
 		if (hdtr.headers != NULL) {
 			iov32 = PTRIN(hdtr32.headers);
 			error = freebsd32_copyinuio(iov32,
 			    hdtr32.hdr_cnt, &hdr_uio);
 			if (error)
 				goto out;
 #ifdef COMPAT_FREEBSD4
 			/*
 			 * In FreeBSD < 5.0 the nbytes to send also included
 			 * the header.  If compat is specified subtract the
 			 * header size from nbytes.
 			 */
 			if (compat) {
 				if (uap->nbytes > hdr_uio->uio_resid)
 					uap->nbytes -= hdr_uio->uio_resid;
 				else
 					uap->nbytes = 0;
 			}
 #endif
 		}
 		if (hdtr.trailers != NULL) {
 			iov32 = PTRIN(hdtr32.trailers);
 			error = freebsd32_copyinuio(iov32,
 			    hdtr32.trl_cnt, &trl_uio);
 			if (error)
 				goto out;
 		}
 	}
 
 	AUDIT_ARG_FD(uap->fd);
 
 	if ((error = fget_read(td, uap->fd,
 	    cap_rights_init(&rights, CAP_PREAD), &fp)) != 0)
 		goto out;
 
 	error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset,
 	    uap->nbytes, &sbytes, uap->flags, td);
 	fdrop(fp, td);
 
 	/*
 	 * The byte count is reported regardless of the fo_sendfile() result.
 	 * NOTE(review): the copyout() return value is discarded here, so a
 	 * bad uap->sbytes pointer goes unreported -- confirm this is the
 	 * intended best-effort behaviour.
 	 */
 	if (uap->sbytes != NULL)
 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
 
 out:
 	/* Release whichever uios were built above. */
 	if (hdr_uio)
 		free(hdr_uio, M_IOV);
 	if (trl_uio)
 		free(trl_uio, M_IOV);
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * FreeBSD 4 sendfile for 32-bit processes: reuses the common
  * implementation with the compat flag set.
  */
 int
 freebsd4_freebsd32_sendfile(struct thread *td,
     struct freebsd4_freebsd32_sendfile_args *uap)
 {
 	struct freebsd32_sendfile_args *args;
 
 	args = (struct freebsd32_sendfile_args *)uap;
 	return (freebsd32_do_sendfile(td, args, 1));
 }
 #endif
 
 /*
  * sendfile for 32-bit processes: the common implementation without
  * the FreeBSD 4 compat flag.
  */
 int
 freebsd32_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap)
 {
 	int error;
 
 	error = freebsd32_do_sendfile(td, uap, 0);
 	return (error);
 }
 
 static void
 copy_stat(struct stat *in, struct stat32 *out)
 {
 
 	CP(*in, *out, st_dev);
 	CP(*in, *out, st_ino);
 	CP(*in, *out, st_mode);
 	CP(*in, *out, st_nlink);
 	CP(*in, *out, st_uid);
 	CP(*in, *out, st_gid);
 	CP(*in, *out, st_rdev);
 	TS_CP(*in, *out, st_atim);
 	TS_CP(*in, *out, st_mtim);
 	TS_CP(*in, *out, st_ctim);
 	CP(*in, *out, st_size);
 	CP(*in, *out, st_blocks);
 	CP(*in, *out, st_blksize);
 	CP(*in, *out, st_flags);
 	CP(*in, *out, st_gen);
 	TS_CP(*in, *out, st_birthtim);
+	out->st_padding0 = 0;
+	out->st_padding1 = 0;
+#ifdef __STAT32_TIME_T_EXT
+	out->st_atim_ext = 0;
+	out->st_mtim_ext = 0;
+	out->st_ctim_ext = 0;
+	out->st_btim_ext = 0;
+#endif
+	bzero(out->st_spare, sizeof(out->st_spare));
 }
 
 #ifdef COMPAT_43
 static void
 copy_ostat(struct stat *in, struct ostat32 *out)
 {
 
 	CP(*in, *out, st_dev);
 	CP(*in, *out, st_ino);
 	CP(*in, *out, st_mode);
 	CP(*in, *out, st_nlink);
 	CP(*in, *out, st_uid);
 	CP(*in, *out, st_gid);
 	CP(*in, *out, st_rdev);
 	CP(*in, *out, st_size);
 	TS_CP(*in, *out, st_atim);
 	TS_CP(*in, *out, st_mtim);
 	TS_CP(*in, *out, st_ctim);
 	CP(*in, *out, st_blksize);
 	CP(*in, *out, st_blocks);
 	CP(*in, *out, st_flags);
 	CP(*in, *out, st_gen);
 }
 #endif
 
-int
-freebsd32_stat(struct thread *td, struct freebsd32_stat_args *uap)
-{
-	struct stat sb;
-	struct stat32 sb32;
-	int error;
-
-	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
-	    &sb, NULL);
-	if (error)
-		return (error);
-	copy_stat(&sb, &sb32);
-	error = copyout(&sb32, uap->ub, sizeof (sb32));
-	return (error);
-}
-
 #ifdef COMPAT_43
 int
 ofreebsd32_stat(struct thread *td, struct ofreebsd32_stat_args *uap)
 {
 	struct stat sb;
 	struct ostat32 sb32;
 	int error;
 
 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    &sb, NULL);
 	if (error)
 		return (error);
 	copy_ostat(&sb, &sb32);
 	error = copyout(&sb32, uap->ub, sizeof (sb32));
 	return (error);
 }
 #endif
 
 int
 freebsd32_fstat(struct thread *td, struct freebsd32_fstat_args *uap)
 {
 	struct stat ub;
 	struct stat32 ub32;
 	int error;
 
 	error = kern_fstat(td, uap->fd, &ub);
 	if (error)
 		return (error);
 	copy_stat(&ub, &ub32);
 	error = copyout(&ub32, uap->ub, sizeof(ub32));
 	return (error);
 }
 
 #ifdef COMPAT_43
 int
 ofreebsd32_fstat(struct thread *td, struct ofreebsd32_fstat_args *uap)
 {
 	struct stat ub;
 	struct ostat32 ub32;
 	int error;
 
 	error = kern_fstat(td, uap->fd, &ub);
 	if (error)
 		return (error);
 	copy_ostat(&ub, &ub32);
 	error = copyout(&ub32, uap->ub, sizeof(ub32));
 	return (error);
 }
 #endif
 
 int
 freebsd32_fstatat(struct thread *td, struct freebsd32_fstatat_args *uap)
 {
 	struct stat ub;
 	struct stat32 ub32;
 	int error;
 
 	error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE,
 	    &ub, NULL);
 	if (error)
 		return (error);
 	copy_stat(&ub, &ub32);
 	error = copyout(&ub32, uap->buf, sizeof(ub32));
 	return (error);
 }
 
+#ifdef COMPAT_43
 int
-freebsd32_lstat(struct thread *td, struct freebsd32_lstat_args *uap)
+ofreebsd32_lstat(struct thread *td, struct ofreebsd32_lstat_args *uap)
 {
 	struct stat sb;
-	struct stat32 sb32;
+	struct ostat32 sb32;
 	int error;
 
 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error)
 		return (error);
+	copy_ostat(&sb, &sb32);
+	error = copyout(&sb32, uap->ub, sizeof (sb32));
+	return (error);
+}
+#endif
+
+int
+freebsd32_fhstat(struct thread *td, struct freebsd32_fhstat_args *uap)
+{
+	struct stat sb;
+	struct stat32 sb32;
+	struct fhandle fh;
+	int error;
+
+	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
+        if (error != 0)
+                return (error);
+	error = kern_fhstat(td, fh, &sb);
+	if (error != 0)
+		return (error);
 	copy_stat(&sb, &sb32);
+	error = copyout(&sb32, uap->sb, sizeof (sb32));
+	return (error);
+}
+
+#if defined(COMPAT_FREEBSD11)
+static void
+freebsd11_cvtstat32(struct stat *in, struct freebsd11_stat32 *out)
+{
+
+	CP(*in, *out, st_ino);
+	CP(*in, *out, st_nlink);
+	CP(*in, *out, st_dev);
+	CP(*in, *out, st_mode);
+	CP(*in, *out, st_uid);
+	CP(*in, *out, st_gid);
+	CP(*in, *out, st_rdev);
+	TS_CP(*in, *out, st_atim);
+	TS_CP(*in, *out, st_mtim);
+	TS_CP(*in, *out, st_ctim);
+	CP(*in, *out, st_size);
+	CP(*in, *out, st_blocks);
+	CP(*in, *out, st_blksize);
+	CP(*in, *out, st_flags);
+	CP(*in, *out, st_gen);
+	TS_CP(*in, *out, st_birthtim);
+	out->st_lspare = 0;
+	bzero((char *)&out->st_birthtim + sizeof(out->st_birthtim),
+	    sizeof(*out) - offsetof(struct freebsd11_stat32,
+	    st_birthtim) - sizeof(out->st_birthtim));
+}
+
+int
+freebsd11_freebsd32_stat(struct thread *td,
+    struct freebsd11_freebsd32_stat_args *uap)
+{
+	struct stat sb;
+	struct freebsd11_stat32 sb32;
+	int error;
+
+	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
+	    &sb, NULL);
+	if (error != 0)
+		return (error);
+	freebsd11_cvtstat32(&sb, &sb32);
 	error = copyout(&sb32, uap->ub, sizeof (sb32));
 	return (error);
 }
 
-#ifdef COMPAT_43
 int
-ofreebsd32_lstat(struct thread *td, struct ofreebsd32_lstat_args *uap)
+freebsd11_freebsd32_fstat(struct thread *td,
+    struct freebsd11_freebsd32_fstat_args *uap)
 {
 	struct stat sb;
-	struct ostat32 sb32;
+	struct freebsd11_stat32 sb32;
 	int error;
 
+	error = kern_fstat(td, uap->fd, &sb);
+	if (error != 0)
+		return (error);
+	freebsd11_cvtstat32(&sb, &sb32);
+	error = copyout(&sb32, uap->ub, sizeof (sb32));
+	return (error);
+}
+
+int
+freebsd11_freebsd32_fstatat(struct thread *td,
+    struct freebsd11_freebsd32_fstatat_args *uap)
+{
+	struct stat sb;
+	struct freebsd11_stat32 sb32;
+	int error;
+
+	error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE,
+	    &sb, NULL);
+	if (error != 0)
+		return (error);
+	freebsd11_cvtstat32(&sb, &sb32);
+	error = copyout(&sb32, uap->buf, sizeof (sb32));
+	return (error);
+}
+
+int
+freebsd11_freebsd32_lstat(struct thread *td,
+    struct freebsd11_freebsd32_lstat_args *uap)
+{
+	struct stat sb;
+	struct freebsd11_stat32 sb32;
+	int error;
+
 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error)
 		return (error);
-	copy_ostat(&sb, &sb32);
+	freebsd11_cvtstat32(&sb, &sb32);
 	error = copyout(&sb32, uap->ub, sizeof (sb32));
+	return (error);
+}
+
+int
+freebsd11_freebsd32_fhstat(struct thread *td,
+    struct freebsd11_freebsd32_fhstat_args *uap)
+{
+	struct stat sb;
+	struct freebsd11_stat32 sb32;
+	struct fhandle fh;
+	int error;
+
+	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
+        if (error != 0)
+                return (error);
+	error = kern_fhstat(td, fh, &sb);
+	if (error != 0)
+		return (error);
+	freebsd11_cvtstat32(&sb, &sb32);
+	error = copyout(&sb32, uap->sb, sizeof (sb32));
 	return (error);
 }
 #endif
 
 int
 freebsd32_sysctl(struct thread *td, struct freebsd32_sysctl_args *uap)
 {
 	int error, name[CTL_MAXNAME];
 	size_t j, oldlen;
 	uint32_t tmp;
 
 	if (uap->namelen > CTL_MAXNAME || uap->namelen < 2)
 		return (EINVAL);
  	error = copyin(uap->name, name, uap->namelen * sizeof(int));
  	if (error)
 		return (error);
 	if (uap->oldlenp) {
 		error = fueword32(uap->oldlenp, &tmp);
 		oldlen = tmp;
 	} else {
 		oldlen = 0;
 	}
 	if (error != 0)
 		return (EFAULT);
 	error = userland_sysctl(td, name, uap->namelen,
 		uap->old, &oldlen, 1,
 		uap->new, uap->newlen, &j, SCTL_MASK32);
 	if (error && error != ENOMEM)
 		return (error);
 	if (uap->oldlenp)
 		suword32(uap->oldlenp, j);
 	return (0);
 }
 
 /*
  * jail for 32-bit processes.  The leading version word of the user
  * structure selects which 32-bit layout is copied in; its fields are
  * converted into a native struct jail that is passed to kern_jail().
  * Version 1 and unknown versions are rejected with EINVAL.
  */
 int
 freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap)
 {
 	struct jail32_v0 j32_v0;
 	struct jail32 j32;
 	struct jail j;
 	uint32_t version;
 	int error;
 
 	error = copyin(uap->jail, &version, sizeof(uint32_t));
 	if (error != 0)
 		return (error);
 
 	switch (version) {
 	case 0:
 		/* FreeBSD single IPv4 jails. */
 		bzero(&j, sizeof(struct jail));
 		error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0));
 		if (error != 0)
 			return (error);
 		CP(j32_v0, j, version);
 		PTRIN_CP(j32_v0, j, path);
 		PTRIN_CP(j32_v0, j, hostname);
 		j.ip4s = htonl(j32_v0.ip_number);	/* jail_v0 is host order */
 		break;
 
 	case 2:	/* JAIL_API_VERSION */
 		/* FreeBSD multi-IPv4/IPv6,noIP jails. */
 		error = copyin(uap->jail, &j32, sizeof(struct jail32));
 		if (error != 0)
 			return (error);
 		CP(j32, j, version);
 		PTRIN_CP(j32, j, path);
 		PTRIN_CP(j32, j, hostname);
 		PTRIN_CP(j32, j, jailname);
 		CP(j32, j, ip4s);
 		CP(j32, j, ip6s);
 		PTRIN_CP(j32, j, ip4);
 		PTRIN_CP(j32, j, ip6);
 		break;
 
 	case 1:
 		/*
 		 * Version 1 was used by multi-IPv4 jail implementations
 		 * that never made it into the official kernel.
 		 */
 	default:
 		/* Sci-Fi jails are not supported, sorry. */
 		return (EINVAL);
 	}
 	return (kern_jail(td, &j));
 }
 
 int
 freebsd32_jail_set(struct thread *td, struct freebsd32_jail_set_args *uap)
 {
 	struct uio *auio;
 	int error;
 
 	/* Check that we have an even number of iovecs. */
 	if (uap->iovcnt & 1)
 		return (EINVAL);
 
 	error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error)
 		return (error);
 	error = kern_jail_set(td, auio, uap->flags);
 	free(auio, M_IOV);
 	return (error);
 }
 
 int
 freebsd32_jail_get(struct thread *td, struct freebsd32_jail_get_args *uap)
 {
 	struct iovec32 iov32;
 	struct uio *auio;
 	int error, i;
 
 	/* Check that we have an even number of iovecs. */
 	if (uap->iovcnt & 1)
 		return (EINVAL);
 
 	error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error)
 		return (error);
 	error = kern_jail_get(td, auio, uap->flags);
 	if (error == 0)
 		for (i = 0; i < uap->iovcnt; i++) {
 			PTROUT_CP(auio->uio_iov[i], iov32, iov_base);
 			CP(auio->uio_iov[i], iov32, iov_len);
 			error = copyout(&iov32, uap->iovp + i, sizeof(iov32));
 			if (error != 0)
 				break;
 		}
 	free(auio, M_IOV);
 	return (error);
 }
 
 int
 freebsd32_sigaction(struct thread *td, struct freebsd32_sigaction_args *uap)
 {
 	struct sigaction32 s32;
 	struct sigaction sa, osa, *sap;
 	int error;
 
 	if (uap->act) {
 		error = copyin(uap->act, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		sa.sa_handler = PTRIN(s32.sa_u);
 		CP(s32, sa, sa_flags);
 		CP(s32, sa, sa_mask);
 		sap = &sa;
 	} else
 		sap = NULL;
 	error = kern_sigaction(td, uap->sig, sap, &osa, 0);
 	if (error == 0 && uap->oact != NULL) {
 		s32.sa_u = PTROUT(osa.sa_handler);
 		CP(osa, s32, sa_flags);
 		CP(osa, s32, sa_mask);
 		error = copyout(&s32, uap->oact, sizeof(s32));
 	}
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 int
 freebsd4_freebsd32_sigaction(struct thread *td,
 			     struct freebsd4_freebsd32_sigaction_args *uap)
 {
 	struct sigaction32 s32;
 	struct sigaction sa, osa, *sap;
 	int error;
 
 	if (uap->act) {
 		error = copyin(uap->act, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		sa.sa_handler = PTRIN(s32.sa_u);
 		CP(s32, sa, sa_flags);
 		CP(s32, sa, sa_mask);
 		sap = &sa;
 	} else
 		sap = NULL;
 	error = kern_sigaction(td, uap->sig, sap, &osa, KSA_FREEBSD4);
 	if (error == 0 && uap->oact != NULL) {
 		s32.sa_u = PTROUT(osa.sa_handler);
 		CP(osa, s32, sa_flags);
 		CP(osa, s32, sa_mask);
 		error = copyout(&s32, uap->oact, sizeof(s32));
 	}
 	return (error);
 }
 #endif
 
 #ifdef COMPAT_43
 /*
  * Old-style sigaction as exchanged with 32-bit processes by
  * ofreebsd32_sigaction().
  */
 struct osigaction32 {
 	u_int32_t	sa_u;		/* handler, carried as a 32-bit pointer value */
 	osigset_t	sa_mask;	/* old-format signal mask */
 	int		sa_flags;	/* action flags */
 };
 
 /* The old signal entry points reject signal numbers <= 0 or >= ONSIG. */
 #define	ONSIG	32
 
 int
 ofreebsd32_sigaction(struct thread *td,
 			     struct ofreebsd32_sigaction_args *uap)
 {
 	struct osigaction32 s32;
 	struct sigaction sa, osa, *sap;
 	int error;
 
 	if (uap->signum <= 0 || uap->signum >= ONSIG)
 		return (EINVAL);
 
 	if (uap->nsa) {
 		error = copyin(uap->nsa, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		sa.sa_handler = PTRIN(s32.sa_u);
 		CP(s32, sa, sa_flags);
 		OSIG2SIG(s32.sa_mask, sa.sa_mask);
 		sap = &sa;
 	} else
 		sap = NULL;
 	error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET);
 	if (error == 0 && uap->osa != NULL) {
 		s32.sa_u = PTROUT(osa.sa_handler);
 		CP(osa, s32, sa_flags);
 		SIG2OSIG(osa.sa_mask, s32.sa_mask);
 		error = copyout(&s32, uap->osa, sizeof(s32));
 	}
 	return (error);
 }
 
 int
 ofreebsd32_sigprocmask(struct thread *td,
 			       struct ofreebsd32_sigprocmask_args *uap)
 {
 	sigset_t set, oset;
 	int error;
 
 	OSIG2SIG(uap->mask, set);
 	error = kern_sigprocmask(td, uap->how, &set, &oset, SIGPROCMASK_OLD);
 	SIG2OSIG(oset, td->td_retval[0]);
 	return (error);
 }
 
 /*
  * COMPAT_43 sigpending for 32-bit processes.  Under the process lock
  * the process-wide and per-thread pending sets are combined; the
  * result is returned in old format via td_retval[0].
  */
 int
 ofreebsd32_sigpending(struct thread *td,
 			      struct ofreebsd32_sigpending_args *uap)
 {
 	struct proc *p;
 	sigset_t pending;
 
 	p = td->td_proc;
 	PROC_LOCK(p);
 	pending = p->p_siglist;
 	SIGSETOR(pending, td->td_siglist);
 	PROC_UNLOCK(p);
 	SIG2OSIG(pending, td->td_retval[0]);
 	return (0);
 }
 
 /* sigvec as exchanged with 32-bit processes by ofreebsd32_sigvec(). */
 struct sigvec32 {
 	u_int32_t	sv_handler;	/* handler, carried as a 32-bit pointer value */
 	int		sv_mask;	/* old-format signal mask */
 	int		sv_flags;	/* sigvec flags */
 };
 
 int
 ofreebsd32_sigvec(struct thread *td,
 			  struct ofreebsd32_sigvec_args *uap)
 {
 	struct sigvec32 vec;
 	struct sigaction sa, osa, *sap;
 	int error;
 
 	if (uap->signum <= 0 || uap->signum >= ONSIG)
 		return (EINVAL);
 
 	if (uap->nsv) {
 		error = copyin(uap->nsv, &vec, sizeof(vec));
 		if (error)
 			return (error);
 		sa.sa_handler = PTRIN(vec.sv_handler);
 		OSIG2SIG(vec.sv_mask, sa.sa_mask);
 		sa.sa_flags = vec.sv_flags;
 		sa.sa_flags ^= SA_RESTART;
 		sap = &sa;
 	} else
 		sap = NULL;
 	error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET);
 	if (error == 0 && uap->osv != NULL) {
 		vec.sv_handler = PTROUT(osa.sa_handler);
 		SIG2OSIG(osa.sa_mask, vec.sv_mask);
 		vec.sv_flags = osa.sa_flags;
 		vec.sv_flags &= ~SA_NOCLDWAIT;
 		vec.sv_flags ^= SA_RESTART;
 		error = copyout(&vec, uap->osv, sizeof(vec));
 	}
 	return (error);
 }
 
 /*
  * COMPAT_43 sigblock for 32-bit processes: block the signals in the
  * old-format uap->mask and return the previous mask, in old format,
  * via td_retval[0].  Always returns 0.
  */
 int
 ofreebsd32_sigblock(struct thread *td,
 			    struct ofreebsd32_sigblock_args *uap)
 {
 	sigset_t blockset, prevset;
 
 	OSIG2SIG(uap->mask, blockset);
 	(void)kern_sigprocmask(td, SIG_BLOCK, &blockset, &prevset, 0);
 	SIG2OSIG(prevset, td->td_retval[0]);
 	return (0);
 }
 
 /*
  * COMPAT_43 sigsetmask for 32-bit processes: install the old-format
  * uap->mask as the signal mask and return the previous mask, in old
  * format, via td_retval[0].  Always returns 0.
  */
 int
 ofreebsd32_sigsetmask(struct thread *td,
 			      struct ofreebsd32_sigsetmask_args *uap)
 {
 	sigset_t newset, prevset;
 
 	OSIG2SIG(uap->mask, newset);
 	(void)kern_sigprocmask(td, SIG_SETMASK, &newset, &prevset, 0);
 	SIG2OSIG(prevset, td->td_retval[0]);
 	return (0);
 }
 
 int
 ofreebsd32_sigsuspend(struct thread *td,
 			      struct ofreebsd32_sigsuspend_args *uap)
 {
 	sigset_t mask;
 
 	OSIG2SIG(uap->mask, mask);
 	return (kern_sigsuspend(td, mask));
 }
 
 /* sigstack as exchanged with 32-bit processes by ofreebsd32_sigstack(). */
 struct sigstack32 {
 	u_int32_t	ss_sp;		/* stack pointer, carried as a 32-bit value */
 	int		ss_onstack;	/* on-stack indicator */
 };
 
 int
 ofreebsd32_sigstack(struct thread *td,
 			    struct ofreebsd32_sigstack_args *uap)
 {
 	struct sigstack32 s32;
 	struct sigstack nss, oss;
 	int error = 0, unss;
 
 	if (uap->nss != NULL) {
 		error = copyin(uap->nss, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		nss.ss_sp = PTRIN(s32.ss_sp);
 		CP(s32, nss, ss_onstack);
 		unss = 1;
 	} else {
 		unss = 0;
 	}
 	oss.ss_sp = td->td_sigstk.ss_sp;
 	oss.ss_onstack = sigonstack(cpu_getstack(td));
 	if (unss) {
 		td->td_sigstk.ss_sp = nss.ss_sp;
 		td->td_sigstk.ss_size = 0;
 		td->td_sigstk.ss_flags |= (nss.ss_onstack & SS_ONSTACK);
 		td->td_pflags |= TDP_ALTSTACK;
 	}
 	if (uap->oss != NULL) {
 		s32.ss_sp = PTROUT(oss.ss_sp);
 		CP(oss, s32, ss_onstack);
 		error = copyout(&s32, uap->oss, sizeof(s32));
 	}
 	return (error);
 }
 #endif
 
 int
 freebsd32_nanosleep(struct thread *td, struct freebsd32_nanosleep_args *uap)
 {
 
 	return (freebsd32_user_clock_nanosleep(td, CLOCK_REALTIME,
 	    TIMER_RELTIME, uap->rqtp, uap->rmtp));
 }
 
 /*
  * clock_nanosleep for 32-bit processes: runs the shared helper and
  * passes its result through kern_posix_error().
  */
 int
 freebsd32_clock_nanosleep(struct thread *td,
     struct freebsd32_clock_nanosleep_args *uap)
 {
 
 	return (kern_posix_error(td,
 	    freebsd32_user_clock_nanosleep(td, uap->clock_id, uap->flags,
 	    uap->rqtp, uap->rmtp)));
 }
 
 /*
  * Shared body of nanosleep/clock_nanosleep for 32-bit processes.
  *
  * The request is copied in as a timespec32 and widened.  The remaining
  * time is only reported for relative sleeps with a non-NULL ua_rmtp:
  * the destination is checked with useracc() before sleeping, and after
  * an EINTR the remainder is narrowed and copied out (a copyout failure
  * replaces the EINTR result).
  */
 static int
 freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id,
     int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp)
 {
 	struct timespec32 rem32, req32;
 	struct timespec rem, req;
 	int error, cperror, report_rem;
 
 	error = copyin(ua_rqtp, &req32, sizeof(req32));
 	if (error != 0)
 		return (error);
 
 	CP(req32, req, tv_sec);
 	CP(req32, req, tv_nsec);
 
 	report_rem = (ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0);
 	if (report_rem && !useracc(ua_rmtp, sizeof(rem32), VM_PROT_WRITE))
 		return (EFAULT);
 	error = kern_clock_nanosleep(td, clock_id, flags, &req, &rem);
 	if (error == EINTR && report_rem) {
 		CP(rem, rem32, tv_sec);
 		CP(rem, rem32, tv_nsec);
 
 		cperror = copyout(&rem32, ua_rmtp, sizeof(rem32));
 		if (cperror != 0)
 			error = cperror;
 	}
 	return (error);
 }
 
 int
 freebsd32_clock_gettime(struct thread *td,
 			struct freebsd32_clock_gettime_args *uap)
 {
 	struct timespec	ats;
 	struct timespec32 ats32;
 	int error;
 
 	error = kern_clock_gettime(td, uap->clock_id, &ats);
 	if (error == 0) {
 		CP(ats, ats32, tv_sec);
 		CP(ats, ats32, tv_nsec);
 		error = copyout(&ats32, uap->tp, sizeof(ats32));
 	}
 	return (error);
 }
 
 int
 freebsd32_clock_settime(struct thread *td,
 			struct freebsd32_clock_settime_args *uap)
 {
 	struct timespec	ats;
 	struct timespec32 ats32;
 	int error;
 
 	error = copyin(uap->tp, &ats32, sizeof(ats32));
 	if (error)
 		return (error);
 	CP(ats32, ats, tv_sec);
 	CP(ats32, ats, tv_nsec);
 
 	return (kern_clock_settime(td, uap->clock_id, &ats));
 }
 
 int
 freebsd32_clock_getres(struct thread *td,
 		       struct freebsd32_clock_getres_args *uap)
 {
 	struct timespec	ts;
 	struct timespec32 ts32;
 	int error;
 
 	if (uap->tp == NULL)
 		return (0);
 	error = kern_clock_getres(td, uap->clock_id, &ts);
 	if (error == 0) {
 		CP(ts, ts32, tv_sec);
 		CP(ts, ts32, tv_nsec);
 		error = copyout(&ts32, uap->tp, sizeof(ts32));
 	}
 	return (error);
 }
 
 int freebsd32_ktimer_create(struct thread *td,
     struct freebsd32_ktimer_create_args *uap)
 {
 	struct sigevent32 ev32;
 	struct sigevent ev, *evp;
 	int error, id;
 
 	if (uap->evp == NULL) {
 		evp = NULL;
 	} else {
 		evp = &ev;
 		error = copyin(uap->evp, &ev32, sizeof(ev32));
 		if (error != 0)
 			return (error);
 		error = convert_sigevent32(&ev32, &ev);
 		if (error != 0)
 			return (error);
 	}
 	error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1);
 	if (error == 0) {
 		error = copyout(&id, uap->timerid, sizeof(int));
 		if (error != 0)
 			kern_ktimer_delete(td, id);
 	}
 	return (error);
 }
 
 int
 freebsd32_ktimer_settime(struct thread *td,
     struct freebsd32_ktimer_settime_args *uap)
 {
 	struct itimerspec32 val32, oval32;
 	struct itimerspec val, oval, *ovalp;
 	int error;
 
 	error = copyin(uap->value, &val32, sizeof(val32));
 	if (error != 0)
 		return (error);
 	ITS_CP(val32, val);
 	ovalp = uap->ovalue != NULL ? &oval : NULL;
 	error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp);
 	if (error == 0 && uap->ovalue != NULL) {
 		ITS_CP(oval, oval32);
 		error = copyout(&oval32, uap->ovalue, sizeof(oval32));
 	}
 	return (error);
 }
 
 int
 freebsd32_ktimer_gettime(struct thread *td,
     struct freebsd32_ktimer_gettime_args *uap)
 {
 	struct itimerspec32 val32;
 	struct itimerspec val;
 	int error;
 
 	error = kern_ktimer_gettime(td, uap->timerid, &val);
 	if (error == 0) {
 		ITS_CP(val, val32);
 		error = copyout(&val32, uap->value, sizeof(val32));
 	}
 	return (error);
 }
 
 /*
  * clock_getcpuclockid2 for 32-bit processes: the 64-bit id is rebuilt
  * with PAIR32TO64() and the resulting clock id is copied out to
  * uap->clock_id.
  */
 int
 freebsd32_clock_getcpuclockid2(struct thread *td,
     struct freebsd32_clock_getcpuclockid2_args *uap)
 {
 	clockid_t cpuclk;
 	int error;
 
 	error = kern_clock_getcpuclockid2(td, PAIR32TO64(id_t, uap->id),
 	    uap->which, &cpuclk);
 	if (error != 0)
 		return (error);
 	return (copyout(&cpuclk, uap->clock_id, sizeof(clockid_t)));
 }
 
 int
 freebsd32_thr_new(struct thread *td,
 		  struct freebsd32_thr_new_args *uap)
 {
 	struct thr_param32 param32;
 	struct thr_param param;
 	int error;
 
 	if (uap->param_size < 0 ||
 	    uap->param_size > sizeof(struct thr_param32))
 		return (EINVAL);
 	bzero(&param, sizeof(struct thr_param));
 	bzero(&param32, sizeof(struct thr_param32));
 	error = copyin(uap->param, &param32, uap->param_size);
 	if (error != 0)
 		return (error);
 	param.start_func = PTRIN(param32.start_func);
 	param.arg = PTRIN(param32.arg);
 	param.stack_base = PTRIN(param32.stack_base);
 	param.stack_size = param32.stack_size;
 	param.tls_base = PTRIN(param32.tls_base);
 	param.tls_size = param32.tls_size;
 	param.child_tid = PTRIN(param32.child_tid);
 	param.parent_tid = PTRIN(param32.parent_tid);
 	param.flags = param32.flags;
 	param.rtp = PTRIN(param32.rtp);
 	param.spare[0] = PTRIN(param32.spare[0]);
 	param.spare[1] = PTRIN(param32.spare[1]);
 	param.spare[2] = PTRIN(param32.spare[2]);
 
 	return (kern_thr_new(td, &param));
 }
 
 int
 freebsd32_thr_suspend(struct thread *td, struct freebsd32_thr_suspend_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts, *tsp;
 	int error;
 
 	error = 0;
 	tsp = NULL;
 	if (uap->timeout != NULL) {
 		error = copyin((const void *)uap->timeout, (void *)&ts32,
 		    sizeof(struct timespec32));
 		if (error != 0)
 			return (error);
 		ts.tv_sec = ts32.tv_sec;
 		ts.tv_nsec = ts32.tv_nsec;
 		tsp = &ts;
 	}
 	return (kern_thr_suspend(td, tsp));
 }
 
 /*
  * Fill a siginfo32 from a native siginfo_t.  The destination is zeroed
  * first, then the members listed below are copied; si_addr is carried
  * as an integer and only the sival_int view of si_value is copied.
  */
 void
 siginfo_to_siginfo32(const siginfo_t *src, struct siginfo32 *dst)
 {
 
 	bzero(dst, sizeof(*dst));
 
 	/* Signal identification. */
 	dst->si_signo = src->si_signo;
 	dst->si_code = src->si_code;
 	dst->si_errno = src->si_errno;
 
 	/* Sender and status. */
 	dst->si_pid = src->si_pid;
 	dst->si_uid = src->si_uid;
 	dst->si_status = src->si_status;
 
 	/* Address and value payload. */
 	dst->si_addr = (uintptr_t)src->si_addr;
 	dst->si_value.sival_int = src->si_value.sival_int;
 
 	/* Timer details. */
 	dst->si_timerid = src->si_timerid;
 	dst->si_overrun = src->si_overrun;
 }
 
 /*
  * Local declaration of the sigqueue argument structure, used only when
  * _FREEBSD32_SYSPROTO_H_ is not defined.
  */
 #ifndef _FREEBSD32_SYSPROTO_H_
 struct freebsd32_sigqueue_args {
         pid_t pid;	/* passed to kern_sigqueue() as the target */
         int signum;	/* signal number to queue */
         /* union sigval32 */ int value;	/* payload, read as sival_int */
 };
 #endif
 int
 freebsd32_sigqueue(struct thread *td, struct freebsd32_sigqueue_args *uap)
 {
 	union sigval sv;
 
 	/*
 	 * On 32-bit ABIs, sival_int and sival_ptr are the same.
 	 * On 64-bit little-endian ABIs, the low bits are the same.
 	 * In 64-bit big-endian ABIs, sival_int overlaps with
 	 * sival_ptr's HIGH bits.  We choose to support sival_int
 	 * rather than sival_ptr in this case as it seems to be
 	 * more common.
 	 */
 	bzero(&sv, sizeof(sv));
 	sv.sival_int = uap->value;
 
 	return (kern_sigqueue(td, uap->pid, uap->signum, &sv));
 }
 
 int
 freebsd32_sigtimedwait(struct thread *td, struct freebsd32_sigtimedwait_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts;
 	struct timespec *timeout;
 	sigset_t set;
 	ksiginfo_t ksi;
 	struct siginfo32 si32;
 	int error;
 
 	if (uap->timeout) {
 		error = copyin(uap->timeout, &ts32, sizeof(ts32));
 		if (error)
 			return (error);
 		ts.tv_sec = ts32.tv_sec;
 		ts.tv_nsec = ts32.tv_nsec;
 		timeout = &ts;
 	} else
 		timeout = NULL;
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error)
 		return (error);
 
 	error = kern_sigtimedwait(td, set, &ksi, timeout);
 	if (error)
 		return (error);
 
 	if (uap->info) {
 		siginfo_to_siginfo32(&ksi.ksi_info, &si32);
 		error = copyout(&si32, uap->info, sizeof(struct siginfo32));
 	}
 
 	if (error == 0)
 		td->td_retval[0] = ksi.ksi_signo;
 	return (error);
 }
 
 /*
  * MPSAFE
  */
 int
 freebsd32_sigwaitinfo(struct thread *td, struct freebsd32_sigwaitinfo_args *uap)
 {
 	ksiginfo_t ksi;
 	struct siginfo32 si32;
 	sigset_t set;
 	int error;
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error)
 		return (error);
 
 	error = kern_sigtimedwait(td, set, &ksi, NULL);
 	if (error)
 		return (error);
 
 	if (uap->info) {
 		siginfo_to_siginfo32(&ksi.ksi_info, &si32);
 		error = copyout(&si32, uap->info, sizeof(struct siginfo32));
 	}	
 	if (error == 0)
 		td->td_retval[0] = ksi.ksi_signo;
 	return (error);
 }
 
 int
 freebsd32_cpuset_setid(struct thread *td,
     struct freebsd32_cpuset_setid_args *uap)
 {
 
 	return (kern_cpuset_setid(td, uap->which,
 	    PAIR32TO64(id_t, uap->id), uap->setid));
 }
 
 int
 freebsd32_cpuset_getid(struct thread *td,
     struct freebsd32_cpuset_getid_args *uap)
 {
 
 	return (kern_cpuset_getid(td, uap->level, uap->which,
 	    PAIR32TO64(id_t, uap->id), uap->setid));
 }
 
 int
 freebsd32_cpuset_getaffinity(struct thread *td,
     struct freebsd32_cpuset_getaffinity_args *uap)
 {
 
 	return (kern_cpuset_getaffinity(td, uap->level, uap->which,
 	    PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask));
 }
 
 int
 freebsd32_cpuset_setaffinity(struct thread *td,
     struct freebsd32_cpuset_setaffinity_args *uap)
 {
 
 	return (kern_cpuset_setaffinity(td, uap->level, uap->which,
 	    PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask));
 }
 
 /*
  * 32-bit compat wrapper for nmount: validates the iovec count, copies
  * the 32-bit iovec array in via freebsd32_copyinuio() and passes the
  * resulting uio to vfs_donmount().
  */
 int
 freebsd32_nmount(struct thread *td,
     struct freebsd32_nmount_args /* {
     	struct iovec *iovp;
     	unsigned int iovcnt;
     	int flags;
     } */ *uap)
 {
 	struct uio *auio;
 	uint64_t flags;
 	int error;
 
 	/*
 	 * Mount flags are 64 bits wide.  A 32-bit caller only supplies
 	 * 32 of them, but everything from here on works with the full
 	 * 64-bit value.
 	 */
 	flags = uap->flags;
 
 	AUDIT_ARG_FFLAGS(flags);
 
 	/*
 	 * Strip MNT_ROOTFS.  Only the kernel may set it, when mounting
 	 * its root file system; userspace callers of nmount() must not.
 	 * It still has to be filtered here so that MNT_UPDATE on the
 	 * root file system keeps working.
 	 */
 	flags &= ~MNT_ROOTFS;
 
 	/*
 	 * The iovec array holds pairs, so the count must be even, and
 	 * at least two options (four iovecs) are required.
 	 */
 	if ((uap->iovcnt & 1) != 0 || uap->iovcnt < 4)
 		return (EINVAL);
 
 	error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error != 0)
 		return (error);
 
 	error = vfs_donmount(td, flags, auio);
 	free(auio, M_IOV);
 	return (error);
 }
 
 #if 0
 /*
  * Skeleton for a new 32-bit compat syscall wrapper; compiled out.
  * Pattern: copy the 32-bit structure in, translate it to the native
  * form, call the kern_*() implementation, then translate and copy the
  * result back out.
  *
  * NOTE(review): p32 is declared but never assigned before it is used
  * as the copyout() destination below; anyone instantiating this
  * template must point it at the user buffer first.
  */
 int
 freebsd32_xxx(struct thread *td, struct freebsd32_xxx_args *uap)
 {
 	struct yyy32 *p32, s32;
 	struct yyy *p = NULL, s;
 	struct xxx_arg ap;
 	int error;
 
 	if (uap->zzz) {
 		error = copyin(uap->zzz, &s32, sizeof(s32));
 		if (error)
 			return (error);
 		/* translate in */
 		p = &s;
 	}
 	error = kern_xxx(td, p);
 	if (error)
 		return (error);
 	if (uap->zzz) {
 		/* translate out */
 		error = copyout(&s32, p32, sizeof(s32));
 	}
 	return (error);
 }
 #endif
 
 /*
  * Install *new_sysent into freebsd32_sysent[].
  *
  * If *offset is NO_SYSCALL, the first slot (starting at 1) whose handler
  * is lkmnosys is chosen and written back through offset; ENFILE is
  * returned when none is found.  Otherwise *offset must be within range
  * (EINVAL) and the slot must currently hold lkmnosys or lkmressys
  * (EEXIST).  The previous entry is saved in *old_sysent.  Flags other
  * than SY_THR_STATIC are rejected with EINVAL.
  */
 int
 syscall32_register(int *offset, struct sysent *new_sysent,
     struct sysent *old_sysent, int flags)
 {
 	int slot;
 
 	if ((flags & ~SY_THR_STATIC) != 0)
 		return (EINVAL);
 
 	if (*offset == NO_SYSCALL) {
 		/* Search for a free slot. */
 		slot = 1;
 		while (slot < SYS_MAXSYSCALL &&
 		    freebsd32_sysent[slot].sy_call != (sy_call_t *)lkmnosys)
 			slot++;
 		if (slot == SYS_MAXSYSCALL)
 			return (ENFILE);
 		*offset = slot;
 	} else {
 		if (*offset < 0 || *offset >= SYS_MAXSYSCALL)
 			return (EINVAL);
 		if (freebsd32_sysent[*offset].sy_call !=
 		    (sy_call_t *)lkmnosys &&
 		    freebsd32_sysent[*offset].sy_call !=
 		    (sy_call_t *)lkmressys)
 			return (EEXIST);
 	}
 
 	*old_sysent = freebsd32_sysent[*offset];
 	freebsd32_sysent[*offset] = *new_sysent;
 	atomic_store_rel_32(&freebsd32_sysent[*offset].sy_thrcnt, flags);
 	return (0);
 }
 
 /*
  * Restore the saved entry *old_sysent into slot *offset of
  * freebsd32_sysent[].  An offset of 0 is treated as "nothing to do".
  * Always returns 0.
  */
 int
 syscall32_deregister(int *offset, struct sysent *old_sysent)
 {
 
 	if (*offset != 0)
 		freebsd32_sysent[*offset] = *old_sysent;
 	return (0);
 }
 
 /*
  * Module event handler for modules that provide a 32-bit syscall.
  * MOD_LOAD registers the syscall and MOD_UNLOAD restores the previous
  * entry; any chained handler in the module data is invoked as well.
  * Other events go to the chained handler if there is one and otherwise
  * yield EOPNOTSUPP.
  */
 int
 syscall32_module_handler(struct module *mod, int what, void *arg)
 {
 	struct syscall_module_data *data;
 	modspecific_t ms;
 	int error;
 
 	data = (struct syscall_module_data *)arg;
 
 	switch (what) {
 	case MOD_LOAD:
 		error = syscall32_register(data->offset, data->new_sysent,
 		    &data->old_sysent, SY_THR_STATIC_KLD);
 		if (error != 0) {
 			/*
 			 * Clear the offset pointer as a marker so that
 			 * MOD_UNLOAD knows registration never happened.
 			 */
 			data->offset = NULL;
 			return (error);
 		}
 		ms.intval = *data->offset;
 		MOD_XLOCK;
 		module_setspecific(mod, &ms);
 		MOD_XUNLOCK;
 		if (data->chainevh != NULL)
 			error = data->chainevh(mod, what, data->chainarg);
 		return (error);
 
 	case MOD_UNLOAD:
 		/*
 		 * A NULL offset means MOD_LOAD failed.  The chained
 		 * handler never saw that MOD_LOAD, so it must not see
 		 * the MOD_UNLOAD either.
 		 */
 		if (data->offset == NULL)
 			return (0);
 		if (data->chainevh != NULL) {
 			error = data->chainevh(mod, what, data->chainarg);
 			if (error != 0)
 				return (error);
 		}
 		return (syscall32_deregister(data->offset,
 		    &data->old_sysent));
 
 	default:
 		if (data->chainevh != NULL)
 			return (data->chainevh(mod, what, data->chainarg));
 		return (EOPNOTSUPP);
 	}
 }
 
 int
 syscall32_helper_register(struct syscall_helper_data *sd, int flags)
 {
 	struct syscall_helper_data *sd1;
 	int error;
 
 	for (sd1 = sd; sd1->syscall_no != NO_SYSCALL; sd1++) {
 		error = syscall32_register(&sd1->syscall_no, &sd1->new_sysent,
 		    &sd1->old_sysent, flags);
 		if (error != 0) {
 			syscall32_helper_unregister(sd);
 			return (error);
 		}
 		sd1->registered = 1;
 	}
 	return (0);
 }
 
 /*
  * Walk sd[] from the start, deregistering each entry and clearing its
  * registered flag, stopping at the first entry that is not marked as
  * registered.  Always returns 0.
  */
 int
 syscall32_helper_unregister(struct syscall_helper_data *sd)
 {
 	struct syscall_helper_data *cur;
 
 	cur = sd;
 	while (cur->registered != 0) {
 		syscall32_deregister(&cur->syscall_no, &cur->old_sysent);
 		cur->registered = 0;
 		cur++;
 	}
 	return (0);
 }
 
 /*
  * Build the initial user stack image for a 32-bit process image.
  *
  * Working downward from the ps_strings address, this copies out (in
  * order) the signal trampoline when one must be installed on the stack,
  * the executable path (only when both execpath and auxargs are set),
  * a random SSP canary, a 32-bit copy of the pagesizes array, and the
  * argument/environment strings.  It then writes the argv and envp
  * pointer vectors as 32-bit words and fills in the freebsd32_ps_strings
  * fields.  The user addresses of the exec path, canary and pagesizes
  * array are recorded in imgp.
  *
  * Returns the address of the vector table, which is also the initial
  * stack base.  Note that the results of copyout() and suword32() are
  * not checked anywhere in this function.
  */
 register_t *
 freebsd32_copyout_strings(struct image_params *imgp)
 {
 	int argc, envc, i;
 	u_int32_t *vectp;
 	char *stringp;
 	uintptr_t destp;
 	u_int32_t *stack_base;
 	struct freebsd32_ps_strings *arginfo;
 	char canary[sizeof(long) * 8];
 	int32_t pagesizes32[MAXPAGESIZES];
 	size_t execpath_len;
 	int szsigcode;
 
 	/*
 	 * Calculate string base and vector table pointers.
 	 * Also deal with signal trampoline code for this exec type.
 	 */
 	if (imgp->execpath != NULL && imgp->auxargs != NULL)
 		execpath_len = strlen(imgp->execpath) + 1;
 	else
 		execpath_len = 0;
 	arginfo = (struct freebsd32_ps_strings *)curproc->p_sysent->
 	    sv_psstrings;
 	/* The trampoline is only placed on the stack if no base is set. */
 	if (imgp->proc->p_sysent->sv_sigcode_base == 0)
 		szsigcode = *(imgp->proc->p_sysent->sv_szsigcode);
 	else
 		szsigcode = 0;
 	destp =	(uintptr_t)arginfo;
 
 	/*
 	 * install sigcode
 	 */
 	if (szsigcode != 0) {
 		destp -= szsigcode;
 		destp = rounddown2(destp, sizeof(uint32_t));
 		copyout(imgp->proc->p_sysent->sv_sigcode, (void *)destp,
 		    szsigcode);
 	}
 
 	/*
 	 * Copy the image path for the rtld.
 	 */
 	if (execpath_len != 0) {
 		destp -= execpath_len;
 		imgp->execpathp = destp;
 		copyout(imgp->execpath, (void *)destp, execpath_len);
 	}
 
 	/*
 	 * Prepare the canary for SSP.
 	 */
 	arc4rand(canary, sizeof(canary), 0);
 	destp -= sizeof(canary);
 	imgp->canary = destp;
 	copyout(canary, (void *)destp, sizeof(canary));
 	imgp->canarylen = sizeof(canary);
 
 	/*
 	 * Prepare the pagesizes array.
 	 */
 	for (i = 0; i < MAXPAGESIZES; i++)
 		pagesizes32[i] = (uint32_t)pagesizes[i];
 	destp -= sizeof(pagesizes32);
 	destp = rounddown2(destp, sizeof(uint32_t));
 	imgp->pagesizes = destp;
 	copyout(pagesizes32, (void *)destp, sizeof(pagesizes32));
 	imgp->pagesizeslen = sizeof(pagesizes32);
 
 	/* Reserve room for the strings (the used part of the arg buffer). */
 	destp -= ARG_MAX - imgp->args->stringspace;
 	destp = rounddown2(destp, sizeof(uint32_t));
 
 	/*
 	 * If we have a valid auxargs ptr, prepare some room
 	 * on the stack.
 	 */
 	if (imgp->auxargs) {
 		/*
 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
 		 * lower compatibility.
 		 */
 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size
 			: (AT_COUNT * 2);
 		/*
 		 * The '+ 2' is for the null pointers at the end of each of
 		 * the arg and env vector sets,and imgp->auxarg_size is room
 		 * for argument of Runtime loader.
 		 */
 		vectp = (u_int32_t *) (destp - (imgp->args->argc +
 		    imgp->args->envc + 2 + imgp->auxarg_size + execpath_len) *
 		    sizeof(u_int32_t));
 	} else {
 		/*
 		 * The '+ 2' is for the null pointers at the end of each of
 		 * the arg and env vector sets
 		 */
 		vectp = (u_int32_t *)(destp - (imgp->args->argc +
 		    imgp->args->envc + 2) * sizeof(u_int32_t));
 	}
 
 	/*
 	 * vectp also becomes our initial stack base
 	 */
 	stack_base = vectp;
 
 	stringp = imgp->args->begin_argv;
 	argc = imgp->args->argc;
 	envc = imgp->args->envc;
 	/*
 	 * Copy out strings - arguments and environment.
 	 */
 	copyout(stringp, (void *)destp, ARG_MAX - imgp->args->stringspace);
 
 	/*
 	 * Fill in "ps_strings" struct for ps, w, etc.
 	 */
 	suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp);
 	suword32(&arginfo->ps_nargvstr, argc);
 
 	/*
 	 * Fill in argument portion of vector table.
 	 */
 	for (; argc > 0; --argc) {
 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
 		/*
 		 * Walk the kernel copy of the string to find its length,
 		 * advancing the user address in step (plus the NUL).
 		 */
 		while (*stringp++ != 0)
 			destp++;
 		destp++;
 	}
 
 	/* a null vector table pointer separates the argp's from the envp's */
 	suword32(vectp++, 0);
 
 	suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp);
 	suword32(&arginfo->ps_nenvstr, envc);
 
 	/*
 	 * Fill in environment portion of vector table.
 	 */
 	for (; envc > 0; --envc) {
 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
 		while (*stringp++ != 0)
 			destp++;
 		destp++;
 	}
 
 	/* end of vector table is a null pointer */
 	suword32(vectp, 0);
 
 	return ((register_t *)stack_base);
 }
 
 /*
  * 32-bit compat wrapper around kern_kldstat().
  *
  * The caller stores the size of the structure it expects in the
  * version field; only the sizes of kld32_file_stat_1 and
  * kld32_file_stat are accepted (EINVAL otherwise).  The native result is
  * converted field by field and exactly 'version' bytes are copied out.
  */
 int
 freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap)
 {
 	struct kld_file_stat stat;
 	struct kld32_file_stat stat32;
 	int error, version;
 
 	if ((error = copyin(&uap->stat->version, &version, sizeof(version)))
 	    != 0)
 		return (error);
 	if (version != sizeof(struct kld32_file_stat_1) &&
 	    version != sizeof(struct kld32_file_stat))
 		return (EINVAL);
 
 	error = kern_kldstat(td, uap->fileid, &stat);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * stat32 lives on the kernel stack and is copied out wholesale,
 	 * so clear it first: otherwise padding and any field not assigned
 	 * below would leak stale stack contents to userland.  The version
 	 * field in particular must be set explicitly, or the copyout
 	 * would overwrite the caller's value with garbage.
 	 */
 	bzero(&stat32, sizeof(stat32));
 	stat32.version = version;
 	bcopy(&stat.name[0], &stat32.name[0], sizeof(stat.name));
 	CP(stat, stat32, refs);
 	CP(stat, stat32, id);
 	PTROUT_CP(stat, stat32, address);
 	CP(stat, stat32, size);
 	bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname));
 	return (copyout(&stat32, uap->stat, version));
 }
 
 int
 freebsd32_posix_fallocate(struct thread *td,
     struct freebsd32_posix_fallocate_args *uap)
 {
 	int error;
 
 	error = kern_posix_fallocate(td, uap->fd,
 	    PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len));
 	return (kern_posix_error(td, error));
 }
 
 int
 freebsd32_posix_fadvise(struct thread *td,
     struct freebsd32_posix_fadvise_args *uap)
 {
 	int error;
 
 	error = kern_posix_fadvise(td, uap->fd, PAIR32TO64(off_t, uap->offset),
 	    PAIR32TO64(off_t, uap->len), uap->advice);
 	return (kern_posix_error(td, error));
 }
 
 /*
  * Translate a 32-bit sigevent into the native form.  Only the fields
  * relevant to the notification type are copied.  Returns EINVAL for an
  * unrecognised sigev_notify value, 0 otherwise.
  */
 int
 convert_sigevent32(struct sigevent32 *sig32, struct sigevent *sig)
 {
 
 	CP(*sig32, *sig, sigev_notify);
 
 	switch (sig->sigev_notify) {
 	case SIGEV_NONE:
 		/* Nothing beyond the notification type. */
 		return (0);
 
 	case SIGEV_THREAD_ID:
 		/* Same as SIGEV_SIGNAL, plus the target thread id. */
 		CP(*sig32, *sig, sigev_notify_thread_id);
 		CP(*sig32, *sig, sigev_signo);
 		PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr);
 		return (0);
 
 	case SIGEV_SIGNAL:
 		CP(*sig32, *sig, sigev_signo);
 		PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr);
 		return (0);
 
 	case SIGEV_KEVENT:
 		CP(*sig32, *sig, sigev_notify_kqueue);
 		CP(*sig32, *sig, sigev_notify_kevent_flags);
 		PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr);
 		return (0);
 
 	default:
 		return (EINVAL);
 	}
 }
 
 /*
  * 32-bit compat wrapper around kern_procctl().
  *
  * The first switch prepares the kernel-side argument for the command:
  * integer flags are copied in, the reaper-pids request is converted from
  * its 32-bit layout, and the other reaper structures are used in their
  * native layout.  Unknown commands are rejected with EINVAL.  After the
  * call, the second switch copies results back to the caller for the
  * commands that produce output.
  */
 int
 freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap)
 {
 	void *data;
 	union {
 		struct procctl_reaper_status rs;
 		struct procctl_reaper_pids rp;
 		struct procctl_reaper_kill rk;
 	} x;
 	union {
 		struct procctl_reaper_pids32 rp;
 	} x32;
 	int error, error1, flags;
 
 	switch (uap->com) {
 	case PROC_SPROTECT:
 	case PROC_TRACE_CTL:
 	case PROC_TRAPCAP_CTL:
 		/* Input-only commands taking an int. */
 		error = copyin(PTRIN(uap->data), &flags, sizeof(flags));
 		if (error != 0)
 			return (error);
 		data = &flags;
 		break;
 	case PROC_REAP_ACQUIRE:
 	case PROC_REAP_RELEASE:
 		/* These take no argument; a non-NULL pointer is an error. */
 		if (uap->data != NULL)
 			return (EINVAL);
 		data = NULL;
 		break;
 	case PROC_REAP_STATUS:
 		/* Output-only; copied out after the call. */
 		data = &x.rs;
 		break;
 	case PROC_REAP_GETPIDS:
 		/* Convert the 32-bit request (count + user pointer). */
 		error = copyin(uap->data, &x32.rp, sizeof(x32.rp));
 		if (error != 0)
 			return (error);
 		CP(x32.rp, x.rp, rp_count);
 		PTRIN_CP(x32.rp, x.rp, rp_pids);
 		data = &x.rp;
 		break;
 	case PROC_REAP_KILL:
 		/* In/out structure, used in its native layout. */
 		error = copyin(uap->data, &x.rk, sizeof(x.rk));
 		if (error != 0)
 			return (error);
 		data = &x.rk;
 		break;
 	case PROC_TRACE_STATUS:
 	case PROC_TRAPCAP_STATUS:
 		/* Output-only int; copied out after the call. */
 		data = &flags;
 		break;
 	default:
 		return (EINVAL);
 	}
 	error = kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id),
 	    uap->com, data);
 	switch (uap->com) {
 	case PROC_REAP_STATUS:
 		if (error == 0)
 			error = copyout(&x.rs, uap->data, sizeof(x.rs));
 		break;
 	case PROC_REAP_KILL:
 		/*
 		 * The structure is copied back even when the call failed;
 		 * a copyout failure is only reported if the call itself
 		 * succeeded.
 		 */
 		error1 = copyout(&x.rk, uap->data, sizeof(x.rk));
 		if (error == 0)
 			error = error1;
 		break;
 	case PROC_TRACE_STATUS:
 	case PROC_TRAPCAP_STATUS:
 		if (error == 0)
 			error = copyout(&flags, uap->data, sizeof(flags));
 		break;
 	}
 	return (error);
 }
 
 /*
  * 32-bit compat wrapper around kern_fcntl_freebsd().  The 32-bit arg is
  * widened to long: without sign extension for the commands listed
  * below, and with the default int-to-long conversion for all others.
  */
 int
 freebsd32_fcntl(struct thread *td, struct freebsd32_fcntl_args *uap)
 {
 	long arg;
 
 	switch (uap->cmd) {
 	case F_SETLK_REMOTE:
 	case F_SETLKW:
 	case F_SETLK:
 	case F_GETLK:
 	case F_SETFD:
 	case F_SETFL:
 	case F_OGETLK:
 	case F_OSETLK:
 	case F_OSETLKW:
 		/*
 		 * These operations interpret arg as flags or as a
 		 * pointer, so convert it as an unsigned quantity.
 		 */
 		arg = (unsigned int)(uap->arg);
 		break;
 	default:
 		arg = uap->arg;
 		break;
 	}
 
 	return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, arg));
 }
 
 int
 freebsd32_ppoll(struct thread *td, struct freebsd32_ppoll_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts, *tsp;
 	sigset_t set, *ssp;
 	int error;
 
 	if (uap->ts != NULL) {
 		error = copyin(uap->ts, &ts32, sizeof(ts32));
 		if (error != 0)
 			return (error);
 		CP(ts32, ts, tv_sec);
 		CP(ts32, ts, tv_nsec);
 		tsp = &ts;
 	} else
 		tsp = NULL;
 	if (uap->set != NULL) {
 		error = copyin(uap->set, &set, sizeof(set));
 		if (error != 0)
 			return (error);
 		ssp = &set;
 	} else
 		ssp = NULL;
 
 	return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp));
 }
Index: head/sys/compat/freebsd32/syscalls.master
===================================================================
--- head/sys/compat/freebsd32/syscalls.master	(revision 318735)
+++ head/sys/compat/freebsd32/syscalls.master	(revision 318736)
@@ -1,1086 +1,1113 @@
  $FreeBSD$
 ;	from: @(#)syscalls.master	8.2 (Berkeley) 1/13/94
 ;	from: src/sys/kern/syscalls.master 1.107
 ;
 ; System call name/number master file.
 ; Processed to create init_sysent.c, syscalls.c and syscall.h.
 
 ; Columns: number audit type name alt{name,tag,rtyp}/comments
 ;	number	system call number, must be in order
 ;	audit	the audit event associated with the system call
 ;		A value of AUE_NULL means no auditing, but it also means that
 ;		there is no audit event for the call at this time. For the
 ;		case where the event exists, but we don't want auditing, the
 ;		event should be #defined to AUE_NULL in audit_kevents.h.
 ;	type	one of STD, OBSOL, UNIMPL, COMPAT, COMPAT4, COMPAT6,
-;		COMPAT7, NODEF, NOARGS, NOPROTO, NOSTD
+;		COMPAT7, COMPAT11, NODEF, NOARGS, NOPROTO, NOSTD
 ;		The COMPAT* options may be combined with one or more NO*
 ;		options separated by '|' with no spaces (e.g. COMPAT|NOARGS)
 ;	name	pseudo-prototype of syscall routine
 ;		If one of the following alts is different, then all appear:
 ;	altname	name of system call if different
 ;	alttag	name of args struct tag if different from [o]`name'"_args"
 ;	altrtyp	return type if not int (bogus - syscalls always return int)
 ;		for UNIMPL/OBSOL, name continues with comments
 
 ; types:
 ;	STD	always included
 ;	COMPAT	included on COMPAT #ifdef
 ;	COMPAT4	included on COMPAT4 #ifdef (FreeBSD 4 compat)
 ;	COMPAT6	included on COMPAT6 #ifdef (FreeBSD 6 compat)
 ;	COMPAT7	included on COMPAT7 #ifdef (FreeBSD 7 compat)
 ;	COMPAT10 included on COMPAT10 #ifdef (FreeBSD 10 compat)
+;	COMPAT11 included on COMPAT11 #ifdef (FreeBSD 11 compat)
 ;	OBSOL	obsolete, not included in system, only specifies name
 ;	UNIMPL	not implemented, placeholder only
 ;	NOSTD	implemented but as a lkm that can be statically
 ;		compiled in; sysent entry will be filled with lkmressys
 ;		so the SYSCALL_MODULE macro works
 ;	NOARGS	same as STD except do not create structure in sys/sysproto.h
 ;	NODEF	same as STD except only have the entry in the syscall table
 ;		added.  Meaning - do not create structure or function
 ;		prototype in sys/sysproto.h
 ;	NOPROTO	same as STD except do not create structure or
 ;		function prototype in sys/sysproto.h.  Does add a
 ;		definition to syscall.h besides adding a sysent.
 
 ; #ifdef's, etc. may be included, and are copied to the output files.
 
 #include <sys/param.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/mount.h>
 #include <sys/socket.h>
 #include <compat/freebsd32/freebsd32.h>
 #include <compat/freebsd32/freebsd32_proto.h>
 
 #if !defined(PAD64_REQUIRED) && (defined(__powerpc__) || defined(__mips__))
 #define PAD64_REQUIRED
 #endif
 
 ; Reserved/unimplemented system calls in the range 0-150 inclusive
 ; are reserved for use in future Berkeley releases.
 ; Additional system calls implemented in vendor and other
 ; redistributions should be placed in the reserved range at the end
 ; of the current calls.
 
 0	AUE_NULL	NOPROTO	{ int nosys(void); } syscall nosys_args int
 1	AUE_EXIT	NOPROTO	{ void sys_exit(int rval); } exit \
 				    sys_exit_args void
 2	AUE_FORK	NOPROTO	{ int fork(void); }
 3	AUE_READ	NOPROTO	{ ssize_t read(int fd, void *buf, \
 				    size_t nbyte); }
 4	AUE_WRITE	NOPROTO	{ ssize_t write(int fd, const void *buf, \
 				    size_t nbyte); }
 5	AUE_OPEN_RWTC	NOPROTO	{ int open(char *path, int flags, \
 				    int mode); }
 6	AUE_CLOSE	NOPROTO	{ int close(int fd); }
 7	AUE_WAIT4	STD	{ int freebsd32_wait4(int pid, int *status, \
 				    int options, struct rusage32 *rusage); }
 8	AUE_CREAT	OBSOL	old creat
 9	AUE_LINK	NOPROTO	{ int link(char *path, char *link); }
 10	AUE_UNLINK	NOPROTO	{ int unlink(char *path); }
 11	AUE_NULL	OBSOL	execv
 12	AUE_CHDIR	NOPROTO	{ int chdir(char *path); }
 13	AUE_FCHDIR	NOPROTO	{ int fchdir(int fd); }
-14	AUE_MKNOD	NOPROTO	{ int mknod(char *path, int mode, int dev); }
+14	AUE_MKNOD	COMPAT11 { int freebsd32_mknod(char *path, \
+					int mode, int dev); }
 15	AUE_CHMOD	NOPROTO	{ int chmod(char *path, int mode); }
 16	AUE_CHOWN	NOPROTO	{ int chown(char *path, int uid, int gid); }
 17	AUE_NULL	NOPROTO	{ int obreak(char *nsize); } break \
 				    obreak_args int
 18	AUE_GETFSSTAT	COMPAT4	{ int freebsd32_getfsstat( \
 				    struct statfs32 *buf, long bufsize, \
 				    int mode); }
 19	AUE_LSEEK	COMPAT	{ int freebsd32_lseek(int fd, int offset, \
 				    int whence); }
 20	AUE_GETPID	NOPROTO	{ pid_t getpid(void); }
 21	AUE_MOUNT	NOPROTO	{ int mount(char *type, char *path, \
 				    int flags, caddr_t data); }
 22	AUE_UMOUNT	NOPROTO	{ int unmount(char *path, int flags); }
 23	AUE_SETUID	NOPROTO	{ int setuid(uid_t uid); }
 24	AUE_GETUID	NOPROTO	{ uid_t getuid(void); }
 25	AUE_GETEUID	NOPROTO	{ uid_t geteuid(void); }
 26	AUE_PTRACE	NOPROTO	{ int ptrace(int req, pid_t pid, \
 				    caddr_t addr, int data); }
 27	AUE_RECVMSG	STD	{ int freebsd32_recvmsg(int s, struct msghdr32 *msg, \
 				    int flags); }
 28	AUE_SENDMSG	STD	{ int freebsd32_sendmsg(int s, struct msghdr32 *msg, \
 				    int flags); }
 29	AUE_RECVFROM	STD	{ int freebsd32_recvfrom(int s, uint32_t buf, \
 				    uint32_t len, int flags, uint32_t from, \
 				    uint32_t fromlenaddr); }
 30	AUE_ACCEPT	NOPROTO	{ int accept(int s, caddr_t name, \
 				    int *anamelen); }
 31	AUE_GETPEERNAME	NOPROTO	{ int getpeername(int fdes, caddr_t asa, \
 				    int *alen); }
 32	AUE_GETSOCKNAME	NOPROTO	{ int getsockname(int fdes, caddr_t asa, \
 				    int *alen); }
 33	AUE_ACCESS	NOPROTO	{ int access(char *path, int amode); }
 34	AUE_CHFLAGS	NOPROTO	{ int chflags(const char *path, u_long flags); }
 35	AUE_FCHFLAGS	NOPROTO	{ int fchflags(int fd, u_long flags); }
 36	AUE_SYNC	NOPROTO	{ int sync(void); }
 37	AUE_KILL	NOPROTO	{ int kill(int pid, int signum); }
 38	AUE_STAT	COMPAT	{ int freebsd32_stat(char *path, \
 				    struct ostat32 *ub); }
 39	AUE_GETPPID	NOPROTO	{ pid_t getppid(void); }
 40	AUE_LSTAT	COMPAT	{ int freebsd32_lstat(char *path, \
 				    struct ostat *ub); }
 41	AUE_DUP		NOPROTO	{ int dup(u_int fd); }
 42	AUE_PIPE	COMPAT10	{ int freebsd32_pipe(void); }
 43	AUE_GETEGID	NOPROTO	{ gid_t getegid(void); }
 44	AUE_PROFILE	NOPROTO	{ int profil(caddr_t samples, size_t size, \
 				    size_t offset, u_int scale); }
 45	AUE_KTRACE	NOPROTO	{ int ktrace(const char *fname, int ops, \
 				    int facs, int pid); }
 46	AUE_SIGACTION	COMPAT	{ int freebsd32_sigaction( int signum, \
 				   struct osigaction32 *nsa, \
 				   struct osigaction32 *osa); }
 47	AUE_GETGID	NOPROTO	{ gid_t getgid(void); }
 48	AUE_SIGPROCMASK	COMPAT	{ int freebsd32_sigprocmask(int how, \
 				   osigset_t mask); }
 49	AUE_GETLOGIN	NOPROTO	{ int getlogin(char *namebuf, \
 				    u_int namelen); }
 50	AUE_SETLOGIN	NOPROTO	{ int setlogin(char *namebuf); }
 51	AUE_ACCT	NOPROTO	{ int acct(char *path); }
 52	AUE_SIGPENDING	COMPAT	{ int freebsd32_sigpending(void); }
 53	AUE_SIGALTSTACK	STD	{ int freebsd32_sigaltstack( \
 				    struct sigaltstack32 *ss, \
 				    struct sigaltstack32 *oss); }
 54	AUE_IOCTL	STD	{ int freebsd32_ioctl(int fd, uint32_t com, \
 				    struct md_ioctl32 *data); }
 55	AUE_REBOOT	NOPROTO	{ int reboot(int opt); }
 56	AUE_REVOKE	NOPROTO	{ int revoke(char *path); }
 57	AUE_SYMLINK	NOPROTO	{ int symlink(char *path, char *link); }
 58	AUE_READLINK	NOPROTO	{ ssize_t readlink(char *path, char *buf, \
 				    size_t count); }
 59	AUE_EXECVE	STD	{ int freebsd32_execve(char *fname, \
 				    uint32_t *argv, uint32_t *envv); }
 60	AUE_UMASK	NOPROTO	{ int umask(int newmask); } umask \
 				    umask_args int
 61	AUE_CHROOT	NOPROTO	{ int chroot(char *path); }
 62	AUE_FSTAT	COMPAT	{ int freebsd32_fstat(int fd, \
 				    struct ostat32 *ub); }
 63	AUE_NULL	OBSOL	ogetkerninfo
 64	AUE_NULL	COMPAT	{ int freebsd32_getpagesize( \
 				    int32_t dummy); }
 65	AUE_MSYNC	NOPROTO	{ int msync(void *addr, size_t len, \
 				    int flags); }
 66	AUE_VFORK	NOPROTO	{ int vfork(void); }
 67	AUE_NULL	OBSOL	vread
 68	AUE_NULL	OBSOL	vwrite
 69	AUE_SBRK	NOPROTO	{ int sbrk(int incr); }
 70	AUE_SSTK	NOPROTO	{ int sstk(int incr); }
 71	AUE_MMAP	COMPAT|NOPROTO	{ int mmap(void *addr, int len, \
 				    int prot, int flags, int fd, int pos); }
 72	AUE_O_VADVISE	NOPROTO	{ int ovadvise(int anom); } vadvise \
 				    ovadvise_args int
 73	AUE_MUNMAP	NOPROTO	{ int munmap(void *addr, size_t len); }
 74	AUE_MPROTECT	STD	{ int freebsd32_mprotect(void *addr, \
 				    size_t len, int prot); }
 75	AUE_MADVISE	NOPROTO	{ int madvise(void *addr, size_t len, \
 				    int behav); }
 76	AUE_NULL	OBSOL	vhangup
 77	AUE_NULL	OBSOL	vlimit
 78	AUE_MINCORE	NOPROTO	{ int mincore(const void *addr, size_t len, \
 				    char *vec); }
 79	AUE_GETGROUPS	NOPROTO	{ int getgroups(u_int gidsetsize, \
 				    gid_t *gidset); }
 80	AUE_SETGROUPS	NOPROTO	{ int setgroups(u_int gidsetsize, \
 				    gid_t *gidset); }
 81	AUE_GETPGRP	NOPROTO	{ int getpgrp(void); }
 82	AUE_SETPGRP	NOPROTO	{ int setpgid(int pid, int pgid); }
 83	AUE_SETITIMER	STD	{ int freebsd32_setitimer(u_int which, \
 				    struct itimerval32 *itv, \
 				    struct itimerval32 *oitv); }
 84	AUE_NULL	OBSOL	owait
 ; XXX implement
 85	AUE_SWAPON	NOPROTO	{ int swapon(char *name); }
 86	AUE_GETITIMER	STD	{ int freebsd32_getitimer(u_int which, \
 				    struct itimerval32 *itv); }
 87	AUE_O_GETHOSTNAME	OBSOL	ogethostname
 88	AUE_O_SETHOSTNAME	OBSOL	osethostname
 89	AUE_GETDTABLESIZE	NOPROTO	{ int getdtablesize(void); }
 90	AUE_DUP2	NOPROTO	{ int dup2(u_int from, u_int to); }
 91	AUE_NULL	UNIMPL	getdopt
 92	AUE_FCNTL	STD	{ int freebsd32_fcntl(int fd, int cmd, \
 				    int arg); }
 93	AUE_SELECT	STD	{ int freebsd32_select(int nd, fd_set *in, \
 				    fd_set *ou, fd_set *ex, \
 				    struct timeval32 *tv); }
 94	AUE_NULL	UNIMPL	setdopt
 95	AUE_FSYNC	NOPROTO	{ int fsync(int fd); }
 96	AUE_SETPRIORITY	NOPROTO	{ int setpriority(int which, int who, \
 				    int prio); }
 97	AUE_SOCKET	NOPROTO	{ int socket(int domain, int type, \
 				    int protocol); }
 98	AUE_CONNECT	NOPROTO	{ int connect(int s, caddr_t name, \
 				    int namelen); }
 99	AUE_NULL	OBSOL	oaccept
 100	AUE_GETPRIORITY	NOPROTO	{ int getpriority(int which, int who); }
 101	AUE_NULL	OBSOL	osend
 102	AUE_NULL	OBSOL	orecv
 103	AUE_SIGRETURN	COMPAT	{ int freebsd32_sigreturn( \
 				    struct ia32_sigcontext3 *sigcntxp); }
 104	AUE_BIND	NOPROTO	{ int bind(int s, caddr_t name, \
 				    int namelen); }
 105	AUE_SETSOCKOPT	NOPROTO	{ int setsockopt(int s, int level, \
 				    int name, caddr_t val, int valsize); }
 106	AUE_LISTEN	NOPROTO	{ int listen(int s, int backlog); }
 107	AUE_NULL	OBSOL	vtimes
 108	AUE_O_SIGVEC	COMPAT	{ int freebsd32_sigvec(int signum, \
 				     struct sigvec32 *nsv, \
 				     struct sigvec32 *osv); }
 109	AUE_O_SIGBLOCK	COMPAT	{ int freebsd32_sigblock(int mask); }
 110	AUE_O_SIGSETMASK	COMPAT	{ int freebsd32_sigsetmask( int mask); }
 111	AUE_SIGSUSPEND	COMPAT	{ int freebsd32_sigsuspend( int mask); }
 112	AUE_O_SIGSTACK	COMPAT	{ int freebsd32_sigstack( \
 				     struct sigstack32 *nss, \
 				     struct sigstack32 *oss); }
 113	AUE_NULL	OBSOL	orecvmsg
 114	AUE_NULL	OBSOL	osendmsg
 115	AUE_NULL	OBSOL	vtrace
 116	AUE_GETTIMEOFDAY	STD	{ int freebsd32_gettimeofday( \
 				    struct timeval32 *tp, \
 				    struct timezone *tzp); }
 117	AUE_GETRUSAGE	STD	{ int freebsd32_getrusage(int who, \
 				    struct rusage32 *rusage); }
 118	AUE_GETSOCKOPT	NOPROTO	{ int getsockopt(int s, int level, \
 				    int name, caddr_t val, int *avalsize); }
 119	AUE_NULL	UNIMPL	resuba (BSD/OS 2.x)
 120	AUE_READV	STD	{ int freebsd32_readv(int fd, \
 				    struct iovec32 *iovp, u_int iovcnt); }
 121	AUE_WRITEV	STD	{ int freebsd32_writev(int fd, \
 				    struct iovec32 *iovp, u_int iovcnt); }
 122	AUE_SETTIMEOFDAY	STD	{ int freebsd32_settimeofday( \
 				    struct timeval32 *tv, \
 				    struct timezone *tzp); }
 123	AUE_FCHOWN	NOPROTO	{ int fchown(int fd, int uid, int gid); }
 124	AUE_FCHMOD	NOPROTO	{ int fchmod(int fd, int mode); }
 125	AUE_RECVFROM	OBSOL	orecvfrom
 126	AUE_SETREUID	NOPROTO	{ int setreuid(int ruid, int euid); }
 127	AUE_SETREGID	NOPROTO	{ int setregid(int rgid, int egid); }
 128	AUE_RENAME	NOPROTO	{ int rename(char *from, char *to); }
 129	AUE_TRUNCATE	COMPAT|NOPROTO	{ int truncate(char *path, \
 					    int length); }
 130	AUE_FTRUNCATE	COMPAT|NOPROTO	{ int ftruncate(int fd, int length); }
 131	AUE_FLOCK	NOPROTO	{ int flock(int fd, int how); }
 132	AUE_MKFIFO	NOPROTO	{ int mkfifo(char *path, int mode); }
 133	AUE_SENDTO	NOPROTO	{ int sendto(int s, caddr_t buf, \
 				    size_t len, int flags, caddr_t to, \
 				    int tolen); }
 134	AUE_SHUTDOWN	NOPROTO	{ int shutdown(int s, int how); }
 135	AUE_SOCKETPAIR	NOPROTO	{ int socketpair(int domain, int type, \
 				    int protocol, int *rsv); }
 136	AUE_MKDIR	NOPROTO	{ int mkdir(char *path, int mode); }
 137	AUE_RMDIR	NOPROTO	{ int rmdir(char *path); }
 138	AUE_UTIMES	STD	{ int freebsd32_utimes(char *path, \
 				    struct timeval32 *tptr); }
 139	AUE_NULL	OBSOL	4.2 sigreturn
 140	AUE_ADJTIME	STD	{ int freebsd32_adjtime( \
 				    struct timeval32 *delta, \
 				    struct timeval32 *olddelta); }
 141	AUE_GETPEERNAME	OBSOL	ogetpeername
 142	AUE_SYSCTL	OBSOL	ogethostid
 143	AUE_SYSCTL	OBSOL	sethostid
 144	AUE_GETRLIMIT	OBSOL	getrlimit
 145	AUE_SETRLIMIT	OBSOL	setrlimit
 146	AUE_KILLPG	OBSOL	killpg
 147	AUE_SETSID	NOPROTO	{ int setsid(void); }
 148	AUE_QUOTACTL	NOPROTO	{ int quotactl(char *path, int cmd, int uid, \
 				    caddr_t arg); }
 149	AUE_O_QUOTA	OBSOL oquota
 150	AUE_GETSOCKNAME	OBSOL ogetsockname
 
 ; Syscalls 151-180 inclusive are reserved for vendor-specific
 ; system calls.  (This includes various calls added for compatibility
 ; with other Unix variants.)
 ; Some of these calls are now supported by BSD...
 151	AUE_NULL	UNIMPL	sem_lock (BSD/OS 2.x)
 152	AUE_NULL	UNIMPL	sem_wakeup (BSD/OS 2.x)
 153	AUE_NULL	UNIMPL	asyncdaemon (BSD/OS 2.x)
 ; 154 is initialised by the NLM code, if present.
 154	AUE_NULL	UNIMPL	nlm_syscall
 ; 155 is initialized by the NFS code, if present.
 ; XXX this is a problem!!!
 155	AUE_NFS_SVC	UNIMPL	nfssvc
 156	AUE_GETDIRENTRIES COMPAT { int freebsd32_getdirentries(int fd, \
 				    char *buf, u_int count, uint32_t *basep); }
 157	AUE_STATFS	COMPAT4	{ int freebsd32_statfs(char *path, \
 				    struct statfs32 *buf); }
 158	AUE_FSTATFS	COMPAT4	{ int freebsd32_fstatfs(int fd, \
 				    struct statfs32 *buf); }
 159	AUE_NULL	UNIMPL	nosys
 160	AUE_LGETFH	UNIMPL	lgetfh
 161	AUE_NFS_GETFH	NOPROTO	{ int getfh(char *fname, \
 				    struct fhandle *fhp); }
 162	AUE_SYSCTL	OBSOL	getdomainname
 163	AUE_SYSCTL	OBSOL	setdomainname
 164	AUE_NULL	OBSOL	uname
 165	AUE_SYSARCH	STD	{ int freebsd32_sysarch(int op, char *parms); }
 166	AUE_RTPRIO	NOPROTO	{ int rtprio(int function, pid_t pid, \
 				    struct rtprio *rtp); }
 167	AUE_NULL	UNIMPL	nosys
 168	AUE_NULL	UNIMPL	nosys
 169	AUE_SEMSYS	NOSTD	{ int freebsd32_semsys(int which, int a2, \
 				    int a3, int a4, int a5); }
 170	AUE_MSGSYS	NOSTD	{ int freebsd32_msgsys(int which, int a2, \
 				    int a3, int a4, int a5, int a6); }
 171	AUE_SHMSYS	NOSTD	{ int freebsd32_shmsys(uint32_t which, uint32_t a2, \
 				    uint32_t a3, uint32_t a4); }
 172	AUE_NULL	UNIMPL	nosys
 173	AUE_PREAD	COMPAT6	{ ssize_t freebsd32_pread(int fd, void *buf, \
 				    size_t nbyte, int pad, \
 				    uint32_t offset1, uint32_t offset2); }
 174	AUE_PWRITE	COMPAT6	{ ssize_t freebsd32_pwrite(int fd, \
 				    const void *buf, size_t nbyte, int pad, \
 				    uint32_t offset1, uint32_t offset2); }
 175	AUE_NULL	UNIMPL	nosys
 176	AUE_NTP_ADJTIME	NOPROTO	{ int ntp_adjtime(struct timex *tp); }
 177	AUE_NULL	UNIMPL	sfork (BSD/OS 2.x)
 178	AUE_NULL	UNIMPL	getdescriptor (BSD/OS 2.x)
 179	AUE_NULL	UNIMPL	setdescriptor (BSD/OS 2.x)
 180	AUE_NULL	UNIMPL	nosys
 
 ; Syscalls 181-199 are used by/reserved for BSD
 181	AUE_SETGID	NOPROTO	{ int setgid(gid_t gid); }
 182	AUE_SETEGID	NOPROTO	{ int setegid(gid_t egid); }
 183	AUE_SETEUID	NOPROTO	{ int seteuid(uid_t euid); }
 184	AUE_NULL	UNIMPL	lfs_bmapv
 185	AUE_NULL	UNIMPL	lfs_markv
 186	AUE_NULL	UNIMPL	lfs_segclean
 187	AUE_NULL	UNIMPL	lfs_segwait
-188	AUE_STAT	STD	{ int freebsd32_stat(char *path, \
-				    struct stat32 *ub); }
-189	AUE_FSTAT	STD	{ int freebsd32_fstat(int fd, \
-				    struct stat32 *ub); }
-190	AUE_LSTAT	STD	{ int freebsd32_lstat(char *path, \
-				    struct stat32 *ub); }
+188	AUE_STAT	COMPAT11 { int freebsd32_stat(char *path, \
+				    struct freebsd11_stat32 *ub); }
+189	AUE_FSTAT	COMPAT11 { int freebsd32_fstat(int fd, \
+				    struct freebsd11_stat32 *ub); }
+190	AUE_LSTAT	COMPAT11 { int freebsd32_lstat(char *path, \
+				    struct freebsd11_stat32 *ub); }
 191	AUE_PATHCONF	NOPROTO	{ int pathconf(char *path, int name); }
 192	AUE_FPATHCONF	NOPROTO	{ int fpathconf(int fd, int name); }
 193	AUE_NULL	UNIMPL	nosys
 194	AUE_GETRLIMIT	NOPROTO	{ int getrlimit(u_int which, \
 				    struct rlimit *rlp); } getrlimit \
 				    __getrlimit_args int
 195	AUE_SETRLIMIT	NOPROTO	{ int setrlimit(u_int which, \
 				    struct rlimit *rlp); } setrlimit \
 				    __setrlimit_args int
-196	AUE_GETDIRENTRIES	STD	{ int freebsd32_getdirentries(int fd, \
+196	AUE_GETDIRENTRIES COMPAT11 { int freebsd32_getdirentries(int fd, \
 				    char *buf, u_int count, int32_t *basep); }
 197	AUE_MMAP	COMPAT6	{ caddr_t freebsd32_mmap(caddr_t addr, \
 				    size_t len, int prot, int flags, int fd, \
 				    int pad, uint32_t pos1, uint32_t pos2); }
 198	AUE_NULL	NOPROTO	{ int nosys(void); } __syscall \
 				    __syscall_args int
 199	AUE_LSEEK	COMPAT6	{ off_t freebsd32_lseek(int fd, int pad, \
 				    uint32_t offset1, uint32_t offset2, \
 				    int whence); }
 200	AUE_TRUNCATE	COMPAT6	{ int freebsd32_truncate(char *path, \
 				    int pad, uint32_t length1, \
 				    uint32_t length2); }
 201	AUE_FTRUNCATE	COMPAT6	{ int freebsd32_ftruncate(int fd, int pad, \
 				    uint32_t length1, uint32_t length2); }
 202	AUE_SYSCTL	STD	{ int freebsd32_sysctl(int *name, \
 				    u_int namelen, void *old, \
 				    uint32_t *oldlenp, void *new, \
 				    uint32_t newlen); }
 203	AUE_MLOCK	NOPROTO	{ int mlock(const void *addr, \
 				    size_t len); }
 204	AUE_MUNLOCK	NOPROTO	{ int munlock(const void *addr, \
 				    size_t len); }
 205	AUE_UNDELETE	NOPROTO	{ int undelete(char *path); }
 206	AUE_FUTIMES	STD	{ int freebsd32_futimes(int fd, \
 				    struct timeval32 *tptr); }
 207	AUE_GETPGID	NOPROTO	{ int getpgid(pid_t pid); }
 208	AUE_NULL	UNIMPL	newreboot (NetBSD)
 209	AUE_POLL	NOPROTO	{ int poll(struct pollfd *fds, u_int nfds, \
 				    int timeout); }
 
 ;
 ; The following are reserved for loadable syscalls
 ;
 210	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 211	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 212	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 213	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 214	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 215	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 216	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 217	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 218	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 219	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 
 ;
 ; The following were introduced with NetBSD/4.4Lite-2
 ; They are initialized by their respective modules/sysinits
 ; XXX PROBLEM!!
 220	AUE_SEMCTL	COMPAT7|NOSTD	{ int freebsd32_semctl( \
 				    int semid, int semnum, \
 				    int cmd, union semun32 *arg); }
 221	AUE_SEMGET	NOSTD|NOPROTO	{ int semget(key_t key, int nsems, \
 				    int semflg); }
 222	AUE_SEMOP	NOSTD|NOPROTO	{ int semop(int semid, \
 				    struct sembuf *sops, u_int nsops); }
 223	AUE_NULL	UNIMPL	semconfig
 224	AUE_MSGCTL	COMPAT7|NOSTD	{ int freebsd32_msgctl( \
 				    int msqid, int cmd, \
 				    struct msqid_ds32_old *buf); }
 225	AUE_MSGGET	NOSTD|NOPROTO	{ int msgget(key_t key, int msgflg); }
 226	AUE_MSGSND	NOSTD	{ int freebsd32_msgsnd(int msqid, void *msgp, \
 				    size_t msgsz, int msgflg); }
 227	AUE_MSGRCV	NOSTD	{ int freebsd32_msgrcv(int msqid, void *msgp, \
 				    size_t msgsz, long msgtyp, int msgflg); }
 228	AUE_SHMAT	NOSTD|NOPROTO	{ int shmat(int shmid, void *shmaddr, \
 				    int shmflg); }
 229	AUE_SHMCTL	COMPAT7|NOSTD	{ int freebsd32_shmctl( \
 				    int shmid, int cmd, \
 				    struct shmid_ds32_old *buf); }
 230	AUE_SHMDT	NOSTD|NOPROTO	{ int shmdt(void *shmaddr); }
 231	AUE_SHMGET	NOSTD|NOPROTO	{ int shmget(key_t key, int size, \
 				    int shmflg); }
 ;
 232	AUE_NULL	STD 	{ int freebsd32_clock_gettime(clockid_t clock_id, \
 				    struct timespec32 *tp); }
 233	AUE_CLOCK_SETTIME	STD	{ int freebsd32_clock_settime(clockid_t clock_id, \
 				    const struct timespec32 *tp); }
 234	AUE_NULL	STD	{ int freebsd32_clock_getres(clockid_t clock_id, \
 				    struct timespec32 *tp); }
 235	AUE_NULL	STD	{ int freebsd32_ktimer_create(\
 				    clockid_t clock_id, \
 				    struct sigevent32 *evp, int *timerid); }
 236	AUE_NULL	NOPROTO	{ int ktimer_delete(int timerid); }
 237	AUE_NULL	STD	{ int freebsd32_ktimer_settime(int timerid,\
 				    int flags, \
 				    const struct itimerspec32 *value, \
 				    struct itimerspec32 *ovalue); }
 238	AUE_NULL	STD	{ int freebsd32_ktimer_gettime(int timerid,\
 				    struct itimerspec32 *value); }
 239	AUE_NULL	NOPROTO	{ int ktimer_getoverrun(int timerid); }
 240	AUE_NULL	STD	{ int freebsd32_nanosleep( \
 				    const struct timespec32 *rqtp, \
 				    struct timespec32 *rmtp); }
 241	AUE_NULL	NOPROTO	{ int ffclock_getcounter(ffcounter *ffcount); }
 242	AUE_NULL	NOPROTO	{ int ffclock_setestimate( \
 				    struct ffclock_estimate *cest); }
 243	AUE_NULL	NOPROTO	{ int ffclock_getestimate( \
 				    struct ffclock_estimate *cest); }
 244	AUE_NULL	STD	{ int freebsd32_clock_nanosleep( \
 				    clockid_t clock_id, int flags, \
 				    const struct timespec32 *rqtp, \
 				    struct timespec32 *rmtp); }
 245	AUE_NULL	UNIMPL	nosys
 246	AUE_NULL	UNIMPL	nosys
 247	AUE_NULL	STD	{ int freebsd32_clock_getcpuclockid2(\
 				    uint32_t id1, uint32_t id2,\
 				    int which, clockid_t *clock_id); }
 248	AUE_NULL	UNIMPL	ntp_gettime
 249	AUE_NULL	UNIMPL	nosys
 ; syscall numbers initially used in OpenBSD
 250	AUE_MINHERIT	NOPROTO	{ int minherit(void *addr, size_t len, \
 				    int inherit); }
 251	AUE_RFORK	NOPROTO	{ int rfork(int flags); }
 252	AUE_POLL	OBSOL	openbsd_poll
 253	AUE_ISSETUGID	NOPROTO	{ int issetugid(void); }
 254	AUE_LCHOWN	NOPROTO	{ int lchown(char *path, int uid, int gid); }
 255	AUE_AIO_READ	STD	{ int freebsd32_aio_read( \
 				    struct aiocb32 *aiocbp); }
 256	AUE_AIO_WRITE	STD	{ int freebsd32_aio_write( \
 				    struct aiocb32 *aiocbp); }
 257	AUE_LIO_LISTIO	STD	{ int freebsd32_lio_listio(int mode, \
 				    struct aiocb32 * const *acb_list, \
 				    int nent, struct sigevent32 *sig); }
 258	AUE_NULL	UNIMPL	nosys
 259	AUE_NULL	UNIMPL	nosys
 260	AUE_NULL	UNIMPL	nosys
 261	AUE_NULL	UNIMPL	nosys
 262	AUE_NULL	UNIMPL	nosys
 263	AUE_NULL	UNIMPL	nosys
 264	AUE_NULL	UNIMPL	nosys
 265	AUE_NULL	UNIMPL	nosys
 266	AUE_NULL	UNIMPL	nosys
 267	AUE_NULL	UNIMPL	nosys
 268	AUE_NULL	UNIMPL	nosys
 269	AUE_NULL	UNIMPL	nosys
 270	AUE_NULL	UNIMPL	nosys
 271	AUE_NULL	UNIMPL	nosys
-272	AUE_O_GETDENTS	NOPROTO	{ int getdents(int fd, char *buf, \
-				    size_t count); }
+272	AUE_O_GETDENTS	COMPAT11 { int freebsd32_getdents(int fd, char *buf, \
+				    int count); }
 273	AUE_NULL	UNIMPL	nosys
 274	AUE_LCHMOD	NOPROTO	{ int lchmod(char *path, mode_t mode); }
 275	AUE_LCHOWN	NOPROTO	{ int lchown(char *path, uid_t uid, \
 				    gid_t gid); } netbsd_lchown \
 				    lchown_args int
 276	AUE_LUTIMES	STD	{ int freebsd32_lutimes(char *path, \
 				    struct timeval32 *tptr); }
 277	AUE_MSYNC	NOPROTO	{ int msync(void *addr, size_t len, \
 				    int flags); } netbsd_msync msync_args int
-278	AUE_STAT	NOPROTO	{ int nstat(char *path, struct nstat *ub); }
-279	AUE_FSTAT	NOPROTO	{ int nfstat(int fd, struct nstat *sb); }
-280	AUE_LSTAT	NOPROTO	{ int nlstat(char *path, struct nstat *ub); }
+278	AUE_STAT  COMPAT11|NOPROTO { int nstat(char *path, struct nstat *ub); }
+279	AUE_FSTAT COMPAT11|NOPROTO { int nfstat(int fd, struct nstat *sb); }
+280	AUE_LSTAT COMPAT11|NOPROTO { int nlstat(char *path, struct nstat *ub); }
 281	AUE_NULL	UNIMPL	nosys
 282	AUE_NULL	UNIMPL	nosys
 283	AUE_NULL	UNIMPL	nosys
 284	AUE_NULL	UNIMPL	nosys
 285	AUE_NULL	UNIMPL	nosys
 286	AUE_NULL	UNIMPL	nosys
 287	AUE_NULL	UNIMPL	nosys
 288	AUE_NULL	UNIMPL	nosys
 ; 289 and 290 from NetBSD (OpenBSD: 267 and 268)
 289	AUE_PREADV	STD	{ ssize_t freebsd32_preadv(int fd, \
 					struct iovec32 *iovp, \
 					u_int iovcnt, \
 					uint32_t offset1, uint32_t offset2); }
 290	AUE_PWRITEV	STD	{ ssize_t freebsd32_pwritev(int fd, \
 					struct iovec32 *iovp, \
 					u_int iovcnt, \
 					uint32_t offset1, uint32_t offset2); }
 291	AUE_NULL	UNIMPL	nosys
 292	AUE_NULL	UNIMPL	nosys
 293	AUE_NULL	UNIMPL	nosys
 294	AUE_NULL	UNIMPL	nosys
 295	AUE_NULL	UNIMPL	nosys
 296	AUE_NULL	UNIMPL	nosys
 ; XXX 297 is 300 in NetBSD 
 297	AUE_FHSTATFS	COMPAT4	{ int freebsd32_fhstatfs( \
 				    const struct fhandle *u_fhp, \
 				    struct statfs32 *buf); }
 298	AUE_FHOPEN	NOPROTO	{ int fhopen(const struct fhandle *u_fhp, \
 			 	    int flags); }
-299	AUE_FHSTAT	NOPROTO	{ int fhstat(const struct fhandle *u_fhp, \
-				    struct stat *sb); }
+299	AUE_FHSTAT	COMPAT11 { int freebsd32_fhstat( \
+				    const struct fhandle *u_fhp, \
+				    struct freebsd11_stat32 *sb); }
 ; syscall numbers for FreeBSD
 300	AUE_NULL	NOPROTO	{ int modnext(int modid); }
 301	AUE_NULL	STD	{ int freebsd32_modstat(int modid, \
 				    struct module_stat32* stat); }
 302	AUE_NULL	NOPROTO	{ int modfnext(int modid); }
 303	AUE_NULL	NOPROTO	{ int modfind(const char *name); }
 304	AUE_MODLOAD	NOPROTO	{ int kldload(const char *file); }
 305	AUE_MODUNLOAD	NOPROTO	{ int kldunload(int fileid); }
 306	AUE_NULL	NOPROTO	{ int kldfind(const char *file); }
 307	AUE_NULL	NOPROTO	{ int kldnext(int fileid); }
 308	AUE_NULL	STD	{ int freebsd32_kldstat(int fileid, \
 				    struct kld32_file_stat* stat); }
 309	AUE_NULL	NOPROTO	{ int kldfirstmod(int fileid); }
 310	AUE_GETSID	NOPROTO	{ int getsid(pid_t pid); }
 311	AUE_SETRESUID	NOPROTO	{ int setresuid(uid_t ruid, uid_t euid, \
 				    uid_t suid); }
 312	AUE_SETRESGID	NOPROTO	{ int setresgid(gid_t rgid, gid_t egid, \
 				    gid_t sgid); }
 313	AUE_NULL	OBSOL	signanosleep
 314	AUE_AIO_RETURN	STD	{ int freebsd32_aio_return( \
 				    struct aiocb32 *aiocbp); }
 315	AUE_AIO_SUSPEND	STD	{ int freebsd32_aio_suspend( \
 				    struct aiocb32 * const * aiocbp, int nent, \
 				    const struct timespec32 *timeout); }
 316	AUE_AIO_CANCEL	NOPROTO	{ int aio_cancel(int fd, \
 				    struct aiocb *aiocbp); }
 317	AUE_AIO_ERROR	STD	{ int freebsd32_aio_error( \
 				    struct aiocb32 *aiocbp); }
 318	AUE_AIO_READ	COMPAT6	{ int freebsd32_aio_read( \
 				    struct oaiocb32 *aiocbp); }
 319	AUE_AIO_WRITE	COMPAT6	{ int freebsd32_aio_write( \
 				    struct oaiocb32 *aiocbp); }
 320	AUE_LIO_LISTIO	COMPAT6	{ int freebsd32_lio_listio(int mode, \
 				    struct oaiocb32 * const *acb_list, \
 				    int nent, struct osigevent32 *sig); }
 321	AUE_NULL	NOPROTO	{ int yield(void); }
 322	AUE_NULL	OBSOL	thr_sleep
 323	AUE_NULL	OBSOL	thr_wakeup
 324	AUE_MLOCKALL	NOPROTO	{ int mlockall(int how); }
 325	AUE_MUNLOCKALL	NOPROTO	{ int munlockall(void); }
 326	AUE_GETCWD	NOPROTO	{ int __getcwd(char *buf, size_t buflen); }
 
 327	AUE_NULL	NOPROTO	{ int sched_setparam (pid_t pid, \
 				    const struct sched_param *param); }
 328	AUE_NULL	NOPROTO	{ int sched_getparam (pid_t pid, \
 				    struct sched_param *param); }
 
 329	AUE_NULL	NOPROTO	{ int sched_setscheduler (pid_t pid, \
 				    int policy, \
 				    const struct sched_param *param); }
 330	AUE_NULL	NOPROTO	{ int sched_getscheduler (pid_t pid); }
 
 331	AUE_NULL	NOPROTO	{ int sched_yield (void); }
 332	AUE_NULL	NOPROTO	{ int sched_get_priority_max (int policy); }
 333	AUE_NULL	NOPROTO	{ int sched_get_priority_min (int policy); }
 334	AUE_NULL	NOPROTO	{ int sched_rr_get_interval (pid_t pid, \
 				    struct timespec *interval); }
 335	AUE_NULL	NOPROTO	{ int utrace(const void *addr, size_t len); }
 336	AUE_SENDFILE	COMPAT4	{ int freebsd32_sendfile(int fd, int s, \
 				    uint32_t offset1, uint32_t offset2, \
 				    size_t nbytes, struct sf_hdtr32 *hdtr, \
 				    off_t *sbytes, int flags); }
 337	AUE_NULL	NOPROTO	{ int kldsym(int fileid, int cmd, \
 				    void *data); }
 338	AUE_JAIL	STD	{ int freebsd32_jail(struct jail32 *jail); }
 339	AUE_NULL	UNIMPL	pioctl
 340	AUE_SIGPROCMASK	NOPROTO	{ int sigprocmask(int how, \
 				    const sigset_t *set, sigset_t *oset); }
 341	AUE_SIGSUSPEND	NOPROTO	{ int sigsuspend(const sigset_t *sigmask); }
 342	AUE_SIGACTION	COMPAT4	{ int freebsd32_sigaction(int sig, \
 				    struct sigaction32 *act, \
 				    struct sigaction32 *oact); }
 343	AUE_SIGPENDING	NOPROTO	{ int sigpending(sigset_t *set); }
 344	AUE_SIGRETURN	COMPAT4	{ int freebsd32_sigreturn( \
 		    const struct freebsd4_freebsd32_ucontext *sigcntxp); }
 345	AUE_SIGWAIT	STD	{ int freebsd32_sigtimedwait(const sigset_t *set, \
 				    siginfo_t *info, \
 				    const struct timespec *timeout); }
 346	AUE_NULL	STD	{ int freebsd32_sigwaitinfo(const sigset_t *set, \
 				    siginfo_t *info); }
 347	AUE_ACL_GET_FILE	NOPROTO	{ int __acl_get_file(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 348	AUE_ACL_SET_FILE	NOPROTO	{ int __acl_set_file(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 349	AUE_ACL_GET_FD	NOPROTO	{ int __acl_get_fd(int filedes, \
 				    acl_type_t type, struct acl *aclp); }
 350	AUE_ACL_SET_FD	NOPROTO	{ int __acl_set_fd(int filedes, \
 				    acl_type_t type, struct acl *aclp); }
 351	AUE_ACL_DELETE_FILE	NOPROTO	{ int __acl_delete_file(const char *path, \
 				    acl_type_t type); }
 352	AUE_ACL_DELETE_FD	NOPROTO	{ int __acl_delete_fd(int filedes, \
 				    acl_type_t type); }
 353	AUE_ACL_CHECK_FILE	NOPROTO	{ int __acl_aclcheck_file(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 354	AUE_ACL_CHECK_FD	NOPROTO	{ int __acl_aclcheck_fd(int filedes, \
 				    acl_type_t type, struct acl *aclp); }
 355	AUE_EXTATTRCTL	NOPROTO	{ int extattrctl(const char *path, int cmd, \
 				    const char *filename, int attrnamespace, \
 				    const char *attrname); }
 356	AUE_EXTATTR_SET_FILE	NOPROTO	{ ssize_t extattr_set_file( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 357	AUE_EXTATTR_GET_FILE	NOPROTO	{ ssize_t extattr_get_file( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 358	AUE_EXTATTR_DELETE_FILE	NOPROTO	{ int extattr_delete_file( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname); }
 359	AUE_AIO_WAITCOMPLETE	STD	{ int freebsd32_aio_waitcomplete( \
 				    struct aiocb32 **aiocbp, \
 				    struct timespec32 *timeout); }
 360	AUE_GETRESUID	NOPROTO	{ int getresuid(uid_t *ruid, uid_t *euid, \
 				    uid_t *suid); }
 361	AUE_GETRESGID	NOPROTO	{ int getresgid(gid_t *rgid, gid_t *egid, \
 				    gid_t *sgid); }
 362	AUE_KQUEUE	NOPROTO	{ int kqueue(void); }
 363	AUE_KEVENT	STD	{ int freebsd32_kevent(int fd, \
 				    const struct kevent32 *changelist, \
 				    int nchanges, \
 				    struct kevent32 *eventlist, int nevents, \
 				    const struct timespec32 *timeout); }
 364	AUE_NULL	UNIMPL	__cap_get_proc
 365	AUE_NULL	UNIMPL	__cap_set_proc
 366	AUE_NULL	UNIMPL	__cap_get_fd
 367	AUE_NULL	UNIMPL	__cap_get_file
 368	AUE_NULL	UNIMPL	__cap_set_fd
 369	AUE_NULL	UNIMPL	__cap_set_file
 370	AUE_NULL	UNIMPL	nosys
 371	AUE_EXTATTR_SET_FD	NOPROTO	{ ssize_t extattr_set_fd(int fd, \
 				    int attrnamespace, const char *attrname, \
 				    void *data, size_t nbytes); }
 372	AUE_EXTATTR_GET_FD	NOPROTO	{ ssize_t extattr_get_fd(int fd, \
 				    int attrnamespace, const char *attrname, \
 				    void *data, size_t nbytes); }
 373	AUE_EXTATTR_DELETE_FD	NOPROTO	{ int extattr_delete_fd(int fd, \
 				    int attrnamespace, \
 				    const char *attrname); }
 374	AUE_SETUGID	NOPROTO	{ int __setugid(int flag); }
 375	AUE_NULL	UNIMPL	nfsclnt
 376	AUE_EACCESS	NOPROTO	{ int eaccess(char *path, int amode); }
 377	AUE_NULL	UNIMPL	afs_syscall
 378	AUE_NMOUNT	STD	{ int freebsd32_nmount(struct iovec32 *iovp, \
 				    unsigned int iovcnt, int flags); }
 379	AUE_NULL	UNIMPL	kse_exit
 380	AUE_NULL	UNIMPL	kse_wakeup
 381	AUE_NULL	UNIMPL	kse_create
 382	AUE_NULL	UNIMPL	kse_thr_interrupt
 383	AUE_NULL	UNIMPL	kse_release
 384	AUE_NULL	UNIMPL	__mac_get_proc
 385	AUE_NULL	UNIMPL	__mac_set_proc
 386	AUE_NULL	UNIMPL	__mac_get_fd
 387	AUE_NULL	UNIMPL	__mac_get_file
 388	AUE_NULL	UNIMPL	__mac_set_fd
 389	AUE_NULL	UNIMPL	__mac_set_file
 390	AUE_NULL	NOPROTO	{ int kenv(int what, const char *name, \
 				    char *value, int len); }
 391	AUE_LCHFLAGS	NOPROTO	{ int lchflags(const char *path, \
 				    u_long flags); }
 392	AUE_NULL	NOPROTO	{ int uuidgen(struct uuid *store, \
 				    int count); }
 393	AUE_SENDFILE	STD	{ int freebsd32_sendfile(int fd, int s, \
 				    uint32_t offset1, uint32_t offset2, \
 				    size_t nbytes, struct sf_hdtr32 *hdtr, \
 				    off_t *sbytes, int flags); }
 394	AUE_NULL	UNIMPL	mac_syscall
-395	AUE_GETFSSTAT	NOPROTO	{ int getfsstat(struct statfs *buf, \
+395	AUE_GETFSSTAT	COMPAT11|NOPROTO	{ int getfsstat( \
+				    struct freebsd11_statfs *buf, \
 				    long bufsize, int mode); }
-396	AUE_STATFS	NOPROTO	{ int statfs(char *path, \
+396	AUE_STATFS	COMPAT11|NOPROTO	{ int statfs(char *path, \
 				    struct statfs *buf); }
-397	AUE_FSTATFS	NOPROTO	{ int fstatfs(int fd, struct statfs *buf); }
-398	AUE_FHSTATFS	NOPROTO	{ int fhstatfs(const struct fhandle *u_fhp, \
-				    struct statfs *buf); }
+397	AUE_FSTATFS	COMPAT11|NOPROTO	{ int fstatfs(int fd, \
+				    struct freebsd11_statfs *buf); }
+398	AUE_FHSTATFS	COMPAT11|NOPROTO	{ int fhstatfs( \
+				    const struct fhandle *u_fhp, \
+				    struct freebsd11_statfs *buf); }
 399	AUE_NULL	UNIMPL	nosys
 400	AUE_SEMCLOSE	NOSTD|NOPROTO	{ int ksem_close(semid_t id); }
 401	AUE_SEMPOST	NOSTD|NOPROTO	{ int ksem_post(semid_t id); }
 402	AUE_SEMWAIT	NOSTD|NOPROTO	{ int ksem_wait(semid_t id); }
 403	AUE_SEMTRYWAIT	NOSTD|NOPROTO	{ int ksem_trywait(semid_t id); }
 404	AUE_SEMINIT	NOSTD	{ int freebsd32_ksem_init(semid_t *idp, \
 				    unsigned int value); }
 405	AUE_SEMOPEN	NOSTD	{ int freebsd32_ksem_open(semid_t *idp, \
 				    const char *name, int oflag, \
 				    mode_t mode, unsigned int value); }
 406	AUE_SEMUNLINK	NOSTD|NOPROTO	{ int ksem_unlink(const char *name); }
 407	AUE_SEMGETVALUE	NOSTD|NOPROTO	{ int ksem_getvalue(semid_t id, \
 				    int *val); }
 408	AUE_SEMDESTROY	NOSTD|NOPROTO	{ int ksem_destroy(semid_t id); }
 409	AUE_NULL	UNIMPL	__mac_get_pid
 410	AUE_NULL	UNIMPL	__mac_get_link
 411	AUE_NULL	UNIMPL	__mac_set_link
 412	AUE_EXTATTR_SET_LINK	NOPROTO	{ ssize_t extattr_set_link( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 413	AUE_EXTATTR_GET_LINK	NOPROTO	{ ssize_t extattr_get_link( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 414	AUE_EXTATTR_DELETE_LINK	NOPROTO	{ int extattr_delete_link( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname); }
 415	AUE_NULL	UNIMPL	__mac_execve
 416	AUE_SIGACTION	STD	{ int freebsd32_sigaction(int sig, \
 				    struct sigaction32 *act, \
 				    struct sigaction32 *oact); }
 417	AUE_SIGRETURN	STD	{ int freebsd32_sigreturn( \
 		    const struct freebsd32_ucontext *sigcntxp); }
 418	AUE_NULL	UNIMPL	__xstat
 419	AUE_NULL	UNIMPL	__xfstat
 420	AUE_NULL	UNIMPL	__xlstat
 421	AUE_NULL	STD	{ int freebsd32_getcontext( \
 				    struct freebsd32_ucontext *ucp); }
 422	AUE_NULL	STD	{ int freebsd32_setcontext( \
 				    const struct freebsd32_ucontext *ucp); }
 423	AUE_NULL	STD	{ int freebsd32_swapcontext( \
 				    struct freebsd32_ucontext *oucp, \
 				    const struct freebsd32_ucontext *ucp); }
 424	AUE_SWAPOFF	UNIMPL	swapoff
 425	AUE_ACL_GET_LINK	NOPROTO	{ int __acl_get_link(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 426	AUE_ACL_SET_LINK	NOPROTO	{ int __acl_set_link(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 427	AUE_ACL_DELETE_LINK	NOPROTO	{ int __acl_delete_link(const char *path, \
 				    acl_type_t type); }
 428	AUE_ACL_CHECK_LINK	NOPROTO	{ int __acl_aclcheck_link(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 429	AUE_SIGWAIT	NOPROTO	{ int sigwait(const sigset_t *set, \
 				    int *sig); }
 430	AUE_THR_CREATE	UNIMPL	thr_create;
 431	AUE_THR_EXIT	NOPROTO	{ void thr_exit(long *state); }
 432	AUE_NULL	NOPROTO	{ int thr_self(long *id); }
 433	AUE_THR_KILL	NOPROTO	{ int thr_kill(long id, int sig); }
 434	AUE_NULL	UNIMPL	nosys
 435	AUE_NULL	UNIMPL	nosys
 436	AUE_JAIL_ATTACH	NOPROTO	{ int jail_attach(int jid); }
 437	AUE_EXTATTR_LIST_FD	NOPROTO	{ ssize_t extattr_list_fd(int fd, \
 				    int attrnamespace, void *data, \
 				    size_t nbytes); }
 438	AUE_EXTATTR_LIST_FILE	NOPROTO	{ ssize_t extattr_list_file( \
 				    const char *path, int attrnamespace, \
 				    void *data, size_t nbytes); }
 439	AUE_EXTATTR_LIST_LINK	NOPROTO	{ ssize_t extattr_list_link( \
 				    const char *path, int attrnamespace, \
 				    void *data, size_t nbytes); }
 440	AUE_NULL	UNIMPL	kse_switchin
 441	AUE_SEMWAIT	NOSTD	{ int freebsd32_ksem_timedwait(semid_t id, \
 				    const struct timespec32 *abstime); }
 442	AUE_NULL	STD	{ int freebsd32_thr_suspend( \
 				    const struct timespec32 *timeout); }
 443	AUE_NULL	NOPROTO	{ int thr_wake(long id); }
 444	AUE_MODUNLOAD	NOPROTO	{ int kldunloadf(int fileid, int flags); }
 445	AUE_AUDIT	NOPROTO	{ int audit(const void *record, \
 				    u_int length); }
 446	AUE_AUDITON	NOPROTO	{ int auditon(int cmd, void *data, \
 				    u_int length); }
 447	AUE_GETAUID	NOPROTO	{ int getauid(uid_t *auid); }
 448	AUE_SETAUID	NOPROTO	{ int setauid(uid_t *auid); }
 449	AUE_GETAUDIT	NOPROTO	{ int getaudit(struct auditinfo *auditinfo); }
 450	AUE_SETAUDIT	NOPROTO	{ int setaudit(struct auditinfo *auditinfo); }
 451	AUE_GETAUDIT_ADDR	NOPROTO	{ int getaudit_addr( \
 				    struct auditinfo_addr *auditinfo_addr, \
 				    u_int length); }
 452	AUE_SETAUDIT_ADDR	NOPROTO	{ int setaudit_addr( \
 				    struct auditinfo_addr *auditinfo_addr, \
 				    u_int length); }
 453	AUE_AUDITCTL	NOPROTO	{ int auditctl(char *path); }
 454	AUE_NULL	STD	{ int freebsd32_umtx_op(void *obj, int op,\
 				    u_long val, void *uaddr, \
 				    void *uaddr2); }
 455	AUE_THR_NEW	STD	{ int freebsd32_thr_new(	\
 				    struct thr_param32 *param,	\
 				    int param_size); }
 456	AUE_NULL	STD	{ int freebsd32_sigqueue(pid_t pid, \
 				    int signum, int value); }
 457	AUE_MQ_OPEN	NOSTD	{ int freebsd32_kmq_open( \
 				    const char *path, int flags, mode_t mode, \
 				    const struct mq_attr32 *attr); }
 458	AUE_MQ_SETATTR	NOSTD	{ int freebsd32_kmq_setattr(int mqd, \
 				    const struct mq_attr32 *attr,	\
 				    struct mq_attr32 *oattr); }
 459	AUE_MQ_TIMEDRECEIVE	NOSTD	{ int freebsd32_kmq_timedreceive(int mqd, \
 				    char *msg_ptr, size_t msg_len,	\
 				    unsigned *msg_prio,			\
 				    const struct timespec32 *abs_timeout); }
 460	AUE_MQ_TIMEDSEND	NOSTD	{ int freebsd32_kmq_timedsend(int mqd,	\
 				    const char *msg_ptr, size_t msg_len,\
 				    unsigned msg_prio,			\
 				    const struct timespec32 *abs_timeout);}
 461	AUE_MQ_NOTIFY	NOSTD	{ int freebsd32_kmq_notify(int mqd,	\
 				    const struct sigevent32 *sigev); }
 462	AUE_MQ_UNLINK	NOPROTO|NOSTD	{ int kmq_unlink(const char *path); }
 463	AUE_NULL	NOPROTO	{ int abort2(const char *why, int nargs, void **args); }
 464	AUE_NULL 	NOPROTO	{ int thr_set_name(long id, const char *name); }
 465	AUE_AIO_FSYNC	STD	{ int freebsd32_aio_fsync(int op, \
 				    struct aiocb32 *aiocbp); }
 466	AUE_RTPRIO	NOPROTO	{ int rtprio_thread(int function, \
 				    lwpid_t lwpid, struct rtprio *rtp); }
 467	AUE_NULL	UNIMPL	nosys
 468	AUE_NULL	UNIMPL	nosys
 469	AUE_NULL	UNIMPL	__getpath_fromfd
 470	AUE_NULL	UNIMPL	__getpath_fromaddr
 471	AUE_SCTP_PEELOFF	NOPROTO|NOSTD	{ int sctp_peeloff(int sd, uint32_t name); }
 472	AUE_SCTP_GENERIC_SENDMSG	NOPROTO|NOSTD	{ int sctp_generic_sendmsg(int sd, caddr_t msg, int mlen, \
 				    caddr_t to, __socklen_t tolen, \
 				    struct sctp_sndrcvinfo *sinfo, int flags); }
 473	AUE_SCTP_GENERIC_SENDMSG_IOV	NOPROTO|NOSTD	{ int sctp_generic_sendmsg_iov(int sd, struct iovec *iov, int iovlen, \
 				    caddr_t to, __socklen_t tolen, \
 				    struct sctp_sndrcvinfo *sinfo, int flags); }
 474	AUE_SCTP_GENERIC_RECVMSG	NOPROTO|NOSTD	{ int sctp_generic_recvmsg(int sd, struct iovec *iov, int iovlen, \
 				    struct sockaddr * from, __socklen_t *fromlenaddr, \
 				    struct sctp_sndrcvinfo *sinfo, int *msg_flags); }
 #ifdef PAD64_REQUIRED
 475	AUE_PREAD	STD	{ ssize_t freebsd32_pread(int fd, \
 				    void *buf,size_t nbyte, \
 				    int pad, \
 				    uint32_t offset1, uint32_t offset2); }
 476	AUE_PWRITE	STD	{ ssize_t freebsd32_pwrite(int fd, \
 				    const void *buf, size_t nbyte, \
 				    int pad, \
 				    uint32_t offset1, uint32_t offset2); }
 477	AUE_MMAP	STD 	{ caddr_t freebsd32_mmap(caddr_t addr, \
 				    size_t len, int prot, int flags, int fd, \
 				    int pad, \
 				    uint32_t pos1, uint32_t pos2); }
 478	AUE_LSEEK	STD	{ off_t freebsd32_lseek(int fd, \
 				    int pad, \
 				    uint32_t offset1, uint32_t offset2, \
 				    int whence); }
 479	AUE_TRUNCATE	STD	{ int freebsd32_truncate(char *path, \
 				    int pad, \
 				    uint32_t length1, uint32_t length2); }
 480	AUE_FTRUNCATE	STD	{ int freebsd32_ftruncate(int fd, \
 				    int pad, \
 				    uint32_t length1, uint32_t length2); }
 #else
 475	AUE_PREAD	STD	{ ssize_t freebsd32_pread(int fd, \
 				    void *buf,size_t nbyte, \
 				    uint32_t offset1, uint32_t offset2); }
 476	AUE_PWRITE	STD	{ ssize_t freebsd32_pwrite(int fd, \
 				    const void *buf, size_t nbyte, \
 				    uint32_t offset1, uint32_t offset2); }
 477	AUE_MMAP	STD 	{ caddr_t freebsd32_mmap(caddr_t addr, \
 				    size_t len, int prot, int flags, int fd, \
 				    uint32_t pos1, uint32_t pos2); }
 478	AUE_LSEEK	STD	{ off_t freebsd32_lseek(int fd, \
 				    uint32_t offset1, uint32_t offset2, \
 				    int whence); }
 479	AUE_TRUNCATE	STD	{ int freebsd32_truncate(char *path, \
 				    uint32_t length1, uint32_t length2); }
 480	AUE_FTRUNCATE	STD	{ int freebsd32_ftruncate(int fd, \
 				    uint32_t length1, uint32_t length2); }
 #endif
 481	AUE_THR_KILL2	NOPROTO	{ int thr_kill2(pid_t pid, long id, int sig); }
 482	AUE_SHMOPEN	NOPROTO	{ int shm_open(const char *path, int flags, \
 				    mode_t mode); }
 483	AUE_SHMUNLINK	NOPROTO	{ int shm_unlink(const char *path); }
 484	AUE_NULL	NOPROTO	{ int cpuset(cpusetid_t *setid); }
 #ifdef PAD64_REQUIRED
 485	AUE_NULL	STD	{ int freebsd32_cpuset_setid(cpuwhich_t which, \
 				    int pad, \
 				    uint32_t id1, uint32_t id2, \
 				    cpusetid_t setid); }
 #else
 485	AUE_NULL	STD	{ int freebsd32_cpuset_setid(cpuwhich_t which, \
 				    uint32_t id1, uint32_t id2, \
 				    cpusetid_t setid); }
 #endif
 486	AUE_NULL	STD	{ int freebsd32_cpuset_getid(cpulevel_t level, \
 				    cpuwhich_t which, \
 				    uint32_t id1, uint32_t id2, \
 				    cpusetid_t *setid); }
 487	AUE_NULL	STD	{ int freebsd32_cpuset_getaffinity( \
 				    cpulevel_t level, cpuwhich_t which, \
 				    uint32_t id1, uint32_t id2, \
 				    size_t cpusetsize, \
 				    cpuset_t *mask); }
 488	AUE_NULL	STD	{ int freebsd32_cpuset_setaffinity( \
 				    cpulevel_t level, cpuwhich_t which, \
 				    uint32_t id1, uint32_t id2, \
 				    size_t cpusetsize, \
 				    const cpuset_t *mask); }
 489	AUE_FACCESSAT	NOPROTO	{ int faccessat(int fd, char *path, int amode, \
 				    int flag); }
 490	AUE_FCHMODAT	NOPROTO	{ int fchmodat(int fd, const char *path, \
 				    mode_t mode, int flag); }
 491	AUE_FCHOWNAT	NOPROTO	{ int fchownat(int fd, char *path, uid_t uid, \
 				    gid_t gid, int flag); }
 492	AUE_FEXECVE	STD	{ int freebsd32_fexecve(int fd, \
 				    uint32_t *argv, uint32_t *envv); }
-493	AUE_FSTATAT	STD	{ int freebsd32_fstatat(int fd, char *path, \
-				    struct stat *buf, int flag); }
+493	AUE_FSTATAT	COMPAT11 { int freebsd32_fstatat(int fd, \
+				    char *path, struct freebsd11_stat32 *buf, \
+				    int flag); }
 494	AUE_FUTIMESAT	STD	{ int freebsd32_futimesat(int fd, char *path, \
 				    struct timeval *times); }
 495	AUE_LINKAT	NOPROTO	{ int linkat(int fd1, char *path1, int fd2, \
 				    char *path2, int flag); }
 496	AUE_MKDIRAT	NOPROTO	{ int mkdirat(int fd, char *path, \
 				    mode_t mode); }
 497	AUE_MKFIFOAT	NOPROTO	{ int mkfifoat(int fd, char *path, \
 				    mode_t mode); }
-498	AUE_MKNODAT	NOPROTO	{ int mknodat(int fd, char *path, \
-				    mode_t mode, dev_t dev); }
+498	AUE_MKNODAT	COMPAT11 { int freebsd32_mknodat(int fd, char *path, \
+				    mode_t mode, uint32_t dev); }
 499	AUE_OPENAT_RWTC	NOPROTO	{ int openat(int fd, char *path, int flag, \
 				    mode_t mode); }
 500	AUE_READLINKAT	NOPROTO	{ int readlinkat(int fd, char *path, char *buf, \
 				    size_t bufsize); }
 501	AUE_RENAMEAT	NOPROTO	{ int renameat(int oldfd, char *old, int newfd, \
 				    const char *new); }
 502	AUE_SYMLINKAT	NOPROTO	{ int symlinkat(char *path1, int fd, \
 				    char *path2); }
 503	AUE_UNLINKAT	NOPROTO	{ int unlinkat(int fd, char *path, \
 				    int flag); }
 504	AUE_POSIX_OPENPT	NOPROTO	{ int posix_openpt(int flags); }
 ; 505 is initialised by the kgssapi code, if present.
 505	AUE_NULL	UNIMPL	gssd_syscall
 506	AUE_JAIL_GET	STD	{ int freebsd32_jail_get(struct iovec32 *iovp, \
 				    unsigned int iovcnt, int flags); }
 507	AUE_JAIL_SET	STD	{ int freebsd32_jail_set(struct iovec32 *iovp, \
 				    unsigned int iovcnt, int flags); }
 508	AUE_JAIL_REMOVE	NOPROTO	{ int jail_remove(int jid); }
 509	AUE_CLOSEFROM	NOPROTO	{ int closefrom(int lowfd); }
 510	AUE_SEMCTL	NOSTD { int freebsd32_semctl(int semid, int semnum, \
 				    int cmd, union semun32 *arg); }
 511	AUE_MSGCTL	NOSTD	{ int freebsd32_msgctl(int msqid, int cmd, \
 				    struct msqid_ds32 *buf); }
 512	AUE_SHMCTL	NOSTD	{ int freebsd32_shmctl(int shmid, int cmd, \
 				    struct shmid_ds32 *buf); }
 513	AUE_LPATHCONF	NOPROTO	{ int lpathconf(char *path, int name); }
 514	AUE_NULL	OBSOL	cap_new
 515	AUE_CAP_RIGHTS_GET	NOPROTO	{ int __cap_rights_get(int version, \
 				    int fd, cap_rights_t *rightsp); }
 516	AUE_CAP_ENTER	NOPROTO	{ int cap_enter(void); }
 517	AUE_CAP_GETMODE	NOPROTO	{ int cap_getmode(u_int *modep); }
 518	AUE_PDFORK	NOPROTO	{ int pdfork(int *fdp, int flags); }
 519	AUE_PDKILL	NOPROTO	{ int pdkill(int fd, int signum); }
 520	AUE_PDGETPID	NOPROTO	{ int pdgetpid(int fd, pid_t *pidp); }
 521	AUE_PDWAIT	UNIMPL	pdwait4
 522	AUE_SELECT	STD	{ int freebsd32_pselect(int nd, fd_set *in, \
 				    fd_set *ou, fd_set *ex, \
 				    const struct timespec32 *ts, \
 				    const sigset_t *sm); }
 523	AUE_GETLOGINCLASS	NOPROTO	{ int getloginclass(char *namebuf, \
 				    size_t namelen); }
 524	AUE_SETLOGINCLASS	NOPROTO	{ int setloginclass(const char *namebuf); }
 525	AUE_NULL	NOPROTO	{ int rctl_get_racct(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 526	AUE_NULL	NOPROTO	{ int rctl_get_rules(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 527	AUE_NULL	NOPROTO	{ int rctl_get_limits(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 528	AUE_NULL	NOPROTO	{ int rctl_add_rule(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 529	AUE_NULL	NOPROTO	{ int rctl_remove_rule(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 #ifdef PAD64_REQUIRED
 530	AUE_POSIX_FALLOCATE	STD	{ int freebsd32_posix_fallocate(int fd, \
 				    int pad, \
 				    uint32_t offset1, uint32_t offset2,\
 				    uint32_t len1, uint32_t len2); }
 531	AUE_POSIX_FADVISE	STD	{ int freebsd32_posix_fadvise(int fd, \
 				    int pad, \
 				    uint32_t offset1, uint32_t offset2,\
 				    uint32_t len1, uint32_t len2, \
 				    int advice); }
 532	AUE_WAIT6	STD	{ int freebsd32_wait6(int idtype, int pad, \
 				    uint32_t id1, uint32_t id2, \
 				    int *status, int options, \
 				    struct wrusage32 *wrusage, \
 				    siginfo_t *info); }
 #else
 530	AUE_POSIX_FALLOCATE	STD	{ int freebsd32_posix_fallocate(int fd,\
 				    uint32_t offset1, uint32_t offset2,\
 				    uint32_t len1, uint32_t len2); }
 531	AUE_POSIX_FADVISE	STD	{ int freebsd32_posix_fadvise(int fd, \
 				    uint32_t offset1, uint32_t offset2,\
 				    uint32_t len1, uint32_t len2, \
 				    int advice); }
 532	AUE_WAIT6	STD	{ int freebsd32_wait6(int idtype, \
 				    uint32_t id1, uint32_t id2, \
 				    int *status, int options, \
 				    struct wrusage32 *wrusage, \
 				    siginfo_t *info); }
 #endif
 533	AUE_CAP_RIGHTS_LIMIT	NOPROTO	{ \
 				    int cap_rights_limit(int fd, \
 				    cap_rights_t *rightsp); }
 534	AUE_CAP_IOCTLS_LIMIT	STD	{ \
 				    int freebsd32_cap_ioctls_limit(int fd, \
 				    const uint32_t *cmds, size_t ncmds); }
 535	AUE_CAP_IOCTLS_GET	STD	{ \
 				    ssize_t freebsd32_cap_ioctls_get(int fd, \
 				    uint32_t *cmds, size_t maxcmds); }
 536	AUE_CAP_FCNTLS_LIMIT	NOPROTO	{ int cap_fcntls_limit(int fd, \
 				    uint32_t fcntlrights); }
 537	AUE_CAP_FCNTLS_GET	NOPROTO	{ int cap_fcntls_get(int fd, \
 				    uint32_t *fcntlrightsp); }
 538	AUE_BINDAT	NOPROTO	{ int bindat(int fd, int s, caddr_t name, \
 				    int namelen); }
 539	AUE_CONNECTAT	NOPROTO	{ int connectat(int fd, int s, caddr_t name, \
 				    int namelen); }
 540	AUE_CHFLAGSAT	NOPROTO	{ int chflagsat(int fd, const char *path, \
 				    u_long flags, int atflag); }
 541	AUE_ACCEPT	NOPROTO	{ int accept4(int s, \
 				    struct sockaddr * __restrict name, \
 				    __socklen_t * __restrict anamelen, \
 				    int flags); }
 542	AUE_PIPE	NOPROTO	{ int pipe2(int *fildes, int flags); }
 543	AUE_AIO_MLOCK	STD	{ int freebsd32_aio_mlock( \
 				    struct aiocb32 *aiocbp); }
 #ifdef PAD64_REQUIRED
 544	AUE_PROCCTL	STD	{ int freebsd32_procctl(int idtype, int pad, \
 				    uint32_t id1, uint32_t id2, int com, \
 				    void *data); }
 #else
 544	AUE_PROCCTL	STD	{ int freebsd32_procctl(int idtype, \
 				    uint32_t id1, uint32_t id2, int com, \
 				    void *data); }
 #endif
 545	AUE_POLL	STD	{ int freebsd32_ppoll(struct pollfd *fds, \
 				    u_int nfds, const struct timespec32 *ts, \
 				    const sigset_t *set); }
 546	AUE_FUTIMES	STD	{ int freebsd32_futimens(int fd, \
 				    struct timespec *times); }
 547	AUE_FUTIMESAT	STD	{ int freebsd32_utimensat(int fd, \
 				    char *path, \
 				    struct timespec *times, int flag); }
 548	AUE_NULL	NOPROTO	{ int numa_getaffinity(cpuwhich_t which, \
 				    id_t id, \
 				    struct vm_domain_policy *policy); }
 549	AUE_NULL	NOPROTO	{ int numa_setaffinity(cpuwhich_t which, \
 				    id_t id, \
 				    const struct vm_domain_policy *policy); }
 550	AUE_FSYNC	NOPROTO	{ int fdatasync(int fd); }
+551	AUE_FSTAT	STD	{ int freebsd32_fstat(int fd, \
+				    struct stat32 *ub); }
+552	AUE_FSTATAT	STD	{ int freebsd32_fstatat(int fd, \
+				    char *path, struct stat32 *buf, \
+				    int flag); }
+553	AUE_FHSTAT	STD	{ int freebsd32_fhstat( \
+				    const struct fhandle *u_fhp, \
+				    struct stat32 *sb); }
+554	AUE_GETDIRENTRIES STD	{ ssize_t freebsd32_getdirentries( \
+				    int fd, char *buf, size_t count, \
+				    int32_t *basep); }
+555	AUE_STATFS	NOPROTO	{ int statfs(char *path, \
+				    struct statfs32 *buf); }
+556	AUE_FSTATFS	NOPROTO	{ int fstatfs(int fd, struct statfs32 *buf); }
+557	AUE_GETFSSTAT	NOPROTO	{ int getfsstat(struct statfs32 *buf, \
+				    long bufsize, int mode); }
+558	AUE_FHSTATFS	NOPROTO	{ int fhstatfs(const struct fhandle *u_fhp, \
+				    struct statfs32 *buf); }
+559	AUE_MKNODAT	NOPROTO	{ int mknodat(int fd, char *path, mode_t mode, \
+				    dev_t dev); }
Index: head/sys/compat/linux/linux_file.c
===================================================================
--- head/sys/compat/linux/linux_file.c	(revision 318735)
+++ head/sys/compat/linux/linux_file.c	(revision 318736)
@@ -1,1673 +1,1677 @@
 /*-
  * Copyright (c) 1994-1995 Søren Schmidt
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer
  *    in this position and unchanged.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/capsicum.h>
 #include <sys/conf.h>
 #include <sys/dirent.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysproto.h>
 #include <sys/tty.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 
 #ifdef COMPAT_LINUX32
 #include <machine/../linux32/linux.h>
 #include <machine/../linux32/linux32_proto.h>
 #else
 #include <machine/../linux/linux.h>
 #include <machine/../linux/linux_proto.h>
 #endif
 #include <compat/linux/linux_misc.h>
 #include <compat/linux/linux_util.h>
 #include <compat/linux/linux_file.h>
 
 static int	linux_common_open(struct thread *, int, char *, int, int);
 static int	linux_getdents_error(struct thread *, int, int);
 
 
 /*
  * creat(2) entry point: convert the user-supplied path and open it
  * write-only with O_CREAT | O_TRUNC, using the mode given by the caller.
  * The converted path is released before returning.
  */
 int
 linux_creat(struct thread *td, struct linux_creat_args *args)
 {
 	char *kpath;
 	int rv;
 
 	LCONVPATHEXIST(td, args->path, &kpath);
 #ifdef DEBUG
 	if (ldebug(creat))
 		printf(ARGS(creat, "%s, %d"), kpath, args->mode);
 #endif
 	rv = kern_openat(td, AT_FDCWD, kpath, UIO_SYSSPACE,
 	    O_CREAT | O_TRUNC | O_WRONLY, args->mode);
 	LFREEPATH(kpath);
 
 	return (rv);
 }
 
 
 static int
 linux_common_open(struct thread *td, int dirfd, char *path, int l_flags, int mode)
 {
 	cap_rights_t rights;
 	struct proc *p = td->td_proc;
 	struct file *fp;
 	int fd;
 	int bsd_flags, error;
 
 	bsd_flags = 0;
 	switch (l_flags & LINUX_O_ACCMODE) {
 	case LINUX_O_WRONLY:
 		bsd_flags |= O_WRONLY;
 		break;
 	case LINUX_O_RDWR:
 		bsd_flags |= O_RDWR;
 		break;
 	default:
 		bsd_flags |= O_RDONLY;
 	}
 	if (l_flags & LINUX_O_NDELAY)
 		bsd_flags |= O_NONBLOCK;
 	if (l_flags & LINUX_O_APPEND)
 		bsd_flags |= O_APPEND;
 	if (l_flags & LINUX_O_SYNC)
 		bsd_flags |= O_FSYNC;
 	if (l_flags & LINUX_O_NONBLOCK)
 		bsd_flags |= O_NONBLOCK;
 	if (l_flags & LINUX_FASYNC)
 		bsd_flags |= O_ASYNC;
 	if (l_flags & LINUX_O_CREAT)
 		bsd_flags |= O_CREAT;
 	if (l_flags & LINUX_O_TRUNC)
 		bsd_flags |= O_TRUNC;
 	if (l_flags & LINUX_O_EXCL)
 		bsd_flags |= O_EXCL;
 	if (l_flags & LINUX_O_NOCTTY)
 		bsd_flags |= O_NOCTTY;
 	if (l_flags & LINUX_O_DIRECT)
 		bsd_flags |= O_DIRECT;
 	if (l_flags & LINUX_O_NOFOLLOW)
 		bsd_flags |= O_NOFOLLOW;
 	if (l_flags & LINUX_O_DIRECTORY)
 		bsd_flags |= O_DIRECTORY;
 	/* XXX LINUX_O_NOATIME: unable to be easily implemented. */
 
 	error = kern_openat(td, dirfd, path, UIO_SYSSPACE, bsd_flags, mode);
 	if (error != 0)
 		goto done;
 	if (bsd_flags & O_NOCTTY)
 		goto done;
 
 	/*
 	 * XXX In between kern_open() and fget(), another process
 	 * having the same filedesc could use that fd without
 	 * checking below.
 	*/
 	fd = td->td_retval[0];
 	if (fget(td, fd, cap_rights_init(&rights, CAP_IOCTL), &fp) == 0) {
 		if (fp->f_type != DTYPE_VNODE) {
 			fdrop(fp, td);
 			goto done;
 		}
 		sx_slock(&proctree_lock);
 		PROC_LOCK(p);
 		if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
 			PROC_UNLOCK(p);
 			sx_sunlock(&proctree_lock);
 			/* XXXPJD: Verify if TIOCSCTTY is allowed. */
 			(void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
 			    td->td_ucred, td);
 		} else {
 			PROC_UNLOCK(p);
 			sx_sunlock(&proctree_lock);
 		}
 		fdrop(fp, td);
 	}
 
 done:
 #ifdef DEBUG
 	if (ldebug(open))
 		printf(LMSG("open returns error %d"), error);
 #endif
 	LFREEPATH(path);
 	return (error);
 }
 
 /*
  * openat(2) entry point: map LINUX_AT_FDCWD to AT_FDCWD, convert the user
  * path (passing 1 to the conversion when LINUX_O_CREAT is set, 0
  * otherwise) and delegate to linux_common_open(), which also releases the
  * converted path.
  */
 int
 linux_openat(struct thread *td, struct linux_openat_args *args)
 {
 	char *kpath;
 	int dirfd;
 
 	if (args->dfd == LINUX_AT_FDCWD)
 		dirfd = AT_FDCWD;
 	else
 		dirfd = args->dfd;
 
 	if ((args->flags & LINUX_O_CREAT) == 0)
 		LCONVPATH_AT(td, args->filename, &kpath, 0, dirfd);
 	else
 		LCONVPATH_AT(td, args->filename, &kpath, 1, dirfd);
 #ifdef DEBUG
 	if (ldebug(openat))
 		printf(ARGS(openat, "%i, %s, 0x%x, 0x%x"), args->dfd,
 		    kpath, args->flags, args->mode);
 #endif
 	return (linux_common_open(td, dirfd, kpath, args->flags,
 	    args->mode));
 }
 
 /*
  * open(2) entry point: convert the user path (with the "create" variant
  * of the conversion when LINUX_O_CREAT is set) and delegate to
  * linux_common_open() relative to the current working directory.
  */
 int
 linux_open(struct thread *td, struct linux_open_args *args)
 {
 	char *kpath;
 
 	if ((args->flags & LINUX_O_CREAT) == 0)
 		LCONVPATHEXIST(td, args->path, &kpath);
 	else
 		LCONVPATHCREAT(td, args->path, &kpath);
 #ifdef DEBUG
 	if (ldebug(open))
 		printf(ARGS(open, "%s, 0x%x, 0x%x"),
 		    kpath, args->flags, args->mode);
 #endif
 	return (linux_common_open(td, AT_FDCWD, kpath, args->flags,
 	    args->mode));
 }
 
 /*
  * lseek(2) entry point: a direct pass-through to kern_lseek().
  */
 int
 linux_lseek(struct thread *td, struct linux_lseek_args *args)
 {
 	int rv;
 
 #ifdef DEBUG
 	if (ldebug(lseek))
 		printf(ARGS(lseek, "%d, %ld, %d"),
 		    args->fdes, (long)args->off, args->whence);
 #endif
 	rv = kern_lseek(td, args->fdes, args->off, args->whence);
 	return (rv);
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 /*
  * _llseek(2) entry point: the 64-bit offset arrives split into a high
  * and a low half.  After a successful seek the resulting offset is
  * copied out to args->res and the syscall return value is reset to 0.
  */
 int
 linux_llseek(struct thread *td, struct linux_llseek_args *args)
 {
 	off_t off;
 	int error;
 
 #ifdef DEBUG
 	if (ldebug(llseek))
 		printf(ARGS(llseek, "%d, %d:%d, %d"),
 		    args->fd, args->ohigh, args->olow, args->whence);
 #endif
 	off = (args->olow) | (((off_t) args->ohigh) << 32);
 
 	error = kern_lseek(td, args->fd, off, args->whence);
 	if (error == 0)
 		error = copyout(td->td_retval, args->res, sizeof(off_t));
 	if (error == 0)
 		td->td_retval[0] = 0;
 
 	return (error);
 }
 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 
 /*
  * Note that linux_getdents(2) and linux_getdents64(2) have the same
  * arguments. They only differ in the definition of struct dirent they
  * operate on.
  * Note that linux_readdir(2) is a special case of linux_getdents(2)
  * where count always equals 1, meaning that the buffer is one
  * dirent-structure in size and that the code can't handle more anyway.
  * Note that linux_readdir(2) can't be implemented by means of
  * linux_getdents(2): when the *dent buffer size is equal to 1,
  * linux_getdents(2) would trash the user stack.
  */
 
 /*
  * Post-process an error from kern_getdirentries(): if `fd' cannot be
  * resolved to a vnode, return that lookup error; if it resolves to
  * something that is not a directory, return ENOTDIR; otherwise pass the
  * original error `err' through unchanged.
  */
 static int
 linux_getdents_error(struct thread *td, int fd, int err)
 {
 	cap_rights_t rights;
 	struct file *fp;
 	int error, isdir;
 
 	/* Linux returns ENOTDIR when fd is not a directory. */
 	error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp);
 	if (error != 0)
 		return (error);
 
 	/* Inspect the vnode type while the file reference is still held. */
 	isdir = (fp->f_vnode->v_type == VDIR);
 	fdrop(fp, td);
 
 	return (isdir ? err : ENOTDIR);
 }
 
 /*
  * Directory record in the layout written by linux_getdents() and
  * linux_readdir().  Only offsetof(d_name) plus the actual name length is
  * copied out per record (see LINUX_RECLEN), not the whole structure.
  */
 struct l_dirent {
 	/* Filled from the native entry's d_fileno. */
 	l_ulong		d_ino;
 	/* Offset value reported for the entry. */
 	l_off_t		d_off;
 	/* Record length (linux_readdir() stores the name length here). */
 	l_ushort	d_reclen;
 	/* NUL-terminated entry name. */
 	char		d_name[LINUX_NAME_MAX + 1];
 };
 
 /*
  * Directory record in the layout written by linux_getdents64().  Unlike
  * struct l_dirent it carries the entry type as a regular member.
  */
 struct l_dirent64 {
 	/* Filled from the native entry's d_fileno. */
 	uint64_t	d_ino;
 	/* Offset value reported for the entry. */
 	int64_t		d_off;
 	/* Length of this record as copied out to the user. */
 	l_ushort	d_reclen;
 	/* Copied from the native entry's d_type. */
 	u_char		d_type;
 	/* NUL-terminated entry name. */
 	char		d_name[LINUX_NAME_MAX + 1];
 };
 
 /*
  * Linux uses the last byte in the dirent buffer to store d_type,
  * at least glibc-2.7 requires it. That is why l_dirent is padded with 2 bytes.
  *
  * LINUX_RECLEN(namlen) is the size of one l_dirent record for a name of
  * `namlen' bytes: the fixed header, the name, 2 extra bytes, rounded up
  * to a multiple of sizeof(l_ulong).  linux_getdents() stores d_type in
  * the final byte of the record; the other extra byte presumably holds
  * the name's terminating NUL.
  */
 #define LINUX_RECLEN(namlen)						\
     roundup(offsetof(struct l_dirent, d_name) + (namlen) + 2, sizeof(l_ulong))
 
 /*
  * Size of one l_dirent64 record for a name of `namlen' bytes: header,
  * name and 1 extra byte, rounded up to a multiple of sizeof(uint64_t).
  * d_type is a structure member here, so no trailing type byte is added.
  */
 #define LINUX_RECLEN64(namlen)						\
     roundup(offsetof(struct l_dirent64, d_name) + (namlen) + 1,		\
     sizeof(uint64_t))
 
 /* NOTE(review): not referenced by the code visible in this section. */
 #define	LINUX_DIRBLKSIZ		512
 
 /*
  * Linux l_dirent is bigger than FreeBSD dirent, thus the buffer size
  * passed to kern_getdirentries() must be smaller than the one passed
  * to linux_getdents() by certain factor.
  *
  * Each macro scales a Linux buffer size X by the ratio of the native
  * dirent header size to the Linux dirent header size (integer
  * arithmetic, multiply first, then divide).  The argument and the whole
  * expansion are parenthesized and there is no trailing semicolon, so the
  * macros are safe with compound arguments and inside larger expressions.
  */
 #define	LINUX_RECLEN_RATIO(X)						\
     ((X) * offsetof(struct dirent, d_name) /				\
     offsetof(struct l_dirent, d_name))
 #define	LINUX_RECLEN64_RATIO(X)						\
     ((X) * offsetof(struct dirent, d_name) /				\
     offsetof(struct l_dirent64, d_name))
 
 /*
  * getdents(2) entry point.
  *
  * Reads native directory entries into a kernel buffer with
  * kern_getdirentries(), converts them one at a time into struct l_dirent
  * records in a scratch buffer and copies each record out to args->dent.
  * On success the total number of bytes copied out is stored in
  * td->td_retval[0].  Returns EINVAL when the next record does not fit in
  * the space remaining in the user buffer, a copyout() error, or the
  * error from kern_getdirentries() as adjusted by linux_getdents_error().
  *
  * Both temporary buffers are allocated from M_TEMP and therefore must be
  * released to M_TEMP as well (the same pairing linux_getdents64() uses).
  */
 int
 linux_getdents(struct thread *td, struct linux_getdents_args *args)
 {
 	struct dirent *bdp;
 	caddr_t inp, buf;		/* BSD-format */
 	int len, reclen;		/* BSD-format */
 	caddr_t outp;			/* Linux-format */
 	int resid, linuxreclen;		/* Linux-format */
 	caddr_t lbuf;			/* Linux-format */
-	long base;
+	off_t base;
 	struct l_dirent *linux_dirent;
 	int buflen, error;
 	size_t retval;
 
 #ifdef DEBUG
 	if (ldebug(getdents))
 		printf(ARGS(getdents, "%d, *, %d"), args->fd, args->count);
 #endif
 	/* Native buffer: user size scaled down, capped at MAXBSIZE. */
 	buflen = LINUX_RECLEN_RATIO(args->count);
 	buflen = min(buflen, MAXBSIZE);
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
 
 	error = kern_getdirentries(td, args->fd, buf, buflen,
 	    &base, NULL, UIO_SYSSPACE);
 	if (error != 0) {
 		error = linux_getdents_error(td, args->fd, error);
 		goto out1;
 	}
 
 	/* Scratch record, large enough for the longest possible name. */
 	lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
 
 	len = td->td_retval[0];
 	inp = buf;
 	outp = (caddr_t)args->dent;
 	resid = args->count;
 	retval = 0;
 
 	while (len > 0) {
 		bdp = (struct dirent *) inp;
 		reclen = bdp->d_reclen;
 		linuxreclen = LINUX_RECLEN(bdp->d_namlen);
 		/*
 		 * No more space in the user supplied dirent buffer.
 		 * Return EINVAL.
 		 */
 		if (resid < linuxreclen) {
 			error = EINVAL;
 			goto out;
 		}
 
 		linux_dirent = (struct l_dirent*)lbuf;
 		linux_dirent->d_ino = bdp->d_fileno;
 		linux_dirent->d_off = base + reclen;
 		linux_dirent->d_reclen = linuxreclen;
 		/*
 		 * Copy d_type to last byte of l_dirent buffer
 		 */
 		lbuf[linuxreclen - 1] = bdp->d_type;
 		strlcpy(linux_dirent->d_name, bdp->d_name,
 		    linuxreclen - offsetof(struct l_dirent, d_name)-1);
 		error = copyout(linux_dirent, outp, linuxreclen);
 		if (error != 0)
 			goto out;
 
 		/* Advance the native cursor ... */
 		inp += reclen;
 		base += reclen;
 		len -= reclen;
 
 		/* ... and the Linux-side output cursor. */
 		retval += linuxreclen;
 		outp += linuxreclen;
 		resid -= linuxreclen;
 	}
 	td->td_retval[0] = retval;
 
 out:
 	free(lbuf, M_TEMP);
 out1:
 	free(buf, M_TEMP);
 	return (error);
 }
 
 /*
  * getdents64(2) entry point.
  *
  * Same flow as linux_getdents(), but records are emitted as struct
  * l_dirent64: native entries are read with kern_getdirentries(),
  * converted one at a time in a scratch buffer and copied out to
  * args->dirent.  On success td->td_retval[0] holds the number of bytes
  * copied out.  Returns EINVAL when the next record does not fit in the
  * remaining user buffer, a copyout() error, or the kern_getdirentries()
  * error as adjusted by linux_getdents_error().
  */
 int
 linux_getdents64(struct thread *td, struct linux_getdents64_args *args)
 {
 	struct dirent *bdp;
 	caddr_t inp, buf;		/* BSD-format */
 	int len, reclen;		/* BSD-format */
 	caddr_t outp;			/* Linux-format */
 	int resid, linuxreclen;		/* Linux-format */
 	caddr_t lbuf;			/* Linux-format */
-	long base;
+	off_t base;
 	struct l_dirent64 *linux_dirent64;
 	int buflen, error;
 	size_t retval;
 
 #ifdef DEBUG
 	if (ldebug(getdents64))
 		uprintf(ARGS(getdents64, "%d, *, %d"), args->fd, args->count);
 #endif
 	/* Native buffer: user size scaled down, capped at MAXBSIZE. */
 	buflen = LINUX_RECLEN64_RATIO(args->count);
 	buflen = min(buflen, MAXBSIZE);
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
 
 	error = kern_getdirentries(td, args->fd, buf, buflen,
 	    &base, NULL, UIO_SYSSPACE);
 	if (error != 0) {
 		error = linux_getdents_error(td, args->fd, error);
 		goto out1;
 	}
 
 	/* Scratch record, large enough for the longest possible name. */
 	lbuf = malloc(LINUX_RECLEN64(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
 
 	/* td_retval[0] holds the byte count from kern_getdirentries(). */
 	len = td->td_retval[0];
 	inp = buf;
 	outp = (caddr_t)args->dirent;
 	resid = args->count;
 	retval = 0;
 
 	while (len > 0) {
 		bdp = (struct dirent *) inp;
 		reclen = bdp->d_reclen;
 		linuxreclen = LINUX_RECLEN64(bdp->d_namlen);
 		/*
 		 * No more space in the user supplied dirent buffer.
 		 * Return EINVAL.
 		 */
 		if (resid < linuxreclen) {
 			error = EINVAL;
 			goto out;
 		}
 
 		linux_dirent64 = (struct l_dirent64*)lbuf;
 		linux_dirent64->d_ino = bdp->d_fileno;
 		linux_dirent64->d_off = base + reclen;
 		linux_dirent64->d_reclen = linuxreclen;
 		linux_dirent64->d_type = bdp->d_type;
 		strlcpy(linux_dirent64->d_name, bdp->d_name,
 		    linuxreclen - offsetof(struct l_dirent64, d_name));
 		error = copyout(linux_dirent64, outp, linuxreclen);
 		if (error != 0)
 			goto out;
 
 		/* Advance the native cursor. */
 		inp += reclen;
 		base += reclen;
 		len -= reclen;
 
 		/* Advance the Linux-side output cursor. */
 		retval += linuxreclen;
 		outp += linuxreclen;
 		resid -= linuxreclen;
 	}
 	td->td_retval[0] = retval;
 
 out:
 	free(lbuf, M_TEMP);
 out1:
 	free(buf, M_TEMP);
 	return (error);
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 /*
  * Old-style readdir(2) entry point: returns at most one directory entry.
  *
  * Reads native entries with kern_getdirentries() into a buffer sized for
  * a single maximum-length record, converts the first entry to struct
  * l_dirent and copies it out to args->dent.  Here d_off receives the
  * Linux record length and d_reclen the name length.  On success
  * td->td_retval[0] is the number of bytes copied out; when no entries
  * were read the call returns 0 with td_retval[0] left at 0.
  *
  * Both temporary buffers are allocated from M_TEMP and therefore must be
  * released to M_TEMP as well.
  */
 int
 linux_readdir(struct thread *td, struct linux_readdir_args *args)
 {
 	struct dirent *bdp;
 	caddr_t buf;			/* BSD-format */
 	int linuxreclen;		/* Linux-format */
 	caddr_t lbuf;			/* Linux-format */
-	long base;
+	off_t base;
 	struct l_dirent *linux_dirent;
 	int buflen, error;
 
 #ifdef DEBUG
 	if (ldebug(readdir))
 		printf(ARGS(readdir, "%d, *"), args->fd);
 #endif
 	buflen = LINUX_RECLEN(LINUX_NAME_MAX);
 	buflen = LINUX_RECLEN_RATIO(buflen);
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
 
 	error = kern_getdirentries(td, args->fd, buf, buflen,
 	    &base, NULL, UIO_SYSSPACE);
 	if (error != 0) {
 		error = linux_getdents_error(td, args->fd, error);
 		goto out;
 	}
 	/* Nothing was read: report 0 bytes. */
 	if (td->td_retval[0] == 0)
 		goto out;
 
 	lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
 
 	/* Only the first native entry is converted. */
 	bdp = (struct dirent *) buf;
 	linuxreclen = LINUX_RECLEN(bdp->d_namlen);
 
 	linux_dirent = (struct l_dirent*)lbuf;
 	linux_dirent->d_ino = bdp->d_fileno;
 	linux_dirent->d_off = linuxreclen;
 	linux_dirent->d_reclen = bdp->d_namlen;
 	strlcpy(linux_dirent->d_name, bdp->d_name,
 	    linuxreclen - offsetof(struct l_dirent, d_name));
 	error = copyout(linux_dirent, args->dent, linuxreclen);
 	if (error == 0)
 		td->td_retval[0] = linuxreclen;
 
 	free(lbuf, M_TEMP);
 out:
 	free(buf, M_TEMP);
 	return (error);
 }
 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 
 
 /*
  * These exist mainly for hooks for doing /compat/linux translation.
  */
 
 /*
  * access(2) entry point: reject mode bits outside F_OK/X_OK/W_OK/R_OK
  * with EINVAL, then convert the path and call kern_accessat() relative
  * to the current working directory.
  */
 int
 linux_access(struct thread *td, struct linux_access_args *args)
 {
 	const int valid = R_OK | W_OK | X_OK | F_OK;
 	char *kpath;
 	int rv;
 
 	/* linux convention */
 	if ((args->amode & ~valid) != 0)
 		return (EINVAL);
 
 	LCONVPATHEXIST(td, args->path, &kpath);
 
 #ifdef DEBUG
 	if (ldebug(access))
 		printf(ARGS(access, "%s, %d"), kpath, args->amode);
 #endif
 	rv = kern_accessat(td, AT_FDCWD, kpath, UIO_SYSSPACE, 0,
 	    args->amode);
 	LFREEPATH(kpath);
 
 	return (rv);
 }
 
 /*
  * faccessat(2) entry point: like linux_access(), but the path is
  * resolved relative to args->dfd (LINUX_AT_FDCWD maps to AT_FDCWD).
  */
 int
 linux_faccessat(struct thread *td, struct linux_faccessat_args *args)
 {
 	const int valid = R_OK | W_OK | X_OK | F_OK;
 	char *kpath;
 	int dirfd, rv;
 
 	/* linux convention */
 	if ((args->amode & ~valid) != 0)
 		return (EINVAL);
 
 	if (args->dfd == LINUX_AT_FDCWD)
 		dirfd = AT_FDCWD;
 	else
 		dirfd = args->dfd;
 	LCONVPATHEXIST_AT(td, args->filename, &kpath, dirfd);
 
 #ifdef DEBUG
 	if (ldebug(access))
 		printf(ARGS(access, "%s, %d"), kpath, args->amode);
 #endif
 
 	rv = kern_accessat(td, dirfd, kpath, UIO_SYSSPACE, 0, args->amode);
 	LFREEPATH(kpath);
 
 	return (rv);
 }
 
 int
 linux_unlink(struct thread *td, struct linux_unlink_args *args)
 {
 	char *path;
 	int error;
 	struct stat st;
 
 	LCONVPATHEXIST(td, args->path, &path);
 
 #ifdef DEBUG
 	if (ldebug(unlink))
 		printf(ARGS(unlink, "%s"), path);
 #endif
 
 	error = kern_unlinkat(td, AT_FDCWD, path, UIO_SYSSPACE, 0);
 	if (error == EPERM) {
 		/* Introduce POSIX noncompliant behaviour of Linux */
 		if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st,
 		    NULL) == 0) {
 			if (S_ISDIR(st.st_mode))
 				error = EISDIR;
 		}
 	}
 	LFREEPATH(path);
 	return (error);
 }
 
 int
 linux_unlinkat(struct thread *td, struct linux_unlinkat_args *args)
 {
 	char *path;
 	int error, dfd;
 	struct stat st;
 
 	if (args->flag & ~LINUX_AT_REMOVEDIR)
 		return (EINVAL);
 
 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
 	LCONVPATHEXIST_AT(td, args->pathname, &path, dfd);
 
 #ifdef DEBUG
 	if (ldebug(unlinkat))
 		printf(ARGS(unlinkat, "%s"), path);
 #endif
 
 	if (args->flag & LINUX_AT_REMOVEDIR)
 		error = kern_rmdirat(td, dfd, path, UIO_SYSSPACE);
 	else
 		error = kern_unlinkat(td, dfd, path, UIO_SYSSPACE, 0);
 	if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) {
 		/* Introduce POSIX noncompliant behaviour of Linux */
 		if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path,
 		    UIO_SYSSPACE, &st, NULL) == 0 && S_ISDIR(st.st_mode))
 			error = EISDIR;
 	}
 	LFREEPATH(path);
 	return (error);
 }
 /*
  * chdir(2) entry point: convert the path and call kern_chdir().
  */
 int
 linux_chdir(struct thread *td, struct linux_chdir_args *args)
 {
 	char *kpath;
 	int rv;
 
 	LCONVPATHEXIST(td, args->path, &kpath);
 
 #ifdef DEBUG
 	if (ldebug(chdir))
 		printf(ARGS(chdir, "%s"), kpath);
 #endif
 	rv = kern_chdir(td, kpath, UIO_SYSSPACE);
 	LFREEPATH(kpath);
 
 	return (rv);
 }
 
 /*
  * chmod(2) entry point: convert the path and call kern_fchmodat()
  * relative to the current working directory with no flags.
  */
 int
 linux_chmod(struct thread *td, struct linux_chmod_args *args)
 {
 	char *kpath;
 	int rv;
 
 	LCONVPATHEXIST(td, args->path, &kpath);
 
 #ifdef DEBUG
 	if (ldebug(chmod))
 		printf(ARGS(chmod, "%s, %d"), kpath, args->mode);
 #endif
 	rv = kern_fchmodat(td, AT_FDCWD, kpath, UIO_SYSSPACE,
 	    args->mode, 0);
 	LFREEPATH(kpath);
 
 	return (rv);
 }
 
 /*
  * fchmodat(2) entry point: like linux_chmod(), but relative to
  * args->dfd (LINUX_AT_FDCWD maps to AT_FDCWD).
  */
 int
 linux_fchmodat(struct thread *td, struct linux_fchmodat_args *args)
 {
 	char *kpath;
 	int dirfd, rv;
 
 	if (args->dfd == LINUX_AT_FDCWD)
 		dirfd = AT_FDCWD;
 	else
 		dirfd = args->dfd;
 	LCONVPATHEXIST_AT(td, args->filename, &kpath, dirfd);
 
 #ifdef DEBUG
 	if (ldebug(fchmodat))
 		printf(ARGS(fchmodat, "%s, %d"), kpath, args->mode);
 #endif
 
 	rv = kern_fchmodat(td, dirfd, kpath, UIO_SYSSPACE, args->mode, 0);
 	LFREEPATH(kpath);
 
 	return (rv);
 }
 
 /*
  * mkdir(2) entry point: convert the path with the "create" variant of
  * the conversion and call kern_mkdirat() relative to the current
  * working directory.
  */
 int
 linux_mkdir(struct thread *td, struct linux_mkdir_args *args)
 {
 	char *kpath;
 	int rv;
 
 	LCONVPATHCREAT(td, args->path, &kpath);
 
 #ifdef DEBUG
 	if (ldebug(mkdir))
 		printf(ARGS(mkdir, "%s, %d"), kpath, args->mode);
 #endif
 	rv = kern_mkdirat(td, AT_FDCWD, kpath, UIO_SYSSPACE, args->mode);
 	LFREEPATH(kpath);
 
 	return (rv);
 }
 
 /*
  * mkdirat(2) entry point: like linux_mkdir(), but relative to
  * args->dfd (LINUX_AT_FDCWD maps to AT_FDCWD).
  */
 int
 linux_mkdirat(struct thread *td, struct linux_mkdirat_args *args)
 {
 	char *kpath;
 	int dirfd, rv;
 
 	if (args->dfd == LINUX_AT_FDCWD)
 		dirfd = AT_FDCWD;
 	else
 		dirfd = args->dfd;
 	LCONVPATHCREAT_AT(td, args->pathname, &kpath, dirfd);
 
 #ifdef DEBUG
 	if (ldebug(mkdirat))
 		printf(ARGS(mkdirat, "%s, %d"), kpath, args->mode);
 #endif
 	rv = kern_mkdirat(td, dirfd, kpath, UIO_SYSSPACE, args->mode);
 	LFREEPATH(kpath);
 
 	return (rv);
 }
 
 /*
  * rmdir(2) entry point: convert the path and call kern_rmdirat()
  * relative to the current working directory.
  */
 int
 linux_rmdir(struct thread *td, struct linux_rmdir_args *args)
 {
 	char *kpath;
 	int rv;
 
 	LCONVPATHEXIST(td, args->path, &kpath);
 
 #ifdef DEBUG
 	if (ldebug(rmdir))
 		printf(ARGS(rmdir, "%s"), kpath);
 #endif
 	rv = kern_rmdirat(td, AT_FDCWD, kpath, UIO_SYSSPACE);
 	LFREEPATH(kpath);
 
 	return (rv);
 }
 
 /*
  * rename(2) entry point: convert both paths and call kern_renameat()
  * with both relative to the current working directory.  The destination
  * is converted by calling linux_emul_convpath() directly so that the
  * source path can be released if that conversion yields no path.
  */
 int
 linux_rename(struct thread *td, struct linux_rename_args *args)
 {
 	char *src, *dst;
 	int rv;
 
 	LCONVPATHEXIST(td, args->from, &src);
 	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
 	rv = linux_emul_convpath(td, args->to, UIO_USERSPACE, &dst, 1,
 	    AT_FDCWD);
 	if (dst == NULL) {
 		LFREEPATH(src);
 		return (rv);
 	}
 
 #ifdef DEBUG
 	if (ldebug(rename))
 		printf(ARGS(rename, "%s, %s"), src, dst);
 #endif
 	rv = kern_renameat(td, AT_FDCWD, src, AT_FDCWD, dst, UIO_SYSSPACE);
 	LFREEPATH(src);
 	LFREEPATH(dst);
 
 	return (rv);
 }
 
 /*
  * renameat(2) entry point: like linux_rename(), with the old and new
  * names resolved relative to args->olddfd and args->newdfd respectively
  * (LINUX_AT_FDCWD maps to AT_FDCWD for each).
  */
 int
 linux_renameat(struct thread *td, struct linux_renameat_args *args)
 {
 	char *src, *dst;
 	int rv, srcfd, dstfd;
 
 	if (args->olddfd == LINUX_AT_FDCWD)
 		srcfd = AT_FDCWD;
 	else
 		srcfd = args->olddfd;
 	if (args->newdfd == LINUX_AT_FDCWD)
 		dstfd = AT_FDCWD;
 	else
 		dstfd = args->newdfd;
 
 	LCONVPATHEXIST_AT(td, args->oldname, &src, srcfd);
 	/* Expand LCONVPATHCREATE so that `from' can be freed on errors */
 	rv = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &dst, 1,
 	    dstfd);
 	if (dst == NULL) {
 		LFREEPATH(src);
 		return (rv);
 	}
 
 #ifdef DEBUG
 	if (ldebug(renameat))
 		printf(ARGS(renameat, "%s, %s"), src, dst);
 #endif
 	rv = kern_renameat(td, srcfd, src, dstfd, dst, UIO_SYSSPACE);
 	LFREEPATH(src);
 	LFREEPATH(dst);
 
 	return (rv);
 }
 
 /*
  * symlink(2) entry point: convert args->path and args->to, then call
  * kern_symlinkat() with the link name relative to the current working
  * directory.  The second conversion is done by hand so that the first
  * path can be released if it yields no path.
  */
 int
 linux_symlink(struct thread *td, struct linux_symlink_args *args)
 {
 	char *target, *linkname;
 	int rv;
 
 	LCONVPATHEXIST(td, args->path, &target);
 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
 	rv = linux_emul_convpath(td, args->to, UIO_USERSPACE, &linkname, 1,
 	    AT_FDCWD);
 	if (linkname == NULL) {
 		LFREEPATH(target);
 		return (rv);
 	}
 
 #ifdef DEBUG
 	if (ldebug(symlink))
 		printf(ARGS(symlink, "%s, %s"), target, linkname);
 #endif
 	rv = kern_symlinkat(td, target, AT_FDCWD, linkname, UIO_SYSSPACE);
 	LFREEPATH(target);
 	LFREEPATH(linkname);
 
 	return (rv);
 }
 
 /*
  * symlinkat(2) entry point: like linux_symlink(), with the new link
  * name resolved relative to args->newdfd (LINUX_AT_FDCWD maps to
  * AT_FDCWD).
  */
 int
 linux_symlinkat(struct thread *td, struct linux_symlinkat_args *args)
 {
 	char *target, *linkname;
 	int dirfd, rv;
 
 	if (args->newdfd == LINUX_AT_FDCWD)
 		dirfd = AT_FDCWD;
 	else
 		dirfd = args->newdfd;
 
 	LCONVPATHEXIST(td, args->oldname, &target);
 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
 	rv = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &linkname,
 	    1, dirfd);
 	if (linkname == NULL) {
 		LFREEPATH(target);
 		return (rv);
 	}
 
 #ifdef DEBUG
 	if (ldebug(symlinkat))
 		printf(ARGS(symlinkat, "%s, %s"), target, linkname);
 #endif
 
 	rv = kern_symlinkat(td, target, dirfd, linkname, UIO_SYSSPACE);
 	LFREEPATH(target);
 	LFREEPATH(linkname);
 
 	return (rv);
 }
 
 /*
  * readlink(2) entry point: convert the link path and call
  * kern_readlinkat() relative to the current working directory, writing
  * into the user buffer args->buf of args->count bytes.
  */
 int
 linux_readlink(struct thread *td, struct linux_readlink_args *args)
 {
 	char *kpath;
 	int rv;
 
 	LCONVPATHEXIST(td, args->name, &kpath);
 
 #ifdef DEBUG
 	if (ldebug(readlink))
 		printf(ARGS(readlink, "%s, %p, %d"), kpath, (void *)args->buf,
 		    args->count);
 #endif
 	rv = kern_readlinkat(td, AT_FDCWD, kpath, UIO_SYSSPACE,
 	    args->buf, UIO_USERSPACE, args->count);
 	LFREEPATH(kpath);
 
 	return (rv);
 }
 
 /*
  * readlinkat(2) entry point: like linux_readlink(), but relative to
  * args->dfd (LINUX_AT_FDCWD maps to AT_FDCWD).
  */
 int
 linux_readlinkat(struct thread *td, struct linux_readlinkat_args *args)
 {
 	char *kpath;
 	int dirfd, rv;
 
 	if (args->dfd == LINUX_AT_FDCWD)
 		dirfd = AT_FDCWD;
 	else
 		dirfd = args->dfd;
 	LCONVPATHEXIST_AT(td, args->path, &kpath, dirfd);
 
 #ifdef DEBUG
 	if (ldebug(readlinkat))
 		printf(ARGS(readlinkat, "%s, %p, %d"), kpath, (void *)args->buf,
 		    args->bufsiz);
 #endif
 
 	rv = kern_readlinkat(td, dirfd, kpath, UIO_SYSSPACE, args->buf,
 	    UIO_USERSPACE, args->bufsiz);
 	LFREEPATH(kpath);
 
 	return (rv);
 }
 
 /*
  * truncate(2) entry point: convert the path and call kern_truncate()
  * with the requested length.
  */
 int
 linux_truncate(struct thread *td, struct linux_truncate_args *args)
 {
 	char *kpath;
 	int rv;
 
 	LCONVPATHEXIST(td, args->path, &kpath);
 
 #ifdef DEBUG
 	if (ldebug(truncate))
 		printf(ARGS(truncate, "%s, %ld"), kpath, (long)args->length);
 #endif
 
 	rv = kern_truncate(td, kpath, UIO_SYSSPACE, args->length);
 	LFREEPATH(kpath);
 
 	return (rv);
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 /*
  * truncate64(2) entry point: same flow as linux_truncate(), taking its
  * arguments from struct linux_truncate64_args.
  */
 int
 linux_truncate64(struct thread *td, struct linux_truncate64_args *args)
 {
 	char *kpath;
 	int rv;
 
 	LCONVPATHEXIST(td, args->path, &kpath);
 
 #ifdef DEBUG
 	if (ldebug(truncate64))
 		printf(ARGS(truncate64, "%s, %jd"), kpath, args->length);
 #endif
 
 	rv = kern_truncate(td, kpath, UIO_SYSSPACE, args->length);
 	LFREEPATH(kpath);
 
 	return (rv);
 }
 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 
 /*
  * ftruncate(2) entry point: a direct pass-through to kern_ftruncate().
  */
 int
 linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args)
 {
 	int rv;
 
 	rv = kern_ftruncate(td, args->fd, args->length);
 	return (rv);
 }
 
 /*
  * link(2) entry point: convert the existing path and the new name, then
  * call kern_linkat() with both relative to the current working
  * directory and FOLLOW semantics.  The second conversion is done by
  * hand so that the first path can be released if it yields no path.
  */
 int
 linux_link(struct thread *td, struct linux_link_args *args)
 {
 	char *existing, *newname;
 	int rv;
 
 	LCONVPATHEXIST(td, args->path, &existing);
 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
 	rv = linux_emul_convpath(td, args->to, UIO_USERSPACE, &newname, 1,
 	    AT_FDCWD);
 	if (newname == NULL) {
 		LFREEPATH(existing);
 		return (rv);
 	}
 
 #ifdef DEBUG
 	if (ldebug(link))
 		printf(ARGS(link, "%s, %s"), existing, newname);
 #endif
 	rv = kern_linkat(td, AT_FDCWD, AT_FDCWD, existing, newname,
 	    UIO_SYSSPACE, FOLLOW);
 	LFREEPATH(existing);
 	LFREEPATH(newname);
 
 	return (rv);
 }
 
 /*
  * linkat(2) entry point.  Any flag other than LINUX_AT_SYMLINK_FOLLOW is
  * rejected with EINVAL.  Both names are resolved relative to their own
  * directory descriptors (LINUX_AT_FDCWD maps to AT_FDCWD); symlinks are
  * followed only when LINUX_AT_SYMLINK_FOLLOW is set.
  */
 int
 linux_linkat(struct thread *td, struct linux_linkat_args *args)
 {
 	char *existing, *newname;
 	int rv, srcfd, dstfd, follow;
 
 	if ((args->flag & ~LINUX_AT_SYMLINK_FOLLOW) != 0)
 		return (EINVAL);
 
 	if (args->olddfd == LINUX_AT_FDCWD)
 		srcfd = AT_FDCWD;
 	else
 		srcfd = args->olddfd;
 	if (args->newdfd == LINUX_AT_FDCWD)
 		dstfd = AT_FDCWD;
 	else
 		dstfd = args->newdfd;
 
 	LCONVPATHEXIST_AT(td, args->oldname, &existing, srcfd);
 	/* Expand LCONVPATHCREATE so that `path' can be freed on errors */
 	rv = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &newname,
 	    1, dstfd);
 	if (newname == NULL) {
 		LFREEPATH(existing);
 		return (rv);
 	}
 
 #ifdef DEBUG
 	if (ldebug(linkat))
 		printf(ARGS(linkat, "%i, %s, %i, %s, %i"), args->olddfd, existing,
 			args->newdfd, newname, args->flag);
 #endif
 
 	if ((args->flag & LINUX_AT_SYMLINK_FOLLOW) != 0)
 		follow = FOLLOW;
 	else
 		follow = NOFOLLOW;
 	rv = kern_linkat(td, srcfd, dstfd, existing, newname, UIO_SYSSPACE,
 	    follow);
 	LFREEPATH(existing);
 	LFREEPATH(newname);
 
 	return (rv);
 }
 
 /*
  * fdatasync(2) entry point: calls kern_fsync() on uap->fd with the
  * final argument set to false.
  *
  * Written as a prototype definition like every other entry point in
  * this file; old-style (K&R) parameter lists are obsolescent and bypass
  * argument type checking.
  */
 int
 linux_fdatasync(struct thread *td, struct linux_fdatasync_args *uap)
 {
 
 	return (kern_fsync(td, uap->fd, false));
 }
 
 int
 linux_pread(struct thread *td, struct linux_pread_args *uap)
 {
 	cap_rights_t rights;
 	struct vnode *vp;
 	int error;
 
 	error = kern_pread(td, uap->fd, uap->buf, uap->nbyte, uap->offset);
 	if (error == 0) {
 		/* This seems to violate POSIX but linux does it */
 		error = fgetvp(td, uap->fd,
 		    cap_rights_init(&rights, CAP_PREAD), &vp);
 		if (error != 0)
 			return (error);
 		if (vp->v_type == VDIR) {
 			vrele(vp);
 			return (EISDIR);
 		}
 		vrele(vp);
 	}
 	return (error);
 }
 
 /*
  * pwrite(2) entry point: a direct pass-through to kern_pwrite().
  */
 int
 linux_pwrite(struct thread *td, struct linux_pwrite_args *uap)
 {
 	int rv;
 
 	rv = kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, uap->offset);
 	return (rv);
 }
 
 /*
  * preadv(2) entry point: assemble the file offset from uap->pos_h and
  * uap->pos_l, reject a negative result with EINVAL, copy in the iovec
  * array and call kern_preadv().  The uio built from the iovec array is
  * freed here after the read.
  */
 int
 linux_preadv(struct thread *td, struct linux_preadv_args *uap)
 {
 	struct uio *uio;
 	off_t offset;
 	int rv;
 
 	/*
 	 * According http://man7.org/linux/man-pages/man2/preadv.2.html#NOTES
 	 * pos_l and pos_h, respectively, contain the
 	 * low order and high order 32 bits of offset.
 	 */
 	offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
 	    (sizeof(offset) * 4)) | uap->pos_l;
 	if (offset < 0)
 		return (EINVAL);
 
 #ifdef COMPAT_LINUX32
 	rv = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &uio);
 #else
 	rv = copyinuio(uap->vec, uap->vlen, &uio);
 #endif
 	if (rv == 0) {
 		rv = kern_preadv(td, uap->fd, uio, offset);
 		free(uio, M_IOV);
 	}
 	return (rv);
 }
 
 /*
  * pwritev(2) entry point: assemble the file offset from uap->pos_h and
  * uap->pos_l, reject a negative result with EINVAL, copy in the iovec
  * array and call kern_pwritev().  The uio built from the iovec array is
  * freed here after the write.
  */
 int
 linux_pwritev(struct thread *td, struct linux_pwritev_args *uap)
 {
 	struct uio *uio;
 	off_t offset;
 	int rv;
 
 	/*
 	 * According http://man7.org/linux/man-pages/man2/pwritev.2.html#NOTES
 	 * pos_l and pos_h, respectively, contain the
 	 * low order and high order 32 bits of offset.
 	 */
 	offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
 	    (sizeof(offset) * 4)) | uap->pos_l;
 	if (offset < 0)
 		return (EINVAL);
 
 #ifdef COMPAT_LINUX32
 	rv = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &uio);
 #else
 	rv = copyinuio(uap->vec, uap->vlen, &uio);
 #endif
 	if (rv == 0) {
 		rv = kern_pwritev(td, uap->fd, uio, offset);
 		free(uio, M_IOV);
 	}
 	return (rv);
 }
 
 int
 linux_mount(struct thread *td, struct linux_mount_args *args)
 {
 	char fstypename[MFSNAMELEN];
-	char mntonname[MNAMELEN], mntfromname[MNAMELEN];
-	int error;
-	int fsflags;
+	char *mntonname, *mntfromname;	/* MNAMELEN-byte heap buffers */
+	int error, fsflags;
 
+	mntonname = malloc(MNAMELEN, M_TEMP, M_WAITOK);	/* freed at out: */
+	mntfromname = malloc(MNAMELEN, M_TEMP, M_WAITOK); /* freed at out: */
 	error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1,
 	    NULL);
-	if (error)
-		return (error);
+	if (error != 0)
+		goto out;	/* no direct return: buffers must be freed */
 	error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL);
-	if (error)
-		return (error);
+	if (error != 0)
+		goto out;
 	error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL);
-	if (error)
-		return (error);
+	if (error != 0)
+		goto out;
 
 #ifdef DEBUG
 	if (ldebug(mount))
 		printf(ARGS(mount, "%s, %s, %s"),
 		    fstypename, mntfromname, mntonname);
 #endif
 
 	if (strcmp(fstypename, "ext2") == 0) {
 		strcpy(fstypename, "ext2fs");
 	} else if (strcmp(fstypename, "proc") == 0) {
 		strcpy(fstypename, "linprocfs");
 	} else if (strcmp(fstypename, "vfat") == 0) {
 		strcpy(fstypename, "msdosfs");
 	}
 
 	fsflags = 0;
 
 	if ((args->rwflag & 0xffff0000) == 0xc0ed0000) {
 		/*
 		 * Linux SYNC flag is not included; the closest equivalent
 		 * FreeBSD has is !ASYNC, which is our default.
 		 */
 		if (args->rwflag & LINUX_MS_RDONLY)
 			fsflags |= MNT_RDONLY;
 		if (args->rwflag & LINUX_MS_NOSUID)
 			fsflags |= MNT_NOSUID;
 		if (args->rwflag & LINUX_MS_NOEXEC)
 			fsflags |= MNT_NOEXEC;
 		if (args->rwflag & LINUX_MS_REMOUNT)
 			fsflags |= MNT_UPDATE;
 	}
 
 	error = kernel_vmount(fsflags,
 	    "fstype", fstypename,
 	    "fspath", mntonname,
 	    "from", mntfromname,
 	    NULL);
+out:	/* common exit: reached on success and on every copyinstr failure */
+	free(mntonname, M_TEMP);
+	free(mntfromname, M_TEMP);
 	return (error);
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 int
 linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
 {
 	struct linux_umount_args args2;
 
 	args2.path = args->path;
 	args2.flags = 0;
 	return (linux_umount(td, &args2));
 }
 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 
 int
 linux_umount(struct thread *td, struct linux_umount_args *args)
 {
 	struct unmount_args bsd;
 
 	bsd.path = args->path;
 	bsd.flags = args->flags;	/* XXX correct? */
 	return (sys_unmount(td, &bsd));
 }
 
 /*
  * fcntl family of syscalls
  */
 
 struct l_flock {
 	l_short		l_type;
 	l_short		l_whence;
 	l_off_t		l_start;
 	l_off_t		l_len;
 	l_pid_t		l_pid;
 }
 #if defined(__amd64__) && defined(COMPAT_LINUX32)
 __packed
 #endif
 ;
 
 static void
 linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock)
 {
 	switch (linux_flock->l_type) {
 	case LINUX_F_RDLCK:
 		bsd_flock->l_type = F_RDLCK;
 		break;
 	case LINUX_F_WRLCK:
 		bsd_flock->l_type = F_WRLCK;
 		break;
 	case LINUX_F_UNLCK:
 		bsd_flock->l_type = F_UNLCK;
 		break;
 	default:
 		bsd_flock->l_type = -1;
 		break;
 	}
 	bsd_flock->l_whence = linux_flock->l_whence;
 	bsd_flock->l_start = (off_t)linux_flock->l_start;
 	bsd_flock->l_len = (off_t)linux_flock->l_len;
 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
 	bsd_flock->l_sysid = 0;
 }
 
 static void
 bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock)
 {
 	switch (bsd_flock->l_type) {
 	case F_RDLCK:
 		linux_flock->l_type = LINUX_F_RDLCK;
 		break;
 	case F_WRLCK:
 		linux_flock->l_type = LINUX_F_WRLCK;
 		break;
 	case F_UNLCK:
 		linux_flock->l_type = LINUX_F_UNLCK;
 		break;
 	}
 	linux_flock->l_whence = bsd_flock->l_whence;
 	linux_flock->l_start = (l_off_t)bsd_flock->l_start;
 	linux_flock->l_len = (l_off_t)bsd_flock->l_len;
 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 struct l_flock64 {
 	l_short		l_type;
 	l_short		l_whence;
 	l_loff_t	l_start;
 	l_loff_t	l_len;
 	l_pid_t		l_pid;
 }
 #if defined(__amd64__) && defined(COMPAT_LINUX32)
 __packed
 #endif
 ;
 
 static void
 linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock)
 {
 	switch (linux_flock->l_type) {
 	case LINUX_F_RDLCK:
 		bsd_flock->l_type = F_RDLCK;
 		break;
 	case LINUX_F_WRLCK:
 		bsd_flock->l_type = F_WRLCK;
 		break;
 	case LINUX_F_UNLCK:
 		bsd_flock->l_type = F_UNLCK;
 		break;
 	default:
 		bsd_flock->l_type = -1;
 		break;
 	}
 	bsd_flock->l_whence = linux_flock->l_whence;
 	bsd_flock->l_start = (off_t)linux_flock->l_start;
 	bsd_flock->l_len = (off_t)linux_flock->l_len;
 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
 	bsd_flock->l_sysid = 0;
 }
 
 static void
 bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock)
 {
 	switch (bsd_flock->l_type) {
 	case F_RDLCK:
 		linux_flock->l_type = LINUX_F_RDLCK;
 		break;
 	case F_WRLCK:
 		linux_flock->l_type = LINUX_F_WRLCK;
 		break;
 	case F_UNLCK:
 		linux_flock->l_type = LINUX_F_UNLCK;
 		break;
 	}
 	linux_flock->l_whence = bsd_flock->l_whence;
 	linux_flock->l_start = (l_loff_t)bsd_flock->l_start;
 	linux_flock->l_len = (l_loff_t)bsd_flock->l_len;
 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
 }
 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 
 static int
 fcntl_common(struct thread *td, struct linux_fcntl_args *args)
 {
 	struct l_flock linux_flock;
 	struct flock bsd_flock;
 	cap_rights_t rights;
 	struct file *fp;
 	long arg;
 	int error, result;
 
 	switch (args->cmd) {
 	case LINUX_F_DUPFD:
 		return (kern_fcntl(td, args->fd, F_DUPFD, args->arg));
 
 	case LINUX_F_GETFD:
 		return (kern_fcntl(td, args->fd, F_GETFD, 0));
 
 	case LINUX_F_SETFD:
 		return (kern_fcntl(td, args->fd, F_SETFD, args->arg));
 
 	case LINUX_F_GETFL:
 		error = kern_fcntl(td, args->fd, F_GETFL, 0);
 		result = td->td_retval[0];
 		td->td_retval[0] = 0;
 		if (result & O_RDONLY)
 			td->td_retval[0] |= LINUX_O_RDONLY;
 		if (result & O_WRONLY)
 			td->td_retval[0] |= LINUX_O_WRONLY;
 		if (result & O_RDWR)
 			td->td_retval[0] |= LINUX_O_RDWR;
 		if (result & O_NDELAY)
 			td->td_retval[0] |= LINUX_O_NONBLOCK;
 		if (result & O_APPEND)
 			td->td_retval[0] |= LINUX_O_APPEND;
 		if (result & O_FSYNC)
 			td->td_retval[0] |= LINUX_O_SYNC;
 		if (result & O_ASYNC)
 			td->td_retval[0] |= LINUX_FASYNC;
 #ifdef LINUX_O_NOFOLLOW
 		if (result & O_NOFOLLOW)
 			td->td_retval[0] |= LINUX_O_NOFOLLOW;
 #endif
 #ifdef LINUX_O_DIRECT
 		if (result & O_DIRECT)
 			td->td_retval[0] |= LINUX_O_DIRECT;
 #endif
 		return (error);
 
 	case LINUX_F_SETFL:
 		arg = 0;
 		if (args->arg & LINUX_O_NDELAY)
 			arg |= O_NONBLOCK;
 		if (args->arg & LINUX_O_APPEND)
 			arg |= O_APPEND;
 		if (args->arg & LINUX_O_SYNC)
 			arg |= O_FSYNC;
 		if (args->arg & LINUX_FASYNC)
 			arg |= O_ASYNC;
 #ifdef LINUX_O_NOFOLLOW
 		if (args->arg & LINUX_O_NOFOLLOW)
 			arg |= O_NOFOLLOW;
 #endif
 #ifdef LINUX_O_DIRECT
 		if (args->arg & LINUX_O_DIRECT)
 			arg |= O_DIRECT;
 #endif
 		return (kern_fcntl(td, args->fd, F_SETFL, arg));
 
 	case LINUX_F_GETLK:
 		error = copyin((void *)args->arg, &linux_flock,
 		    sizeof(linux_flock));
 		if (error)
 			return (error);
 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
 		if (error)
 			return (error);
 		bsd_to_linux_flock(&bsd_flock, &linux_flock);
 		return (copyout(&linux_flock, (void *)args->arg,
 		    sizeof(linux_flock)));
 
 	case LINUX_F_SETLK:
 		error = copyin((void *)args->arg, &linux_flock,
 		    sizeof(linux_flock));
 		if (error)
 			return (error);
 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
 		return (kern_fcntl(td, args->fd, F_SETLK,
 		    (intptr_t)&bsd_flock));
 
 	case LINUX_F_SETLKW:
 		error = copyin((void *)args->arg, &linux_flock,
 		    sizeof(linux_flock));
 		if (error)
 			return (error);
 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
 		return (kern_fcntl(td, args->fd, F_SETLKW,
 		     (intptr_t)&bsd_flock));
 
 	case LINUX_F_GETOWN:
 		return (kern_fcntl(td, args->fd, F_GETOWN, 0));
 
 	case LINUX_F_SETOWN:
 		/*
 		 * XXX some Linux applications depend on F_SETOWN having no
 		 * significant effect for pipes (SIGIO is not delivered for
 		 * pipes under Linux-2.2.35 at least).
 		 */
 		error = fget(td, args->fd,
 		    cap_rights_init(&rights, CAP_FCNTL), &fp);
 		if (error)
 			return (error);
 		if (fp->f_type == DTYPE_PIPE) {
 			fdrop(fp, td);
 			return (EINVAL);
 		}
 		fdrop(fp, td);
 
 		return (kern_fcntl(td, args->fd, F_SETOWN, args->arg));
 
 	case LINUX_F_DUPFD_CLOEXEC:
 		return (kern_fcntl(td, args->fd, F_DUPFD_CLOEXEC, args->arg));
 	}
 
 	return (EINVAL);
 }
 
 int
 linux_fcntl(struct thread *td, struct linux_fcntl_args *args)
 {
 
 #ifdef DEBUG
 	if (ldebug(fcntl))
 		printf(ARGS(fcntl, "%d, %08x, *"), args->fd, args->cmd);
 #endif
 
 	return (fcntl_common(td, args));
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 int
 linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
 {
 	struct l_flock64 linux_flock;
 	struct flock bsd_flock;
 	struct linux_fcntl_args fcntl_args;
 	int error;
 
 #ifdef DEBUG
 	if (ldebug(fcntl64))
 		printf(ARGS(fcntl64, "%d, %08x, *"), args->fd, args->cmd);
 #endif
 
 	switch (args->cmd) {
 	case LINUX_F_GETLK64:
 		error = copyin((void *)args->arg, &linux_flock,
 		    sizeof(linux_flock));
 		if (error)
 			return (error);
 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
 		if (error)
 			return (error);
 		bsd_to_linux_flock64(&bsd_flock, &linux_flock);
 		return (copyout(&linux_flock, (void *)args->arg,
 			    sizeof(linux_flock)));
 
 	case LINUX_F_SETLK64:
 		error = copyin((void *)args->arg, &linux_flock,
 		    sizeof(linux_flock));
 		if (error)
 			return (error);
 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
 		return (kern_fcntl(td, args->fd, F_SETLK,
 		    (intptr_t)&bsd_flock));
 
 	case LINUX_F_SETLKW64:
 		error = copyin((void *)args->arg, &linux_flock,
 		    sizeof(linux_flock));
 		if (error)
 			return (error);
 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
 		return (kern_fcntl(td, args->fd, F_SETLKW,
 		    (intptr_t)&bsd_flock));
 	}
 
 	fcntl_args.fd = args->fd;
 	fcntl_args.cmd = args->cmd;
 	fcntl_args.arg = args->arg;
 	return (fcntl_common(td, &fcntl_args));
 }
 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 
 int
 linux_chown(struct thread *td, struct linux_chown_args *args)
 {
 	char *path;
 	int error;
 
 	LCONVPATHEXIST(td, args->path, &path);
 
 #ifdef DEBUG
 	if (ldebug(chown))
 		printf(ARGS(chown, "%s, %d, %d"), path, args->uid, args->gid);
 #endif
 	error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid,
 	    args->gid, 0);
 	LFREEPATH(path);
 	return (error);
 }
 
 int
 linux_fchownat(struct thread *td, struct linux_fchownat_args *args)
 {
 	char *path;
 	int error, dfd, flag;
 
 	if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW)
 		return (EINVAL);
 
 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD :  args->dfd;
 	LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
 
 #ifdef DEBUG
 	if (ldebug(fchownat))
 		printf(ARGS(fchownat, "%s, %d, %d"), path, args->uid, args->gid);
 #endif
 
 	flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) == 0 ? 0 :
 	    AT_SYMLINK_NOFOLLOW;
 	error = kern_fchownat(td, dfd, path, UIO_SYSSPACE, args->uid, args->gid,
 	    flag);
 	LFREEPATH(path);
 	return (error);
 }
 
 int
 linux_lchown(struct thread *td, struct linux_lchown_args *args)
 {
 	char *path;
 	int error;
 
 	LCONVPATHEXIST(td, args->path, &path);
 
 #ifdef DEBUG
 	if (ldebug(lchown))
 		printf(ARGS(lchown, "%s, %d, %d"), path, args->uid, args->gid);
 #endif
 	error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid,
 	    args->gid, AT_SYMLINK_NOFOLLOW);
 	LFREEPATH(path);
 	return (error);
 }
 
 static int
 convert_fadvice(int advice)
 {
 	switch (advice) {
 	case LINUX_POSIX_FADV_NORMAL:
 		return (POSIX_FADV_NORMAL);
 	case LINUX_POSIX_FADV_RANDOM:
 		return (POSIX_FADV_RANDOM);
 	case LINUX_POSIX_FADV_SEQUENTIAL:
 		return (POSIX_FADV_SEQUENTIAL);
 	case LINUX_POSIX_FADV_WILLNEED:
 		return (POSIX_FADV_WILLNEED);
 	case LINUX_POSIX_FADV_DONTNEED:
 		return (POSIX_FADV_DONTNEED);
 	case LINUX_POSIX_FADV_NOREUSE:
 		return (POSIX_FADV_NOREUSE);
 	default:
 		return (-1);
 	}
 }
 
 int
 linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args)
 {
 	int advice;
 
 	advice = convert_fadvice(args->advice);
 	if (advice == -1)
 		return (EINVAL);
 	return (kern_posix_fadvise(td, args->fd, args->offset, args->len,
 	    advice));
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 int
 linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args)
 {
 	int advice;
 
 	advice = convert_fadvice(args->advice);
 	if (advice == -1)
 		return (EINVAL);
 	return (kern_posix_fadvise(td, args->fd, args->offset, args->len,
 	    advice));
 }
 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 
 int
 linux_pipe(struct thread *td, struct linux_pipe_args *args)
 {
 	int fildes[2];
 	int error;
 
 #ifdef DEBUG
 	if (ldebug(pipe))
 		printf(ARGS(pipe, "*"));
 #endif
 
 	error = kern_pipe(td, fildes, 0, NULL, NULL);
 	if (error != 0)
 		return (error);
 
 	error = copyout(fildes, args->pipefds, sizeof(fildes));
 	if (error != 0) {
 		(void)kern_close(td, fildes[0]);
 		(void)kern_close(td, fildes[1]);
 	}
 
 	return (error);
 }
 
 int
 linux_pipe2(struct thread *td, struct linux_pipe2_args *args)
 {
 	int fildes[2];
 	int error, flags;
 
 #ifdef DEBUG
 	if (ldebug(pipe2))
 		printf(ARGS(pipe2, "*, %d"), args->flags);
 #endif
 
 	if ((args->flags & ~(LINUX_O_NONBLOCK | LINUX_O_CLOEXEC)) != 0)
 		return (EINVAL);
 
 	flags = 0;
 	if ((args->flags & LINUX_O_NONBLOCK) != 0)
 		flags |= O_NONBLOCK;
 	if ((args->flags & LINUX_O_CLOEXEC) != 0)
 		flags |= O_CLOEXEC;
 	error = kern_pipe(td, fildes, flags, NULL, NULL);
 	if (error != 0)
 		return (error);
 
 	error = copyout(fildes, args->pipefds, sizeof(fildes));
 	if (error != 0) {
 		(void)kern_close(td, fildes[0]);
 		(void)kern_close(td, fildes[1]);
 	}
 
 	return (error);
 }
 
 int
 linux_dup3(struct thread *td, struct linux_dup3_args *args)
 {
 	int cmd;
 	intptr_t newfd;
 
 	if (args->oldfd == args->newfd)
 		return (EINVAL);
 	if ((args->flags & ~LINUX_O_CLOEXEC) != 0)
 		return (EINVAL);
 	if (args->flags & LINUX_O_CLOEXEC)
 		cmd = F_DUP2FD_CLOEXEC;
 	else
 		cmd = F_DUP2FD;
 
 	newfd = args->newfd;
 	return (kern_fcntl(td, args->oldfd, cmd, newfd));
 }
 
 int
 linux_fallocate(struct thread *td, struct linux_fallocate_args *args)
 {
 
 	/*
 	 * We emulate only posix_fallocate system call for which
 	 * mode should be 0.
 	 */
 	if (args->mode != 0)
 		return (ENOSYS);
 
 	return (kern_posix_fallocate(td, args->fd, args->offset,
 	    args->len));
 }
Index: head/sys/dev/snp/snp.c
===================================================================
--- head/sys/dev/snp/snp.c	(revision 318735)
+++ head/sys/dev/snp/snp.c	(revision 318736)
@@ -1,361 +1,369 @@
 /*-
  * Copyright (c) 2008 Ed Schouten <ed@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/filio.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/poll.h>
 #include <sys/proc.h>
 #include <sys/snoop.h>
 #include <sys/sx.h>
 #include <sys/systm.h>
 #include <sys/tty.h>
 #include <sys/uio.h>
 
 static struct cdev	*snp_dev;
 static MALLOC_DEFINE(M_SNP, "snp", "tty snoop device");
 
 /* XXX: should be mtx, but TTY can be locked by Giant. */
 #if 0
 static struct mtx	snp_register_lock;
 MTX_SYSINIT(snp_register_lock, &snp_register_lock,
     "tty snoop registration", MTX_DEF);
 #define	SNP_LOCK()	mtx_lock(&snp_register_lock)
 #define	SNP_UNLOCK()	mtx_unlock(&snp_register_lock)
 #else
 static struct sx	snp_register_lock;
 SX_SYSINIT(snp_register_lock, &snp_register_lock,
     "tty snoop registration");
 #define	SNP_LOCK()	sx_xlock(&snp_register_lock)
 #define	SNP_UNLOCK()	sx_xunlock(&snp_register_lock)
 #endif
 
+#define	SNPGTYY_32DEV	_IOR('T', 89, uint32_t)	/* TTY device number as uint32_t */
+
 /*
  * There is no need to have a big input buffer. In most typical setups,
  * we won't inject much data into the TTY, because users can't type
  * really fast.
  */
 #define SNP_INPUT_BUFSIZE	16
 /*
  * The output buffer has to be really big. Right now we don't support
  * any form of flow control, which means we lost any data we can't
  * accept. We set the output buffer size to about twice the size of a
  * pseudo-terminal/virtual console's output buffer.
  */
 #define SNP_OUTPUT_BUFSIZE	16384
 
 static d_open_t		snp_open;
 static d_read_t		snp_read;
 static d_write_t	snp_write;
 static d_ioctl_t	snp_ioctl;
 static d_poll_t		snp_poll;
 
 static struct cdevsw snp_cdevsw = {
 	.d_version	= D_VERSION,
 	.d_open		= snp_open,
 	.d_read		= snp_read,
 	.d_write	= snp_write,
 	.d_ioctl	= snp_ioctl,
 	.d_poll		= snp_poll,
 	.d_name		= "snp",
 };
 
 static th_getc_capture_t	snp_getc_capture;
 
 static struct ttyhook snp_hook = {
 	.th_getc_capture	= snp_getc_capture,
 };
 
 /*
  * Per-instance structure.
  *
  * List of locks
  * (r)	locked by snp_register_lock on assignment
  * (t)	locked by tty_lock
  */
 struct snp_softc {
 	struct tty	*snp_tty;	/* (r) TTY we're snooping. */
 	struct ttyoutq	snp_outq;	/* (t) Output queue. */
 	struct cv	snp_outwait;	/* (t) Output wait queue. */
 	struct selinfo	snp_outpoll;	/* (t) Output polling. */
 };
 
 static void
 snp_dtor(void *data)
 {
 	struct snp_softc *ss = data;
 	struct tty *tp;
 
 	tp = ss->snp_tty;
 	if (tp != NULL) {
 		tty_lock(tp);
 		ttyoutq_free(&ss->snp_outq);
 		ttyhook_unregister(tp);
 	}
 
 	cv_destroy(&ss->snp_outwait);
 	free(ss, M_SNP);
 }
 
 /*
  * Snoop device node routines.
  */
 
 static int
 snp_open(struct cdev *dev, int flag, int mode, struct thread *td)
 {
 	struct snp_softc *ss;
 
 	/* Allocate per-snoop data. */
 	ss = malloc(sizeof(struct snp_softc), M_SNP, M_WAITOK|M_ZERO);
 	cv_init(&ss->snp_outwait, "snp out");
 
 	devfs_set_cdevpriv(ss, snp_dtor);
 
 	return (0);
 }
 
 static int
 snp_read(struct cdev *dev, struct uio *uio, int flag)
 {
 	int error, oresid = uio->uio_resid;
 	struct snp_softc *ss;
 	struct tty *tp;
 
 	if (uio->uio_resid == 0)
 		return (0);
 
 	error = devfs_get_cdevpriv((void **)&ss);
 	if (error != 0)
 		return (error);
 
 	tp = ss->snp_tty;
 	if (tp == NULL || tty_gone(tp))
 		return (EIO);
 
 	tty_lock(tp);
 	for (;;) {
 		error = ttyoutq_read_uio(&ss->snp_outq, tp, uio);
 		if (error != 0 || uio->uio_resid != oresid)
 			break;
 
 		/* Wait for more data. */
 		if (flag & O_NONBLOCK) {
 			error = EWOULDBLOCK;
 			break;
 		}
 		error = cv_wait_sig(&ss->snp_outwait, tp->t_mtx);
 		if (error != 0)
 			break;
 		if (tty_gone(tp)) {
 			error = EIO;
 			break;
 		}
 	}
 	tty_unlock(tp);
 
 	return (error);
 }
 
 static int
 snp_write(struct cdev *dev, struct uio *uio, int flag)
 {
 	struct snp_softc *ss;
 	struct tty *tp;
 	int error, len;
 	char in[SNP_INPUT_BUFSIZE];
 
 	error = devfs_get_cdevpriv((void **)&ss);
 	if (error != 0)
 		return (error);
 
 	tp = ss->snp_tty;
 	if (tp == NULL || tty_gone(tp))
 		return (EIO);
 
 	while (uio->uio_resid > 0) {
 		/* Read new data. */
 		len = imin(uio->uio_resid, sizeof in);
 		error = uiomove(in, len, uio);
 		if (error != 0)
 			return (error);
 
 		tty_lock(tp);
 
 		/* Driver could have abandoned the TTY in the mean time. */
 		if (tty_gone(tp)) {
 			tty_unlock(tp);
 			return (ENXIO);
 		}
 
 		/*
 		 * Deliver data to the TTY. Ignore errors for now,
 		 * because we shouldn't bail out when we're running
 		 * close to the watermarks.
 		 */
 		ttydisc_rint_simple(tp, in, len);
 		ttydisc_rint_done(tp);
 
 		tty_unlock(tp);
 	}
 
 	return (0);
 }
 
 static int
 snp_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags,
     struct thread *td)
 {
 	struct snp_softc *ss;
 	struct tty *tp;
 	int error;
 
 	error = devfs_get_cdevpriv((void **)&ss);
 	if (error != 0)
 		return (error);
 
 	switch (cmd) {
 	case SNPSTTY:
 		/* Bind TTY to snoop instance. */
 		SNP_LOCK();
 		if (ss->snp_tty != NULL) {
 			SNP_UNLOCK();
 			return (EBUSY);
 		}
 		/*
 		 * XXXRW / XXXJA: no capability check here.
 		 */
 		error = ttyhook_register(&ss->snp_tty, td->td_proc,
 		    *(int *)data, &snp_hook, ss);
 		SNP_UNLOCK();
 		if (error != 0)
 			return (error);
 
 		/* Now that went okay, allocate a buffer for the queue. */
 		tp = ss->snp_tty;
 		tty_lock(tp);
 		ttyoutq_setsize(&ss->snp_outq, tp, SNP_OUTPUT_BUFSIZE);
 		tty_unlock(tp);
 
 		return (0);
 	case SNPGTTY:
 		/* Obtain device number of associated TTY. */
 		if (ss->snp_tty == NULL)
 			*(dev_t *)data = NODEV;
 		else
 			*(dev_t *)data = tty_udev(ss->snp_tty);
+		return (0);
+	case SNPGTYY_32DEV:	/* as SNPGTTY, but the result is a uint32_t */
+		if (ss->snp_tty == NULL)
+			*(uint32_t *)data = -1;	/* all ones: no TTY bound */
+		else
+			*(uint32_t *)data = tty_udev(ss->snp_tty); /* value truncated to 32 bits */
 		return (0);
 	case FIONREAD:
 		tp = ss->snp_tty;
 		if (tp != NULL) {
 			tty_lock(tp);
 			*(int *)data = ttyoutq_bytesused(&ss->snp_outq);
 			tty_unlock(tp);
 		} else {
 			*(int *)data = 0;
 		}
 		return (0);
 	default:
 		return (ENOTTY);
 	}
 }
 
 static int
 snp_poll(struct cdev *dev, int events, struct thread *td)
 {
 	struct snp_softc *ss;
 	struct tty *tp;
 	int revents;
 
 	if (devfs_get_cdevpriv((void **)&ss) != 0)
 		return (events &
 		    (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
 
 	revents = 0;
 
 	if (events & (POLLIN | POLLRDNORM)) {
 		tp = ss->snp_tty;
 		if (tp != NULL) {
 			tty_lock(tp);
 			if (ttyoutq_bytesused(&ss->snp_outq) > 0)
 				revents |= events & (POLLIN | POLLRDNORM);
 			tty_unlock(tp);
 		}
 	}
 
 	if (revents == 0)
 		selrecord(td, &ss->snp_outpoll);
 
 	return (revents);
 }
 
 /*
  * TTY hook events.
  */
 
 static int
 snp_modevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 		snp_dev = make_dev(&snp_cdevsw, 0,
 		    UID_ROOT, GID_WHEEL, 0600, "snp");
 		return (0);
 	case MOD_UNLOAD:
 		/* XXX: Make existing users leave. */
 		destroy_dev(snp_dev);
 		return (0);
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 static void
 snp_getc_capture(struct tty *tp, const void *buf, size_t len)
 {
 	struct snp_softc *ss = ttyhook_softc(tp);
 
 	ttyoutq_write(&ss->snp_outq, buf, len);
 
 	cv_broadcast(&ss->snp_outwait);
 	selwakeup(&ss->snp_outpoll);
 }
 
 static moduledata_t snp_mod = {
 	"snp",
 	snp_modevent,
 	NULL
 };
 
 DECLARE_MODULE(snp, snp_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
Index: head/sys/fs/devfs/devfs_devs.c
===================================================================
--- head/sys/fs/devfs/devfs_devs.c	(revision 318735)
+++ head/sys/fs/devfs/devfs_devs.c	(revision 318736)
@@ -1,729 +1,741 @@
 /*-
  * Copyright (c) 2000,2004
  *	Poul-Henning Kamp.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vfsops.c 1.36
  *
  * $FreeBSD$
  */
 
+#include "opt_compat.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/dirent.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
 
 #include <sys/kdb.h>
 
 #include <fs/devfs/devfs.h>
 #include <fs/devfs/devfs_int.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * The one true (but secret) list of active devices in the system.
  * Locked by dev_lock()/devmtx
  */
 struct cdev_priv_list cdevp_list = TAILQ_HEAD_INITIALIZER(cdevp_list);
 
 struct unrhdr *devfs_inos;
 
 
 static MALLOC_DEFINE(M_DEVFS2, "DEVFS2", "DEVFS data 2");
 static MALLOC_DEFINE(M_DEVFS3, "DEVFS3", "DEVFS data 3");
 static MALLOC_DEFINE(M_CDEVP, "DEVFS1", "DEVFS cdev_priv storage");
 
 SYSCTL_NODE(_vfs, OID_AUTO, devfs, CTLFLAG_RW, 0, "DEVFS filesystem");
 
 static unsigned devfs_generation;
 SYSCTL_UINT(_vfs_devfs, OID_AUTO, generation, CTLFLAG_RD,
 	&devfs_generation, 0, "DEVFS generation number");
 
 unsigned devfs_rule_depth = 1;
 SYSCTL_UINT(_vfs_devfs, OID_AUTO, rule_depth, CTLFLAG_RW,
 	&devfs_rule_depth, 0, "Max depth of ruleset include");
 
 /*
  * Helper sysctl for devname(3).  We're given a dev_t and return the
  * name, if any, registered by the device driver.
  */
 static int
 sysctl_devname(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	dev_t ud;
+#ifdef COMPAT_FREEBSD11
+	uint32_t ud_compat;	/* 32-bit form of the requested device number */
+#endif
 	struct cdev_priv *cdp;
 	struct cdev *dev;
 
-	error = SYSCTL_IN(req, &ud, sizeof (ud));
+#ifdef COMPAT_FREEBSD11
+	if (req->newlen == sizeof(ud_compat)) {	/* caller supplied 32 bits */
+		error = SYSCTL_IN(req, &ud_compat, sizeof(ud_compat));
+		if (error == 0)	/* widen; 32-bit NODEV maps to native NODEV */
+			ud = ud_compat == (uint32_t)NODEV ? NODEV : ud_compat;
+	} else
+#endif
+		error = SYSCTL_IN(req, &ud, sizeof (ud));	/* native dev_t */
 	if (error)
 		return (error);
 	if (ud == NODEV)
 		return (EINVAL);
 	dev = NULL;
 	dev_lock();
 	TAILQ_FOREACH(cdp, &cdevp_list, cdp_list)
 		if (cdp->cdp_inode == ud) {
 			dev = &cdp->cdp_c;
 			dev_refl(dev);
 			break;
 		}
 	dev_unlock();
 	if (dev == NULL)
 		return (ENOENT);
 	error = SYSCTL_OUT(req, dev->si_name, strlen(dev->si_name) + 1);
 	dev_rel(dev);
 	return (error);
 }
 
 SYSCTL_PROC(_kern, OID_AUTO, devname,
     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY|CTLFLAG_MPSAFE,
     NULL, 0, sysctl_devname, "", "devname(3) handler");
 
 SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev, CTLFLAG_RD,
     SYSCTL_NULL_INT_PTR, sizeof(struct cdev), "sizeof(struct cdev)");
 
 SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev_priv, CTLFLAG_RD,
     SYSCTL_NULL_INT_PTR, sizeof(struct cdev_priv), "sizeof(struct cdev_priv)");
 
 struct cdev *
 devfs_alloc(int flags)
 {
 	struct cdev_priv *cdp;
 	struct cdev *cdev;
 	struct timespec ts;
 
 	cdp = malloc(sizeof *cdp, M_CDEVP, M_ZERO |
 	    ((flags & MAKEDEV_NOWAIT) ? M_NOWAIT : M_WAITOK));
 	if (cdp == NULL)
 		return (NULL);
 
 	cdp->cdp_dirents = &cdp->cdp_dirent0;
 
 	cdev = &cdp->cdp_c;
 	LIST_INIT(&cdev->si_children);
 	vfs_timestamp(&ts);
 	cdev->si_atime = cdev->si_mtime = cdev->si_ctime = ts;
 
 	return (cdev);
 }
 
 int
 devfs_dev_exists(const char *name)
 {
 	struct cdev_priv *cdp;
 
 	mtx_assert(&devmtx, MA_OWNED);
 
 	TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) {
 		if ((cdp->cdp_flags & CDP_ACTIVE) == 0)
 			continue;
 		if (devfs_pathpath(cdp->cdp_c.si_name, name) != 0)
 			return (1);
 		if (devfs_pathpath(name, cdp->cdp_c.si_name) != 0)
 			return (1);
 	}
 	if (devfs_dir_find(name) != 0)
 		return (1);
 
 	return (0);
 }
 
 void
 devfs_free(struct cdev *cdev)
 {
 	struct cdev_priv *cdp;
 
 	cdp = cdev2priv(cdev);
 	if (cdev->si_cred != NULL)
 		crfree(cdev->si_cred);
 	devfs_free_cdp_inode(cdp->cdp_inode);
 	if (cdp->cdp_maxdirent > 0) 
 		free(cdp->cdp_dirents, M_DEVFS2);
 	free(cdp, M_CDEVP);
 }
 
 struct devfs_dirent *
 devfs_find(struct devfs_dirent *dd, const char *name, int namelen, int type)
 {
 	struct devfs_dirent *de;
 
 	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
 		if (namelen != de->de_dirent->d_namlen)
 			continue;
 		if (type != 0 && type != de->de_dirent->d_type)
 			continue;
 
 		/*
 		 * The race with finding non-active name is not
 		 * completely closed by the check, but it is similar
 		 * to the devfs_allocv() in making it unlikely enough.
 		 */
 		if (de->de_dirent->d_type == DT_CHR &&
 		    (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0)
 			continue;
 
 		if (bcmp(name, de->de_dirent->d_name, namelen) != 0)
 			continue;
 		break;
 	}
 	KASSERT(de == NULL || (de->de_flags & DE_DOOMED) == 0,
 	    ("devfs_find: returning a doomed entry"));
 	return (de);
 }
 
 struct devfs_dirent *
 devfs_newdirent(char *name, int namelen)
 {
 	int i;
 	struct devfs_dirent *de;
 	struct dirent d;
 
 	d.d_namlen = namelen;
 	i = sizeof(*de) + GENERIC_DIRSIZ(&d);
 	de = malloc(i, M_DEVFS3, M_WAITOK | M_ZERO);
 	de->de_dirent = (struct dirent *)(de + 1);
 	de->de_dirent->d_namlen = namelen;
 	de->de_dirent->d_reclen = GENERIC_DIRSIZ(&d);
 	bcopy(name, de->de_dirent->d_name, namelen);
 	de->de_dirent->d_name[namelen] = '\0';
 	vfs_timestamp(&de->de_ctime);
 	de->de_mtime = de->de_atime = de->de_ctime;
 	de->de_links = 1;
 	de->de_holdcnt = 1;
 #ifdef MAC
 	mac_devfs_init(de);
 #endif
 	return (de);
 }
 
 struct devfs_dirent *
 devfs_parent_dirent(struct devfs_dirent *de)
 {
 
 	if (de->de_dirent->d_type != DT_DIR)
 		return (de->de_dir);
 
 	if (de->de_flags & (DE_DOT | DE_DOTDOT))
 		return (NULL);
 
 	de = TAILQ_FIRST(&de->de_dlist);	/* "." */
 	if (de == NULL)
 		return (NULL);
 	de = TAILQ_NEXT(de, de_list);		/* ".." */
 	if (de == NULL)
 		return (NULL);
 
 	return (de->de_dir);
 }
 
 /*
  * Create a directory dirent named by the first namelen bytes of name,
  * together with its "." and ".." children, and return it.
  *
  * dotdot is the parent directory.  When it is non-NULL the new directory
  * is appended to the parent's list, the parent's link count is bumped and
  * the rules are applied to the new directory; dmp->dm_lock must be
  * exclusively held in that case (asserted).  When dotdot is NULL the new
  * directory's ".." points back at itself.
  *
  * A non-zero inode is used verbatim; zero allocates one from devfs_inos.
  */
 struct devfs_dirent *
 devfs_vmkdir(struct devfs_mount *dmp, char *name, int namelen,
     struct devfs_dirent *dotdot, u_int inode)
 {
 	struct devfs_dirent *dd;
 	struct devfs_dirent *de;
 
 	/* Create the new directory */
 	dd = devfs_newdirent(name, namelen);
 	TAILQ_INIT(&dd->de_dlist);
 	dd->de_dirent->d_type = DT_DIR;
 	dd->de_mode = 0555;
 	dd->de_links = 2;
 	dd->de_dir = dd;
 	if (inode != 0)
 		dd->de_inode = inode;
 	else
 		dd->de_inode = alloc_unr(devfs_inos);
 
 	/*
 	 * "." and ".." are always the two first entries in the
 	 * de_dlist list.
 	 *
 	 * Create the "." entry in the new directory.
 	 */
 	de = devfs_newdirent(".", 1);
 	de->de_dirent->d_type = DT_DIR;
 	de->de_flags |= DE_DOT;
 	TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
 	de->de_dir = dd;
 
 	/* Create the ".." entry in the new directory. */
 	de = devfs_newdirent("..", 2);
 	de->de_dirent->d_type = DT_DIR;
 	de->de_flags |= DE_DOTDOT;
 	TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
 	if (dotdot == NULL) {
 		/* No parent supplied: ".." refers to the directory itself. */
 		de->de_dir = dd;
 	} else {
 		/* Link the new directory into its parent. */
 		de->de_dir = dotdot;
 		sx_assert(&dmp->dm_lock, SX_XLOCKED);
 		TAILQ_INSERT_TAIL(&dotdot->de_dlist, dd, de_list);
 		dotdot->de_links++;
 		devfs_rules_apply(dmp, dd);
 	}
 
 #ifdef MAC
 	mac_devfs_create_directory(dmp->dm_mount, name, namelen, dd);
 #endif
 	return (dd);
 }
 
 /*
  * Free a dirent.  If the dirent's vnode still points back at it through
  * v_data, that back-pointer is cleared under devfs_de_interlock first.
  */
 void
 devfs_dirent_free(struct devfs_dirent *de)
 {
 	struct vnode *vp;
 
 	vp = de->de_vnode;
 	mtx_lock(&devfs_de_interlock);
 	/* Only sever the link if the vnode has not been re-pointed elsewhere. */
 	if (vp != NULL && vp->v_data == de)
 		vp->v_data = NULL;
 	mtx_unlock(&devfs_de_interlock);
 	free(de, M_DEVFS3);
 }
 
 /*
  * Removes a directory if it is empty. Also empty parent directories are
  * removed recursively.
  *
  * A directory counts as empty when its list holds nothing but "." and "..".
  * The walk stops at the root directory, at an already doomed directory, at
  * the first non-empty directory, or when the parent turns out to have been
  * released while the deletes were in progress.
  * dm->dm_lock must be exclusively held (asserted).
  */
 static void
 devfs_rmdir_empty(struct devfs_mount *dm, struct devfs_dirent *de)
 {
 	struct devfs_dirent *dd, *de_dot, *de_dotdot;
 
 	sx_assert(&dm->dm_lock, SX_XLOCKED);
 
 	for (;;) {
 		KASSERT(de->de_dirent->d_type == DT_DIR,
 		    ("devfs_rmdir_empty: de is not a directory"));
 
 		if ((de->de_flags & DE_DOOMED) != 0 || de == dm->dm_rootdir)
 			return;
 
 		de_dot = TAILQ_FIRST(&de->de_dlist);
 		KASSERT(de_dot != NULL, ("devfs_rmdir_empty: . missing"));
 		de_dotdot = TAILQ_NEXT(de_dot, de_list);
 		KASSERT(de_dotdot != NULL, ("devfs_rmdir_empty: .. missing"));
 		/* Return if the directory is not empty. */
 		if (TAILQ_NEXT(de_dotdot, de_list) != NULL)
 			return;
 
 		/* Look up the parent before ".." is unlinked below. */
 		dd = devfs_parent_dirent(de);
 		KASSERT(dd != NULL, ("devfs_rmdir_empty: NULL dd"));
 		TAILQ_REMOVE(&de->de_dlist, de_dot, de_list);
 		TAILQ_REMOVE(&de->de_dlist, de_dotdot, de_list);
 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
 		/* Keep the parent alive across the devfs_delete() calls. */
 		DEVFS_DE_HOLD(dd);
 		devfs_delete(dm, de, DEVFS_DEL_NORECURSE);
 		devfs_delete(dm, de_dot, DEVFS_DEL_NORECURSE);
 		devfs_delete(dm, de_dotdot, DEVFS_DEL_NORECURSE);
 		/* Ours was the last hold: the parent is gone, stop here. */
 		if (DEVFS_DE_DROP(dd)) {
 			devfs_dirent_free(dd);
 			return;
 		}
 
 		/* Continue upwards with the parent. */
 		de = dd;
 	}
 }
 
 /*
  * Doom a dirent and release the resources attached to it: its vnode (via
  * vgone()), its symlink target, its MAC label and its inode number, then
  * drop one hold on it.
  *
  * The caller needs to hold the dm for the duration of the call since
  * dm->dm_lock may be temporarily dropped.
  *
  * flags:
  *   DEVFS_DEL_NORECURSE - do not look at the parent directory; without it
  *       the parent is held across the call and, afterwards, removed too if
  *       it has become empty.
  *   DEVFS_DEL_VNLOCKED  - skip locking and unlocking the vnode around
  *       vgone(); presumably the caller already holds the vnode lock.
  */
 void
 devfs_delete(struct devfs_mount *dm, struct devfs_dirent *de, int flags)
 {
 	struct devfs_dirent *dd;
 	struct vnode *vp;
 
 	KASSERT((de->de_flags & DE_DOOMED) == 0,
 		("devfs_delete doomed dirent"));
 	de->de_flags |= DE_DOOMED;
 
 	if ((flags & DEVFS_DEL_NORECURSE) == 0) {
 		dd = devfs_parent_dirent(de);
 		if (dd != NULL)
 			DEVFS_DE_HOLD(dd);
 		if (de->de_flags & DE_USER) {
 			KASSERT(dd != NULL, ("devfs_delete: NULL dd"));
 			devfs_dir_unref_de(dm, dd);
 		}
 	} else
 		dd = NULL;
 
 	mtx_lock(&devfs_de_interlock);
 	vp = de->de_vnode;
 	if (vp != NULL) {
 		/*
 		 * Take the vnode interlock before letting go of the de
 		 * interlock, then hold the vnode so it stays valid while
 		 * dm_lock is dropped around vgone().
 		 */
 		VI_LOCK(vp);
 		mtx_unlock(&devfs_de_interlock);
 		vholdl(vp);
 		sx_unlock(&dm->dm_lock);
 		if ((flags & DEVFS_DEL_VNLOCKED) == 0)
 			vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY);
 		else
 			VI_UNLOCK(vp);
 		vgone(vp);
 		if ((flags & DEVFS_DEL_VNLOCKED) == 0)
 			VOP_UNLOCK(vp, 0);
 		vdrop(vp);
 		sx_xlock(&dm->dm_lock);
 	} else
 		mtx_unlock(&devfs_de_interlock);
 	if (de->de_symlink) {
 		free(de->de_symlink, M_DEVFS);
 		de->de_symlink = NULL;
 	}
 #ifdef MAC
 	mac_devfs_destroy(de);
 #endif
 	/* Inode numbers up to DEVFS_ROOTINO are not returned to the pool. */
 	if (de->de_inode > DEVFS_ROOTINO) {
 		devfs_free_cdp_inode(de->de_inode);
 		de->de_inode = 0;
 	}
 	if (DEVFS_DE_DROP(de))
 		devfs_dirent_free(de);
 
 	if (dd != NULL) {
 		/* Release the parent hold taken above; prune it if still alive. */
 		if (DEVFS_DE_DROP(dd))
 			devfs_dirent_free(dd);
 		else
 			devfs_rmdir_empty(dm, dd);
 	}
 }
 
 /*
  * Called on unmount.
  * Recursively removes the entire tree.
  * The caller needs to hold the dm for the duration of the call.
  *
  * dd is the directory to empty; it is held for the duration of the loop
  * and deleted itself at the end unless it has already been doomed or
  * freed.  dm->dm_lock must be exclusively held (asserted).
  */
 
 static void
 devfs_purge(struct devfs_mount *dm, struct devfs_dirent *dd)
 {
 	struct devfs_dirent *de;
 
 	sx_assert(&dm->dm_lock, SX_XLOCKED);
 
 	DEVFS_DE_HOLD(dd);
 	for (;;) {
 		/*
 		 * Use TAILQ_LAST() to remove "." and ".." last.
 		 * We might need ".." to resolve a path in
 		 * devfs_dir_unref_de().
 		 */
 		de = TAILQ_LAST(&dd->de_dlist, devfs_dlist_head);
 		if (de == NULL)
 			break;
 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
 		if (de->de_flags & DE_USER)
 			devfs_dir_unref_de(dm, dd);
 		if (de->de_flags & (DE_DOT | DE_DOTDOT))
 			devfs_delete(dm, de, DEVFS_DEL_NORECURSE);
 		else if (de->de_dirent->d_type == DT_DIR)
 			/* Subdirectory: empty it recursively. */
 			devfs_purge(dm, de);
 		else
 			devfs_delete(dm, de, DEVFS_DEL_NORECURSE);
 	}
 	/* Drop our hold; delete dd itself unless that already happened. */
 	if (DEVFS_DE_DROP(dd))
 		devfs_dirent_free(dd);
 	else if ((dd->de_flags & DE_DOOMED) == 0)
 		devfs_delete(dm, dd, DEVFS_DEL_NORECURSE);
 }
 
 /*
  * Each cdev_priv has an array of pointers to devfs_dirent which is indexed
  * by the mount point's dm_idx.
  * This function extends the array when necessary, taking into account that
  * the default array is 1 element and not malloc'ed.
  *
  * The replacement array is allocated before dev_lock() is taken; if another
  * thread has already grown the array far enough in the meantime, the new
  * allocation is simply discarded.
  */
 static void
 devfs_metoo(struct cdev_priv *cdp, struct devfs_mount *dm)
 {
 	struct devfs_dirent **dep;
 	int siz;
 
 	/* Room for indices 0..dm_idx, zero-filled. */
 	siz = (dm->dm_idx + 1) * sizeof *dep;
 	dep = malloc(siz, M_DEVFS2, M_WAITOK | M_ZERO);
 	dev_lock();
 	if (dm->dm_idx <= cdp->cdp_maxdirent) {
 		/* We got raced */
 		dev_unlock();
 		free(dep, M_DEVFS2);
 		return;
 	} 
 	memcpy(dep, cdp->cdp_dirents, (cdp->cdp_maxdirent + 1) * sizeof *dep);
 	/* cdp_maxdirent == 0 means the built-in, non-malloc'ed array. */
 	if (cdp->cdp_maxdirent > 0)
 		free(cdp->cdp_dirents, M_DEVFS2);
 	cdp->cdp_dirents = dep;
 	/*
 	 * XXX: if malloc told us how much we actually got this could
 	 * XXX: be optimized.
 	 */
 	cdp->cdp_maxdirent = dm->dm_idx;
 	dev_unlock();
 }
 
 /*
  * Bring one mount's dirent tree one step closer to the global list of
  * character devices (cdevp_list).
  *
  * Each call performs at most one action -- delete a stale dirent, release
  * an inactive and unused cdev, or create the dirent for a device that has
  * none on this mount -- and then returns 1.  It returns 0 once a full pass
  * over the list finds nothing to do, so callers invoke it in a loop.
  * (Presumably it restarts from scratch because the list may change while
  * dev_lock is released.)
  *
  * cleanup non-zero means the mount is being torn down: every dirent is
  * removed and no new ones are created.
  *
  * The caller needs to hold the dm for the duration of the call.
  * dm->dm_lock must be exclusively held (asserted).
  */
 static int
 devfs_populate_loop(struct devfs_mount *dm, int cleanup)
 {
 	struct cdev_priv *cdp;
 	struct devfs_dirent *de;
 	struct devfs_dirent *dd, *dt;
 	struct cdev *pdev;
 	int de_flags, depth, j;
 	char *q, *s;
 
 	sx_assert(&dm->dm_lock, SX_XLOCKED);
 	dev_lock();
 	TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) {
 
 		KASSERT(cdp->cdp_dirents != NULL, ("NULL cdp_dirents"));
 
 		/*
 		 * If we are unmounting, or the device has been destroyed,
 		 * clean up our dirent.
 		 */
 		if ((cleanup || !(cdp->cdp_flags & CDP_ACTIVE)) &&
 		    dm->dm_idx <= cdp->cdp_maxdirent &&
 		    cdp->cdp_dirents[dm->dm_idx] != NULL) {
 			de = cdp->cdp_dirents[dm->dm_idx];
 			cdp->cdp_dirents[dm->dm_idx] = NULL;
 			KASSERT(cdp == de->de_cdp,
 			    ("%s %d %s %p %p", __func__, __LINE__,
 			    cdp->cdp_c.si_name, cdp, de->de_cdp));
 			KASSERT(de->de_dir != NULL, ("Null de->de_dir"));
 			dev_unlock();
 
 			TAILQ_REMOVE(&de->de_dir->de_dlist, de, de_list);
 			de->de_cdp = NULL;
 			/* The inode belongs to the cdev; keep devfs_delete() off it. */
 			de->de_inode = 0;
 			devfs_delete(dm, de, 0);
 			dev_lock();
 			cdp->cdp_inuse--;
 			dev_unlock();
 			return (1);
 		}
 		/*
 		 * GC any lingering devices
 		 */
 		if (!(cdp->cdp_flags & CDP_ACTIVE)) {
 			if (cdp->cdp_inuse > 0)
 				continue;
 			TAILQ_REMOVE(&cdevp_list, cdp, cdp_list);
 			dev_unlock();
 			dev_rel(&cdp->cdp_c);
 			return (1);
 		}
 		/*
 		 * Don't create any new dirents if we are unmounting
 		 */
 		if (cleanup)
 			continue;
 		KASSERT((cdp->cdp_flags & CDP_ACTIVE), ("Bogons, I tell ya'!"));
 
 		/* This mount already has a dirent for the device. */
 		if (dm->dm_idx <= cdp->cdp_maxdirent &&
 		    cdp->cdp_dirents[dm->dm_idx] != NULL) {
 			de = cdp->cdp_dirents[dm->dm_idx];
 			KASSERT(cdp == de->de_cdp, ("inconsistent cdp"));
 			continue;
 		}
 
 
 		cdp->cdp_inuse++;
 		dev_unlock();
 
 		/* Make sure the per-mount slot for dm_idx exists. */
 		if (dm->dm_idx > cdp->cdp_maxdirent)
 		        devfs_metoo(cdp, dm);
 
 		/*
 		 * Walk the '/'-separated components of si_name, finding or
 		 * creating each intermediate directory.  On exit from the
 		 * loop s..q delimits the final component and dd is the
 		 * directory it belongs in.
 		 */
 		dd = dm->dm_rootdir;
 		s = cdp->cdp_c.si_name;
 		for (;;) {
 			for (q = s; *q != '/' && *q != '\0'; q++)
 				continue;
 			if (*q != '/')
 				break;
 			de = devfs_find(dd, s, q - s, 0);
 			if (de == NULL)
 				de = devfs_vmkdir(dm, s, q - s, dd, 0);
 			else if (de->de_dirent->d_type == DT_LNK) {
 				/*
 				 * A symlink occupies the name: use (or make)
 				 * a directory of the same name and flag it
 				 * as covered.
 				 */
 				de = devfs_find(dd, s, q - s, DT_DIR);
 				if (de == NULL)
 					de = devfs_vmkdir(dm, s, q - s, dd, 0);
 				de->de_flags |= DE_COVERED;
 			}
 			s = q + 1;
 			dd = de;
 			KASSERT(dd->de_dirent->d_type == DT_DIR &&
 			    (dd->de_flags & (DE_DOT | DE_DOTDOT)) == 0,
 			    ("%s: invalid directory (si_name=%s)",
 			    __func__, cdp->cdp_c.si_name));
 
 		}
 		/* An existing symlink of the same name marks the new node covered. */
 		de_flags = 0;
 		de = devfs_find(dd, s, q - s, DT_LNK);
 		if (de != NULL)
 			de_flags |= DE_COVERED;
 
 		de = devfs_newdirent(s, q - s);
 		if (cdp->cdp_c.si_flags & SI_ALIAS) {
 			/* Aliases show up as symlinks to the parent device. */
 			de->de_uid = 0;
 			de->de_gid = 0;
 			de->de_mode = 0755;
 			de->de_dirent->d_type = DT_LNK;
 			pdev = cdp->cdp_c.si_parent;
 			/* Count how many levels dd sits below the root. */
 			dt = dd;
 			depth = 0;
 			while (dt != dm->dm_rootdir &&
 			    (dt = devfs_parent_dirent(dt)) != NULL)
 				depth++;
 			/* One "../" (3 bytes) per level, the name, and the NUL. */
 			j = depth * 3 + strlen(pdev->si_name) + 1;
 			de->de_symlink = malloc(j, M_DEVFS, M_WAITOK);
 			de->de_symlink[0] = 0;
 			while (depth-- > 0)
 				strcat(de->de_symlink, "../");
 			strcat(de->de_symlink, pdev->si_name);
 		} else {
 			de->de_uid = cdp->cdp_c.si_uid;
 			de->de_gid = cdp->cdp_c.si_gid;
 			de->de_mode = cdp->cdp_c.si_mode;
 			de->de_dirent->d_type = DT_CHR;
 		}
 		de->de_flags |= de_flags;
 		de->de_inode = cdp->cdp_inode;
 		de->de_cdp = cdp;
 #ifdef MAC
 		mac_devfs_create_device(cdp->cdp_c.si_cred, dm->dm_mount,
 		    &cdp->cdp_c, de);
 #endif
 		de->de_dir = dd;
 		TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
 		devfs_rules_apply(dm, de);
 		/* Publish the new dirent in the cdev's per-mount slot. */
 		dev_lock();
 		/* XXX: could check that cdp is still active here */
 		KASSERT(cdp->cdp_dirents[dm->dm_idx] == NULL,
 		    ("%s %d\n", __func__, __LINE__));
 		cdp->cdp_dirents[dm->dm_idx] = de;
 		KASSERT(de->de_cdp != (void *)0xdeadc0de,
 		    ("%s %d\n", __func__, __LINE__));
 		dev_unlock();
 		return (1);
 	}
 	dev_unlock();
 	return (0);
 }
 
 /*
  * Synchronise the mount with the global device list if the list has
  * changed since the mount was last populated.
  *
  * The global generation number is sampled once up front and recorded in
  * the mount only after devfs_populate_loop() reports no more work.
  *
  * The caller needs to hold the dm for the duration of the call.
  */
 void
 devfs_populate(struct devfs_mount *dm)
 {
 	unsigned snapshot;
 
 	sx_assert(&dm->dm_lock, SX_XLOCKED);
 	snapshot = devfs_generation;
 	if (dm->dm_generation != snapshot) {
 		while (devfs_populate_loop(dm, 0) != 0)
 			;
 		dm->dm_generation = snapshot;
 	}
 }
 
 /*
  * Tear down a mount's tree: first let devfs_populate_loop() remove the
  * device dirents in cleanup mode until nothing is left to do, then purge
  * whatever remains below the root directory.
  *
  * The caller needs to hold the dm for the duration of the call.
  */
 void
 devfs_cleanup(struct devfs_mount *dm)
 {
 
 	sx_assert(&dm->dm_lock, SX_XLOCKED);
 	for (;;) {
 		if (devfs_populate_loop(dm, 1) == 0)
 			break;
 	}
 	devfs_purge(dm, dm->dm_rootdir);
 }
 
 /*
  * devfs_create() and devfs_destroy() are called from kern_conf.c and
  * in both cases the devlock() mutex is held, so no further locking
  * is necessary and no sleeping allowed.
  */
 
 /*
  * Register a new cdev: mark it active, give it an inode number, take a
  * reference on it, append it to cdevp_list and bump devfs_generation.
  * devmtx must be owned by the caller (asserted).
  */
 void
 devfs_create(struct cdev *dev)
 {
 	struct cdev_priv *cdp;
 
 	mtx_assert(&devmtx, MA_OWNED);
 	cdp = cdev2priv(dev);
 	cdp->cdp_flags |= CDP_ACTIVE;
 	cdp->cdp_inode = alloc_unrl(devfs_inos);
 	dev_refl(dev);
 	TAILQ_INSERT_TAIL(&cdevp_list, cdp, cdp_list);
 	devfs_generation++;
 }
 
 /*
  * Mark a cdev inactive and bump devfs_generation.  Nothing is freed or
  * unlinked here.  devmtx must be owned by the caller (asserted).
  */
 void
 devfs_destroy(struct cdev *dev)
 {
 	struct cdev_priv *cdp;
 
 	mtx_assert(&devmtx, MA_OWNED);
 	cdp = cdev2priv(dev);
 	cdp->cdp_flags &= ~CDP_ACTIVE;
 	devfs_generation++;
 }
 
 /*
  * Allocate an inode number from the devfs_inos unit-number pool.
  */
 ino_t
 devfs_alloc_cdp_inode(void)
 {
 
 	return (alloc_unr(devfs_inos));
 }
 
 /*
  * Return an inode number to the devfs_inos pool.  A value that is not
  * greater than zero is ignored.
  */
 void
 devfs_free_cdp_inode(ino_t ino)
 {
 
 	if (ino <= 0)
 		return;
 	free_unr(devfs_inos, ino);
 }
 
 /*
  * Create the inode-number pool, covering DEVFS_ROOTINO + 1 through
  * INT_MAX, with devmtx passed as its mutex.
  */
 static void
 devfs_devs_init(void *junk __unused)
 {
 
 	devfs_inos = new_unrhdr(DEVFS_ROOTINO + 1, INT_MAX, &devmtx);
 }
 
 SYSINIT(devfs_devs, SI_SUB_DEVFS, SI_ORDER_FIRST, devfs_devs_init, NULL);
Index: head/sys/fs/devfs/devfs_vnops.c
===================================================================
--- head/sys/fs/devfs/devfs_vnops.c	(revision 318735)
+++ head/sys/fs/devfs/devfs_vnops.c	(revision 318736)
@@ -1,1929 +1,1930 @@
 /*-
  * Copyright (c) 2000-2004
  *	Poul-Henning Kamp.  All rights reserved.
  * Copyright (c) 1989, 1992-1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software donated to Berkeley by
  * Jan-Simon Pendry.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kernfs_vnops.c	8.15 (Berkeley) 5/21/95
  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
  *
  * $FreeBSD$
  */
 
 /*
  * TODO:
  *	mkdir: want it ?
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/dirent.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/filio.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
 #include <sys/ttycom.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 
 static struct vop_vector devfs_vnodeops;
 static struct vop_vector devfs_specops;
 static struct fileops devfs_ops_f;
 
 #include <fs/devfs/devfs.h>
 #include <fs/devfs/devfs_int.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 
 static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data");
 
 struct mtx	devfs_de_interlock;
 MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF);
 struct sx	clone_drain_lock;
 SX_SYSINIT(clone_drain_lock, &clone_drain_lock, "clone events drain lock");
 struct mtx	cdevpriv_mtx;
 MTX_SYSINIT(cdevpriv_mtx, &cdevpriv_mtx, "cdevpriv lock", MTX_DEF);
 
 SYSCTL_DECL(_vfs_devfs);
 
 static int devfs_dotimes;
 SYSCTL_INT(_vfs_devfs, OID_AUTO, dotimes, CTLFLAG_RW,
     &devfs_dotimes, 0, "Update timestamps on DEVFS with default precision");
 
 /*
  * Refresh a devfs node timestamp.
  *
  * With the vfs.devfs.dotimes sysctl set, vfs_timestamp() fills in *tsp.
  * Otherwise the stamp is only advanced when the current second differs
  * from the stored one, and the nanosecond part is cleared.
  *
  * Updates are done without locking, so stat(2) may observe a partially
  * updated time.
  */
 static void
 devfs_timestamp(struct timespec *tsp)
 {
 	time_t now;
 
 	if (devfs_dotimes != 0) {
 		vfs_timestamp(tsp);
 		return;
 	}
 
 	now = time_second;
 	if (tsp->tv_sec == now)
 		return;
 	tsp->tv_sec = now;
 	tsp->tv_nsec = 0;
 }
 
 /*
  * Resolve the device behind an open file and take a thread reference on
  * its cdevsw.
  *
  * On success returns 0 with *devp, *dswp and *ref filled in and
  * curthread->td_fpop set to fp; the caller must later release the
  * reference with dev_relthread() and is expected to restore td_fpop.
  * Returns ENXIO, with no reference held, if the vnode's device is not the
  * one recorded in fp->f_data or if no cdevsw could be obtained.
  */
 static int
 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp,
     int *ref)
 {
 
 	*dswp = devvn_refthread(fp->f_vnode, devp, ref);
 	if (*devp != fp->f_data) {
 		/* Wrong device: give back the reference if one was taken. */
 		if (*dswp != NULL)
 			dev_relthread(*devp, *ref);
 		return (ENXIO);
 	}
 	KASSERT((*devp)->si_refcount > 0,
 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
 	if (*dswp == NULL)
 		return (ENXIO);
 	curthread->td_fpop = fp;
 	return (0);
 }
 
 /*
  * Fetch the per-open private data attached to the file the current thread
  * is operating on (curthread->td_fpop).
  *
  * Returns 0 and stores the data pointer in *datap on success, EBADF if the
  * thread has no current file, or ENOENT if the file has no private data.
  * *datap is left untouched on failure.
  */
 int
 devfs_get_cdevpriv(void **datap)
 {
 	struct file *fp;
 	struct cdev_privdata *priv;
 
 	fp = curthread->td_fpop;
 	if (fp == NULL)
 		return (EBADF);
 
 	priv = fp->f_cdevpriv;
 	if (priv == NULL)
 		return (ENOENT);
 
 	*datap = priv->cdpd_data;
 	return (0);
 }
 
 /*
  * Attach per-open private data, and the destructor to run on it later, to
  * the file the current thread is operating on (curthread->td_fpop).
  *
  * Returns 0 on success, ENOENT if the thread has no current file, or EBUSY
  * if the file already has private data (the new record is discarded and
  * priv_dtr is not called).
  */
 int
 devfs_set_cdevpriv(void *priv, d_priv_dtor_t *priv_dtr)
 {
 	struct file *fp;
 	struct cdev_priv *cdp;
 	struct cdev_privdata *p;
 	int error;
 
 	fp = curthread->td_fpop;
 	if (fp == NULL)
 		return (ENOENT);
 	cdp = cdev2priv((struct cdev *)fp->f_data);
 	/* Allocate before taking the mutex; M_WAITOK may sleep. */
 	p = malloc(sizeof(struct cdev_privdata), M_CDEVPDATA, M_WAITOK);
 	p->cdpd_data = priv;
 	p->cdpd_dtr = priv_dtr;
 	p->cdpd_fp = fp;
 	mtx_lock(&cdevpriv_mtx);
 	if (fp->f_cdevpriv == NULL) {
 		LIST_INSERT_HEAD(&cdp->cdp_fdpriv, p, cdpd_list);
 		fp->f_cdevpriv = p;
 		mtx_unlock(&cdevpriv_mtx);
 		error = 0;
 	} else {
 		/* Someone already attached data to this file. */
 		mtx_unlock(&cdevpriv_mtx);
 		free(p, M_CDEVPDATA);
 		error = EBUSY;
 	}
 	return (error);
 }
 
 /*
  * Detach a private-data record from its file and from the device's list,
  * run its destructor and free it.
  *
  * Must be entered with cdevpriv_mtx held (asserted); the mutex is released
  * before the destructor is called and is NOT held on return.
  */
 void
 devfs_destroy_cdevpriv(struct cdev_privdata *p)
 {
 
 	mtx_assert(&cdevpriv_mtx, MA_OWNED);
 	KASSERT(p->cdpd_fp->f_cdevpriv == p,
 	    ("devfs_destoy_cdevpriv %p != %p", p->cdpd_fp->f_cdevpriv, p));
 	p->cdpd_fp->f_cdevpriv = NULL;
 	LIST_REMOVE(p, cdpd_list);
 	mtx_unlock(&cdevpriv_mtx);
 	(p->cdpd_dtr)(p->cdpd_data);
 	free(p, M_CDEVPDATA);
 }
 
 /*
  * Destroy the private data attached to fp, if there is any.
  */
 static void
 devfs_fpdrop(struct file *fp)
 {
 	struct cdev_privdata *priv;
 
 	mtx_lock(&cdevpriv_mtx);
 	priv = fp->f_cdevpriv;
 	if (priv != NULL) {
 		/* devfs_destroy_cdevpriv() releases cdevpriv_mtx for us. */
 		devfs_destroy_cdevpriv(priv);
 	} else {
 		mtx_unlock(&cdevpriv_mtx);
 	}
 }
 
 /*
  * Destroy the private data of the file the current thread is operating on
  * (curthread->td_fpop).  Does nothing when the thread has no current file.
  */
 void
 devfs_clear_cdevpriv(void)
 {
 	struct file *cur_fp;
 
 	cur_fp = curthread->td_fpop;
 	if (cur_fp != NULL)
 		devfs_fpdrop(cur_fp);
 }
 
 /*
  * Populate the mount that vp lives on, temporarily giving up the vnode
  * lock to do so.
  *
  * On success devfs_populate_vp() returns with dmp->dm_lock held.
  * It returns ERESTART, with dm_lock released, if the mount was finalised
  * in the meantime, if the vnode was doomed while unlocked, or if the
  * vnode's dirent has been doomed.  In every case the vnode is locked again
  * in its original mode on return.
  */
 static int
 devfs_populate_vp(struct vnode *vp)
 {
 	struct devfs_dirent *de;
 	struct devfs_mount *dmp;
 	int locked;
 
 	ASSERT_VOP_LOCKED(vp, "devfs_populate_vp");
 
 	dmp = VFSTODEVFS(vp->v_mount);
 	/* Remember the lock mode so it can be re-taken identically. */
 	locked = VOP_ISLOCKED(vp);
 
 	sx_xlock(&dmp->dm_lock);
 	/* Keep the mount structure alive while the vnode is unlocked. */
 	DEVFS_DMP_HOLD(dmp);
 
 	/* Can't call devfs_populate() with the vnode lock held. */
 	VOP_UNLOCK(vp, 0);
 	devfs_populate(dmp);
 
 	/* Drop dm_lock before re-taking the vnode lock, then take it again. */
 	sx_xunlock(&dmp->dm_lock);
 	vn_lock(vp, locked | LK_RETRY);
 	sx_xlock(&dmp->dm_lock);
 	if (DEVFS_DMP_DROP(dmp)) {
 		/* Ours was the last hold on the mount: finish the unmount. */
 		sx_xunlock(&dmp->dm_lock);
 		devfs_unmount_final(dmp);
 		return (ERESTART);
 	}
 	if ((vp->v_iflag & VI_DOOMED) != 0) {
 		sx_xunlock(&dmp->dm_lock);
 		return (ERESTART);
 	}
 	de = vp->v_data;
 	KASSERT(de != NULL,
 	    ("devfs_populate_vp: vp->v_data == NULL but vnode not doomed"));
 	if ((de->de_flags & DE_DOOMED) != 0) {
 		sx_xunlock(&dmp->dm_lock);
 		return (ERESTART);
 	}
 
 	return (0);
 }
 
 /*
  * VOP_VPTOCNP for devfs: translate a vnode into its name component plus a
  * referenced parent vnode.
  *
  * The name is written at the tail of a_buf, ending at offset *a_buflen,
  * and *a_buflen is lowered to where the name starts.  For a character
  * device the whole si_name is copied; for a directory, its dirent name.
  * The root directory returns itself as parent and leaves the buffer and
  * *a_buflen untouched.
  *
  * Returns 0 on success, ENOMEM if the name does not fit, ENOENT for other
  * vnode types or when no parent dirent/vnode can be found, or whatever
  * devfs_populate_vp() returned.
  */
 static int
 devfs_vptocnp(struct vop_vptocnp_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode **dvp = ap->a_vpp;
 	struct devfs_mount *dmp;
 	char *buf = ap->a_buf;
 	int *buflen = ap->a_buflen;
 	struct devfs_dirent *dd, *de;
 	int i, error;
 
 	dmp = VFSTODEVFS(vp->v_mount);
 
 	/* On success dm_lock is held; it is released at "finished". */
 	error = devfs_populate_vp(vp);
 	if (error != 0)
 		return (error);
 
 	i = *buflen;
 	dd = vp->v_data;
 
 	if (vp->v_type == VCHR) {
 		i -= strlen(dd->de_cdp->cdp_c.si_name);
 		if (i < 0) {
 			error = ENOMEM;
 			goto finished;
 		}
 		bcopy(dd->de_cdp->cdp_c.si_name, buf + i,
 		    strlen(dd->de_cdp->cdp_c.si_name));
 		de = dd->de_dir;
 	} else if (vp->v_type == VDIR) {
 		if (dd == dmp->dm_rootdir) {
 			/* The root is its own parent. */
 			*dvp = vp;
 			vref(*dvp);
 			goto finished;
 		}
 		i -= dd->de_dirent->d_namlen;
 		if (i < 0) {
 			error = ENOMEM;
 			goto finished;
 		}
 		bcopy(dd->de_dirent->d_name, buf + i,
 		    dd->de_dirent->d_namlen);
 		de = dd;
 	} else {
 		error = ENOENT;
 		goto finished;
 	}
 	*buflen = i;
 	de = devfs_parent_dirent(de);
 	if (de == NULL) {
 		error = ENOENT;
 		goto finished;
 	}
 	mtx_lock(&devfs_de_interlock);
 	*dvp = de->de_vnode;
 	if (*dvp != NULL) {
 		/*
 		 * Hold the vnode under its interlock so it stays valid
 		 * once the de interlock is gone, convert that into a real
 		 * reference, then release the temporary hold.
 		 */
 		VI_LOCK(*dvp);
 		mtx_unlock(&devfs_de_interlock);
 		vholdl(*dvp);
 		VI_UNLOCK(*dvp);
 		vref(*dvp);
 		vdrop(*dvp);
 	} else {
 		mtx_unlock(&devfs_de_interlock);
 		error = ENOENT;
 	}
 finished:
 	sx_xunlock(&dmp->dm_lock);
 	return (error);
 }
 
 /*
  * Construct the fully qualified path name relative to the mountpoint.
  * If a NULL cnp is provided, no '/' is appended to the resulting path.
  *
  * The path is assembled right-to-left at the end of buf, which must hold
  * at least SPECNAMELEN + 1 bytes: the NUL goes at buf[SPECNAMELEN], then
  * the component name from cnp (if any), then each directory name from dd
  * up to, but not including, the root.
  *
  * Returns a pointer into buf at the start of the path, or NULL if the path
  * does not fit or a parent dirent cannot be found.  dmp->dm_lock must be
  * held (asserted).
  */
 char *
 devfs_fqpn(char *buf, struct devfs_mount *dmp, struct devfs_dirent *dd,
     struct componentname *cnp)
 {
 	int i;
 	struct devfs_dirent *de;
 
 	sx_assert(&dmp->dm_lock, SA_LOCKED);
 
 	i = SPECNAMELEN;
 	buf[i] = '\0';
 	if (cnp != NULL)
 		i -= cnp->cn_namelen;
 	if (i < 0)
 		 return (NULL);
 	if (cnp != NULL)
 		bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
 	de = dd;
 	while (de != dmp->dm_rootdir) {
 		/*
 		 * Emit a separator unless nothing has been written yet and
 		 * there is no trailing component to separate from.
 		 */
 		if (cnp != NULL || i < SPECNAMELEN) {
 			i--;
 			if (i < 0)
 				 return (NULL);
 			buf[i] = '/';
 		}
 		i -= de->de_dirent->d_namlen;
 		if (i < 0)
 			 return (NULL);
 		bcopy(de->de_dirent->d_name, buf + i,
 		    de->de_dirent->d_namlen);
 		de = devfs_parent_dirent(de);
 		if (de == NULL)
 			return (NULL);
 	}
 	return (buf + i);
 }
 
 /*
  * Drop the dirent and mount holds taken by devfs_allocv() and report
  * whether the dirent is still usable.
  *
  * Returns:
  *   0 - dirent is alive; dm_lock is released only if drop_dm_lock is set.
  *   1 - dirent was doomed; dm_lock has been released.
  *   2 - the mount hold was the last one; dm_lock has been released and
  *       devfs_unmount_final() has been called.
  */
 static int
 devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp,
 	struct devfs_dirent *de)
 {
 	int not_found;
 
 	not_found = 0;
 	/* Sample the flag before the drop below, which may free de. */
 	if (de->de_flags & DE_DOOMED)
 		not_found = 1;
 	if (DEVFS_DE_DROP(de)) {
 		KASSERT(not_found == 1, ("DEVFS de dropped but not doomed"));
 		devfs_dirent_free(de);
 	}
 	if (DEVFS_DMP_DROP(dmp)) {
 		KASSERT(not_found == 1,
 			("DEVFS mount struct freed before dirent"));
 		not_found = 2;
 		sx_xunlock(&dmp->dm_lock);
 		devfs_unmount_final(dmp);
 	}
 	/* In case 2 the lock is already gone; do not touch dmp again. */
 	if (not_found == 1 || (drop_dm_lock && not_found != 2))
 		sx_unlock(&dmp->dm_lock);
 	return (not_found);
 }
 
 /*
  * Destructor handed to insmntque1() by devfs_allocv().  arg is the dirent
  * the vnode was being set up for: break the vnode <-> dirent association
  * under devfs_de_interlock, then dispose of the vnode with vgone() and
  * vput().
  */
 static void
 devfs_insmntque_dtr(struct vnode *vp, void *arg)
 {
 	struct devfs_dirent *de;
 
 	de = (struct devfs_dirent *)arg;
 	mtx_lock(&devfs_de_interlock);
 	vp->v_data = NULL;
 	de->de_vnode = NULL;
 	mtx_unlock(&devfs_de_interlock);
 	vgone(vp);
 	vput(vp);
 }
 
 /*
  * Return, in *vpp, a locked vnode for the dirent de on mount mp, reusing
  * the dirent's existing vnode when it has one and creating a new one
  * otherwise.  lockmode is the lock type passed to vget() when an existing
  * vnode is reused; a freshly created vnode is locked exclusively.
  *
  * devfs_allocv shall be entered with dmp->dm_lock held, and it drops
  * it on return.
  *
  * Returns 0 on success, ENOENT if the dirent is (or becomes) doomed or its
  * device is no longer active, or the error from getnewvnode() or
  * insmntque1().
  */
 int
 devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
     struct vnode **vpp)
 {
 	int error;
 	struct vnode *vp;
 	struct cdev *dev;
 	struct devfs_mount *dmp;
 	struct cdevsw *dsw;
 
 	dmp = VFSTODEVFS(mp);
 	if (de->de_flags & DE_DOOMED) {
 		sx_xunlock(&dmp->dm_lock);
 		return (ENOENT);
 	}
 loop:
 	/* Hold both objects; dm_lock is dropped below. */
 	DEVFS_DE_HOLD(de);
 	DEVFS_DMP_HOLD(dmp);
 	mtx_lock(&devfs_de_interlock);
 	vp = de->de_vnode;
 	if (vp != NULL) {
 		/* Existing vnode: get it with dm_lock released. */
 		VI_LOCK(vp);
 		mtx_unlock(&devfs_de_interlock);
 		sx_xunlock(&dmp->dm_lock);
 		vget(vp, lockmode | LK_INTERLOCK | LK_RETRY, curthread);
 		sx_xlock(&dmp->dm_lock);
 		if (devfs_allocv_drop_refs(0, dmp, de)) {
 			vput(vp);
 			return (ENOENT);
 		}
 		else if ((vp->v_iflag & VI_DOOMED) != 0) {
 			/*
 			 * The vnode died while we waited for it: detach it
 			 * from the dirent if still attached, and start over.
 			 */
 			mtx_lock(&devfs_de_interlock);
 			if (de->de_vnode == vp) {
 				de->de_vnode = NULL;
 				vp->v_data = NULL;
 			}
 			mtx_unlock(&devfs_de_interlock);
 			vput(vp);
 			goto loop;
 		}
 		sx_xunlock(&dmp->dm_lock);
 		*vpp = vp;
 		return (0);
 	}
 	mtx_unlock(&devfs_de_interlock);
 	if (de->de_dirent->d_type == DT_CHR) {
 		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) {
 			devfs_allocv_drop_refs(1, dmp, de);
 			return (ENOENT);
 		}
 		dev = &de->de_cdp->cdp_c;
 	} else {
 		dev = NULL;
 	}
 	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
 	if (error != 0) {
 		devfs_allocv_drop_refs(1, dmp, de);
 		printf("devfs_allocv: failed to allocate new vnode\n");
 		return (error);
 	}
 
 	if (de->de_dirent->d_type == DT_CHR) {
 		vp->v_type = VCHR;
 		VI_LOCK(vp);
 		dev_lock();
 		dev_refl(dev);
 		/* XXX: v_rdev should be protected by vnode lock */
 		vp->v_rdev = dev;
 		KASSERT(vp->v_usecount == 1,
 		    ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount));
 		dev->si_usecount += vp->v_usecount;
 		/* Special casing of ttys for deadfs.  Probably redundant. */
 		dsw = dev->si_devsw;
 		if (dsw != NULL && (dsw->d_flags & D_TTY) != 0)
 			vp->v_vflag |= VV_ISTTY;
 		dev_unlock();
 		VI_UNLOCK(vp);
 		if ((dev->si_flags & SI_ETERNAL) != 0)
 			vp->v_vflag |= VV_ETERNALDEV;
 		/* Character devices use the specops vector. */
 		vp->v_op = &devfs_specops;
 	} else if (de->de_dirent->d_type == DT_DIR) {
 		vp->v_type = VDIR;
 	} else if (de->de_dirent->d_type == DT_LNK) {
 		vp->v_type = VLNK;
 	} else {
 		vp->v_type = VBAD;
 	}
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWITNESS);
 	VN_LOCK_ASHARE(vp);
 	/* Tie the vnode and the dirent together. */
 	mtx_lock(&devfs_de_interlock);
 	vp->v_data = de;
 	de->de_vnode = vp;
 	mtx_unlock(&devfs_de_interlock);
 	/* On failure devfs_insmntque_dtr() undoes the association. */
 	error = insmntque1(vp, mp, devfs_insmntque_dtr, de);
 	if (error != 0) {
 		(void) devfs_allocv_drop_refs(1, dmp, de);
 		return (error);
 	}
 	if (devfs_allocv_drop_refs(0, dmp, de)) {
 		vput(vp);
 		return (ENOENT);
 	}
 #ifdef MAC
 	mac_devfs_vnode_associate(mp, de, vp);
 #endif
 	sx_xunlock(&dmp->dm_lock);
 	*vpp = vp;
 	return (0);
 }
 
 /*
  * VOP_ACCESS for devfs.
  *
  * The ordinary vaccess() check is made against the dirent's mode, uid and
  * gid (for a directory, against the dirent its de_dir points to).  An
  * EACCES result is overridden when the node is the calling process's
  * controlling terminal.
  */
 static int
 devfs_access(struct vop_access_args *ap)
 {
 	struct vnode *vp;
 	struct devfs_dirent *de;
 	struct proc *p;
 	int error;
 
 	vp = ap->a_vp;
 	de = vp->v_data;
 	if (vp->v_type == VDIR)
 		de = de->de_dir;
 
 	error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
 	    ap->a_accmode, ap->a_cred, NULL);
 	/* Success and every failure other than EACCES are final. */
 	if (error != EACCES)
 		return (error);
 
 	/* We do, however, allow access to the controlling terminal */
 	p = ap->a_td->td_proc;
 	PROC_LOCK(p);
 	if ((p->p_flag & P_CONTROLT) != 0 &&
 	    p->p_session->s_ttydp == de->de_cdp)
 		error = 0;
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 _Static_assert(((FMASK | FCNTLFLAGS) & (FLASTCLOSE | FREVOKE)) == 0,
     "devfs-only flag reuse failed");
 
 /*
  * VOP_CLOSE for devfs device vnodes.
  *
  * Releases the session's controlling-terminal reference when appropriate,
  * then calls the driver's d_close() -- but, unless the driver sets
  * D_TRACKCLOSE or the vnode is doomed, only when count_dev() says this is
  * the last user.  The vnode lock is dropped around d_close() and re-taken
  * in its previous mode.
  *
  * Returns 0 when nothing needed to be done, ENXIO if no cdevsw could be
  * referenced, or the result of d_close().
  */
 static int
 devfs_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp, *oldvp;
 	struct thread *td = ap->a_td;
 	struct proc *p;
 	struct cdev *dev = vp->v_rdev;
 	struct cdevsw *dsw;
 	int dflags, error, ref, vp_locked;
 
 	/*
 	 * XXX: Don't call d_close() if we were called because of
 	 * XXX: insmntque1() failure.
 	 */
 	if (vp->v_data == NULL)
 		return (0);
 
 	/*
 	 * Hack: a tty device that is a controlling terminal
 	 * has a reference from the session structure.
 	 * We cannot easily tell that a character device is
 	 * a controlling terminal, unless it is the closing
 	 * process' controlling terminal.  In that case,
 	 * if the reference count is 2 (this last descriptor
 	 * plus the session), release the reference from the session.
 	 */
 	if (td != NULL) {
 		p = td->td_proc;
 		PROC_LOCK(p);
 		if (vp == p->p_session->s_ttyvp) {
 			PROC_UNLOCK(p);
 			oldvp = NULL;
 			/* Re-check once proctree_lock is held. */
 			sx_xlock(&proctree_lock);
 			if (vp == p->p_session->s_ttyvp) {
 				SESS_LOCK(p->p_session);
 				VI_LOCK(vp);
 				if (count_dev(dev) == 2 &&
 				    (vp->v_iflag & VI_DOOMED) == 0) {
 					p->p_session->s_ttyvp = NULL;
 					p->p_session->s_ttydp = NULL;
 					oldvp = vp;
 				}
 				VI_UNLOCK(vp);
 				SESS_UNLOCK(p->p_session);
 			}
 			sx_xunlock(&proctree_lock);
 			/* Release the session's reference outside the locks. */
 			if (oldvp != NULL)
 				vrele(oldvp);
 		} else
 			PROC_UNLOCK(p);
 	}
 	/*
 	 * We do not want to really close the device if it
 	 * is still in use unless we are trying to close it
 	 * forcibly. Since every use (buffer, vnode, swap, cmap)
 	 * holds a reference to the vnode, and because we mark
 	 * any other vnodes that alias this device, when the
 	 * sum of the reference counts on all the aliased
 	 * vnodes descends to one, we are on last close.
 	 */
 	dsw = dev_refthread(dev, &ref);
 	if (dsw == NULL)
 		return (ENXIO);
 	dflags = 0;
 	VI_LOCK(vp);
 	if (vp->v_iflag & VI_DOOMED) {
 		/* Forced close. */
 		dflags |= FREVOKE | FNONBLOCK;
 	} else if (dsw->d_flags & D_TRACKCLOSE) {
 		/* Keep device updated on status. */
 	} else if (count_dev(dev) > 1) {
 		/* Other users remain: not the last close, nothing to do. */
 		VI_UNLOCK(vp);
 		dev_relthread(dev, ref);
 		return (0);
 	}
 	if (count_dev(dev) == 1)
 		dflags |= FLASTCLOSE;
 	/* Hold the vnode across the unlocked d_close() call. */
 	vholdl(vp);
 	VI_UNLOCK(vp);
 	vp_locked = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp, 0);
 	KASSERT(dev->si_refcount > 0,
 	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
 	error = dsw->d_close(dev, ap->a_fflag | dflags, S_IFCHR, td);
 	dev_relthread(dev, ref);
 	vn_lock(vp, vp_locked | LK_RETRY);
 	vdrop(vp);
 	return (error);
 }
 
 /*
  * fo_close for devfs files: run the generic vnode close with
  * curthread->td_fpop pointing at fp, put the previous td_fpop back, then
  * destroy any private data still attached to the file.
  */
 static int
 devfs_close_f(struct file *fp, struct thread *td)
 {
 	struct file *saved_fpop;
 	int rv;
 
 	/*
 	 * NB: td may be NULL if this descriptor is closed due to
 	 * garbage collection from a closed UNIX domain socket.
 	 * That is why curthread is used here rather than td.
 	 */
 	saved_fpop = curthread->td_fpop;
 	curthread->td_fpop = fp;
 	rv = vnops.fo_close(fp, td);
 	curthread->td_fpop = saved_fpop;
 
 	/*
 	 * The f_cdevpriv cannot be assigned non-NULL value while we
 	 * are destroying the file.
 	 */
 	if (fp->f_cdevpriv != NULL)
 		devfs_fpdrop(fp);
 
 	return (rv);
 }
 
 /*
  * VOP_GETATTR for devfs.
  *
  * Ownership, mode, link count and file id come from the dirent (for a
  * directory, from the dirent its de_dir points to).  Timestamps come from
  * the dirent, or from the cdev for character devices.  Any timestamp whose
  * tv_sec is 3600 or less is replaced -- in the dirent/cdev itself, not just
  * in the result -- by the boot time.
  *
  * Returns 0, or the error from devfs_populate_vp().
  */
 static int
 devfs_getattr(struct vop_getattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr *vap = ap->a_vap;
 	struct devfs_dirent *de;
 	struct devfs_mount *dmp;
 	struct cdev *dev;
 	struct timeval boottime;
 	int error;
 
 	error = devfs_populate_vp(vp);
 	if (error != 0)
 		return (error);
 
 	/* Only the populate was needed; the lock is not kept. */
 	dmp = VFSTODEVFS(vp->v_mount);
 	sx_xunlock(&dmp->dm_lock);
 
 	de = vp->v_data;
 	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
 	if (vp->v_type == VDIR) {
 		de = de->de_dir;
 		KASSERT(de != NULL,
 		    ("Null dir dirent in devfs_getattr vp=%p", vp));
 	}
 	vap->va_uid = de->de_uid;
 	vap->va_gid = de->de_gid;
 	vap->va_mode = de->de_mode;
 	if (vp->v_type == VLNK)
 		vap->va_size = strlen(de->de_symlink);
 	else if (vp->v_type == VDIR)
 		vap->va_size = vap->va_bytes = DEV_BSIZE;
 	else
 		vap->va_size = 0;
 	if (vp->v_type != VDIR)
 		vap->va_bytes = 0;
 	vap->va_blocksize = DEV_BSIZE;
 	vap->va_type = vp->v_type;
 
 	getboottime(&boottime);
 /* Replace a timestamp within the first hour of the epoch by the boot time. */
 #define fix(aa)							\
 	do {							\
 		if ((aa).tv_sec <= 3600) {			\
 			(aa).tv_sec = boottime.tv_sec;		\
 			(aa).tv_nsec = boottime.tv_usec * 1000; \
 		}						\
 	} while (0)
 
 	if (vp->v_type != VCHR)  {
 		fix(de->de_atime);
 		vap->va_atime = de->de_atime;
 		fix(de->de_mtime);
 		vap->va_mtime = de->de_mtime;
 		fix(de->de_ctime);
 		vap->va_ctime = de->de_ctime;
 	} else {
 		dev = vp->v_rdev;
 		fix(dev->si_atime);
 		vap->va_atime = dev->si_atime;
 		fix(dev->si_mtime);
 		vap->va_mtime = dev->si_mtime;
 		fix(dev->si_ctime);
 		vap->va_ctime = dev->si_ctime;
 
 		/* The device number reported is the cdev's inode number. */
 		vap->va_rdev = cdev2priv(dev)->cdp_inode;
 	}
 	vap->va_gen = 0;
 	vap->va_flags = 0;
 	vap->va_filerev = 0;
 	vap->va_nlink = de->de_links;
 	vap->va_fileid = de->de_inode;
 
 	return (error);
 }
 
 /*
  * fo_ioctl for devfs files: forward to the generic vnode ioctl with
  * td->td_fpop temporarily pointing at fp, and put the previous value back
  * afterwards.
  */
 /* ARGSUSED */
 static int
 devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
 {
 	struct file *saved_fpop;
 	int rv;
 
 	saved_fpop = td->td_fpop;
 	td->td_fpop = fp;
 	rv = vnops.fo_ioctl(fp, com, data, cred, td);
 	td->td_fpop = saved_fpop;
 
 	return (rv);
 }
 
 /*
  * VOP_IOCTL for devfs device vnodes.
  *
  * FIODTYPE and FIODGNAME are answered here; every other command is passed
  * to the driver's d_ioctl().  A driver result of ENOIOCTL is reported as
  * ENOTTY.  After a successful TIOCSCTTY the vnode is installed as the
  * session's controlling terminal.
  *
  * Returns ENXIO if no cdevsw could be referenced; for FIODGNAME, EINVAL if
  * the caller's buffer is too small for the name and its NUL.
  */
 static int
 devfs_ioctl(struct vop_ioctl_args *ap)
 {
 	struct fiodgname_arg *fgn;
 	struct vnode *vpold, *vp;
 	struct cdevsw *dsw;
 	struct thread *td;
 	struct cdev *dev;
 	int error, ref, i;
 	const char *p;
 	u_long com;
 
 	vp = ap->a_vp;
 	com = ap->a_command;
 	td = ap->a_td;
 
 	dsw = devvn_refthread(vp, &dev, &ref);
 	if (dsw == NULL)
 		return (ENXIO);
 	KASSERT(dev->si_refcount > 0,
 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(dev)));
 
 	if (com == FIODTYPE) {
 		/* Report the type bits of the driver's d_flags. */
 		*(int *)ap->a_data = dsw->d_flags & D_TYPEMASK;
 		error = 0;
 		goto out;
 	} else if (com == FIODGNAME) {
 		/* Copy the device name, including its NUL, out to fgn->buf. */
 		fgn = ap->a_data;
 		p = devtoname(dev);
 		i = strlen(p) + 1;
 		if (i > fgn->len)
 			error = EINVAL;
 		else
 			error = copyout(p, fgn->buf, i);
 		goto out;
 	}
 
 	error = dsw->d_ioctl(dev, com, ap->a_data, ap->a_fflag, td);
 
 out:
 	dev_relthread(dev, ref);
 	if (error == ENOIOCTL)
 		error = ENOTTY;
 
 	if (error == 0 && com == TIOCSCTTY) {
 		/* Do nothing if reassigning same control tty */
 		sx_slock(&proctree_lock);
 		if (td->td_proc->p_session->s_ttyvp == vp) {
 			sx_sunlock(&proctree_lock);
 			return (0);
 		}
 
 		/* Reference the new vnode before publishing it. */
 		vpold = td->td_proc->p_session->s_ttyvp;
 		VREF(vp);
 		SESS_LOCK(td->td_proc->p_session);
 		td->td_proc->p_session->s_ttyvp = vp;
 		td->td_proc->p_session->s_ttydp = cdev2priv(dev);
 		SESS_UNLOCK(td->td_proc->p_session);
 
 		sx_sunlock(&proctree_lock);
 
 		/* Get rid of reference to old control tty */
 		if (vpold)
 			vrele(vpold);
 	}
 	return (error);
 }
 
 /*
  * fo_kqfilter for devfs files: validate the file's device with
  * devfs_fp_check() and hand the knote to the driver's d_kqfilter().
  * Returns the error from devfs_fp_check(), or the driver's result.
  */
 /* ARGSUSED */
 static int
 devfs_kqfilter_f(struct file *fp, struct knote *kn)
 {
 	struct cdev *dev;
 	struct cdevsw *dsw;
 	int error, ref;
 	struct file *fpop;
 	struct thread *td;
 
 	td = curthread;
 	/* devfs_fp_check() overwrites td_fpop on success; remember the old one. */
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error)
 		return (error);
 	error = dsw->d_kqfilter(dev, kn);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 	return (error);
 }
 
 /*
  * Decide whether thread td may see directory entry de.
  *
  * Returns 0 when the entry has no cdev, the cdev carries no credential,
  * or prison_check() accepts the thread's credential against the device's.
  * Otherwise the prison_check() error is returned, unless the device is
  * the controlling terminal of the thread's process, which is allowed.
  */
 static inline int
 devfs_prison_check(struct devfs_dirent *de, struct thread *td)
 {
 	struct cdev_priv *priv;
 	struct ucred *devcred;
 	struct proc *proc;
 	int rv;
 
 	priv = de->de_cdp;
 	if (priv == NULL)
 		return (0);
 	devcred = priv->cdp_c.si_cred;
 	if (devcred == NULL)
 		return (0);
 
 	rv = prison_check(td->td_ucred, devcred);
 	if (rv == 0)
 		return (0);
 
 	/* We do, however, allow access to the controlling terminal */
 	proc = td->td_proc;
 	PROC_LOCK(proc);
 	if ((proc->p_flag & P_CONTROLT) != 0 &&
 	    proc->p_session->s_ttydp == priv)
 		rv = 0;
 	PROC_UNLOCK(proc);
 	return (rv);
 }
 
 /*
  * Worker for devfs_lookup(): resolve one path component in directory
  * ap->a_dvp and return the resulting vnode through ap->a_vpp.
  *
  * *dm_unlock is the caller's "I still have to release dm_lock" flag.  It
  * is set to 0 here after devfs_allocv() has been called, after a failed
  * devfs_populate_vp(), and on the path that ends in devfs_unmount_final();
  * on every other return it is left untouched.
  * NOTE(review): this presumably means devfs_allocv() drops dm_lock itself
  * -- confirm against its definition.
  *
  * If no entry exists for the name, the dev_clone event handlers are given
  * a chance to create the device on demand.
  */
 static int
 devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
 {
 	struct componentname *cnp;
 	struct vnode *dvp, **vpp;
 	struct thread *td;
 	struct devfs_dirent *de, *dd;
 	struct devfs_dirent **dde;
 	struct devfs_mount *dmp;
 	struct cdev *cdev;
 	int error, flags, nameiop, dvplocked;
 	char specname[SPECNAMELEN + 1], *pname;
 
 	cnp = ap->a_cnp;
 	vpp = ap->a_vpp;
 	dvp = ap->a_dvp;
 	pname = cnp->cn_nameptr;
 	td = cnp->cn_thread;
 	flags = cnp->cn_flags;
 	nameiop = cnp->cn_nameiop;
 	dmp = VFSTODEVFS(dvp->v_mount);
 	dd = dvp->v_data;
 	*vpp = NULLVP;
 
 	/* Renaming within devfs is not supported. */
 	if ((flags & ISLASTCN) && nameiop == RENAME)
 		return (EOPNOTSUPP);
 
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 
 	/* ".." from the root of this mount is rejected here. */
 	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
 		return (EIO);
 
 	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
 	if (error)
 		return (error);
 
 	/* "." resolves to the directory itself, with an extra reference. */
 	if (cnp->cn_namelen == 1 && *pname == '.') {
 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
 			return (EINVAL);
 		*vpp = dvp;
 		VREF(dvp);
 		return (0);
 	}
 
 	if (flags & ISDOTDOT) {
 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
 			return (EINVAL);
 		de = devfs_parent_dirent(dd);
 		if (de == NULL)
 			return (ENOENT);
 		/*
 		 * Unlock the child directory while the parent vnode is
 		 * obtained, then relock it in the mode it was held in.
 		 */
 		dvplocked = VOP_ISLOCKED(dvp);
 		VOP_UNLOCK(dvp, 0);
 		error = devfs_allocv(de, dvp->v_mount,
 		    cnp->cn_lkflags & LK_TYPE_MASK, vpp);
 		*dm_unlock = 0;
 		vn_lock(dvp, dvplocked | LK_RETRY);
 		return (error);
 	}
 
 	dd = dvp->v_data;
 	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen, 0);
 	while (de == NULL) {	/* While(...) so we can use break */
 
 		if (nameiop == DELETE)
 			return (ENOENT);
 
 		/*
 		 * OK, we didn't have an entry for the name we were asked for
 		 * so we try to see if anybody can create it on demand.
 		 */
 		pname = devfs_fqpn(specname, dmp, dd, cnp);
 		if (pname == NULL)
 			break;
 
 		/*
 		 * Hold the mount and release dm_lock around the clone
 		 * handlers; the hold is dropped again below.
 		 */
 		cdev = NULL;
 		DEVFS_DMP_HOLD(dmp);
 		sx_xunlock(&dmp->dm_lock);
 		sx_slock(&clone_drain_lock);
 		EVENTHANDLER_INVOKE(dev_clone,
 		    td->td_ucred, pname, strlen(pname), &cdev);
 		sx_sunlock(&clone_drain_lock);
 
 		if (cdev == NULL)
 			sx_xlock(&dmp->dm_lock);
 		else if (devfs_populate_vp(dvp) != 0) {
 			/* Populate failed: drop our hold and give up. */
 			*dm_unlock = 0;
 			sx_xlock(&dmp->dm_lock);
 			if (DEVFS_DMP_DROP(dmp)) {
 				sx_xunlock(&dmp->dm_lock);
 				devfs_unmount_final(dmp);
 			} else
 				sx_xunlock(&dmp->dm_lock);
 			dev_rel(cdev);
 			return (ENOENT);
 		}
 		/* A true DROP result leads to final unmount teardown. */
 		if (DEVFS_DMP_DROP(dmp)) {
 			*dm_unlock = 0;
 			sx_xunlock(&dmp->dm_lock);
 			devfs_unmount_final(dmp);
 			if (cdev != NULL)
 				dev_rel(cdev);
 			return (ENOENT);
 		}
 
 		if (cdev == NULL)
 			break;
 
 		/* Pick up this mount's dirent for the cloned device. */
 		dev_lock();
 		dde = &cdev2priv(cdev)->cdp_dirents[dmp->dm_idx];
 		if (dde != NULL && *dde != NULL)
 			de = *dde;
 		dev_unlock();
 		dev_rel(cdev);
 		break;
 	}
 
 	if (de == NULL || de->de_flags & DE_WHITEOUT) {
 		/* Let a CREATE/RENAME of the last component proceed. */
 		if ((nameiop == CREATE || nameiop == RENAME) &&
 		    (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
 			cnp->cn_flags |= SAVENAME;
 			return (EJUSTRETURN);
 		}
 		return (ENOENT);
 	}
 
 	/* Entries the thread may not see are reported as absent. */
 	if (devfs_prison_check(de, td))
 		return (ENOENT);
 
 	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
 		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
 		if (error)
 			return (error);
 		/*
 		 * NOTE(review): *vpp was set to NULLVP at the top and is not
 		 * assigned on any path reaching here, so this test looks
 		 * like it can never be true -- confirm.
 		 */
 		if (*vpp == dvp) {
 			VREF(dvp);
 			*vpp = dvp;
 			return (0);
 		}
 	}
 	error = devfs_allocv(de, dvp->v_mount, cnp->cn_lkflags & LK_TYPE_MASK,
 	    vpp);
 	*dm_unlock = 0;
 	return (error);
 }
 
 /*
  * Lookup entry point: populate the directory, run devfs_lookupx(), and
  * release dm_lock afterwards unless devfs_lookupx() cleared the flag that
  * asks for it.  A populate failure is reported as ENOTDIR.
  */
 static int
 devfs_lookup(struct vop_lookup_args *ap)
 {
 	struct devfs_mount *mnt;
 	int need_unlock, rv;
 
 	if (devfs_populate_vp(ap->a_dvp) != 0)
 		return (ENOTDIR);
 
 	mnt = VFSTODEVFS(ap->a_dvp->v_mount);
 
 	/* devfs_lookupx() zeroes this when no unlock is needed here. */
 	need_unlock = 1;
 	rv = devfs_lookupx(ap, &need_unlock);
 	if (need_unlock == 1)
 		sx_xunlock(&mnt->dm_lock);
 
 	return (rv);
 }
 
 /*
  * mknod in devfs can only bring back an entry that was previously
  * whited out: the directory is searched for a whiteout entry with the
  * requested name, the whiteout flag is cleared and a vnode is allocated
  * for it.  Any other outcome (no such entry, or an entry that is not a
  * whiteout) returns ENOENT; non-VCHR requests return EOPNOTSUPP.
  */
 static int
 devfs_mknod(struct vop_mknod_args *ap)
 {
 	struct componentname *cnp;
 	struct vnode *dvp, **vpp;
 	struct devfs_dirent *dd, *de;
 	struct devfs_mount *dmp;
 	int error;
 
 	/*
 	 * The only type of node we should be creating here is a
 	 * character device, for anything else return EOPNOTSUPP.
 	 */
 	if (ap->a_vap->va_type != VCHR)
 		return (EOPNOTSUPP);
 	dvp = ap->a_dvp;
 	dmp = VFSTODEVFS(dvp->v_mount);
 
 	cnp = ap->a_cnp;
 	vpp = ap->a_vpp;
 	dd = dvp->v_data;
 
 	error = ENOENT;
 	sx_xlock(&dmp->dm_lock);
 	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
 		if (cnp->cn_namelen != de->de_dirent->d_namlen)
 			continue;
 		/* Skip character devices that are no longer active. */
 		if (de->de_dirent->d_type == DT_CHR &&
 		    (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0)
 			continue;
 		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
 		    de->de_dirent->d_namlen) != 0)
 			continue;
 		/* Name matches: only a whiteout may be re-created. */
 		if (de->de_flags & DE_WHITEOUT)
 			break;
 		goto notfound;
 	}
 	if (de == NULL)
 		goto notfound;
 	de->de_flags &= ~DE_WHITEOUT;
 	/*
 	 * NOTE(review): dm_lock is not released on this path; presumably
 	 * devfs_allocv() drops it -- confirm.
 	 */
 	error = devfs_allocv(de, dvp->v_mount, LK_EXCLUSIVE, vpp);
 	return (error);
 notfound:
 	sx_xunlock(&dmp->dm_lock);
 	return (error);
 }
 
 /*
  * Open a devfs character device.
  *
  * The driver's d_fdopen (if it has one) or d_open is called with the
  * vnode unlocked and with td_fpop temporarily pointing at the file being
  * opened.  On success, and when a file pointer was supplied, the file is
  * set up with devfs_ops_f (if it still has badfileops) and flagged
  * FDEVFS_VNODE.  ERESTART from the driver is reported as EINTR.
  */
 /* ARGSUSED */
 static int
 devfs_open(struct vop_open_args *ap)
 {
 	struct thread *td = ap->a_td;
 	struct vnode *vp = ap->a_vp;
 	struct cdev *dev = vp->v_rdev;
 	struct file *fp = ap->a_fp;
 	int error, ref, vlocked;
 	struct cdevsw *dsw;
 	struct file *fpop;
 	struct mtx *mtxp;
 
 	if (vp->v_type == VBLK)
 		return (ENXIO);
 
 	if (dev == NULL)
 		return (ENXIO);
 
 	/* Make this field valid before any I/O in d_open. */
 	if (dev->si_iosize_max == 0)
 		dev->si_iosize_max = DFLTPHYS;
 
 	dsw = dev_refthread(dev, &ref);
 	if (dsw == NULL)
 		return (ENXIO);
 	/* A driver with d_fdopen cannot be opened without a file. */
 	if (fp == NULL && dsw->d_fdopen != NULL) {
 		dev_relthread(dev, ref);
 		return (ENXIO);
 	}
 
 	/* Drop the vnode lock across the driver call; retaken below. */
 	vlocked = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp, 0);
 
 	fpop = td->td_fpop;
 	td->td_fpop = fp;
 	if (fp != NULL) {
 		fp->f_data = dev;
 		fp->f_vnode = vp;
 	}
 	if (dsw->d_fdopen != NULL)
 		error = dsw->d_fdopen(dev, ap->a_mode, td, fp);
 	else
 		error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
 	/* Clean up any cdevpriv upon error. */
 	if (error != 0)
 		devfs_clear_cdevpriv();
 	td->td_fpop = fpop;
 
 	vn_lock(vp, vlocked | LK_RETRY);
 	dev_relthread(dev, ref);
 	if (error != 0) {
 		if (error == ERESTART)
 			error = EINTR;
 		return (error);
 	}
 
 #if 0	/* /dev/console */
 	KASSERT(fp != NULL, ("Could not vnode bypass device on NULL fp"));
 #else
 	if (fp == NULL)
 		return (error);
 #endif
 	/* Install the devfs file operations if none were set yet. */
 	if (fp->f_ops == &badfileops)
 		finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f);
 	mtxp = mtx_pool_find(mtxpool_sleep, fp);
 
 	/*
 	 * Hint to the dofilewrite() to not force the buffer draining
 	 * on the writer to the file.  Most likely, the write would
 	 * not need normal buffers.
 	 */
 	mtx_lock(mtxp);
 	fp->f_vnread_flags |= FDEVFS_VNODE;
 	mtx_unlock(mtxp);
 	return (error);
 }
 
 /*
  * pathconf for devfs.  Only _PC_MAC_PRESENT is answered here; all other
  * names are delegated to vop_stdpathconf().
  */
 static int
 devfs_pathconf(struct vop_pathconf_args *ap)
 {
 
 	if (ap->a_name != _PC_MAC_PRESENT)
 		return (vop_stdpathconf(ap));
 
 #ifdef MAC
 	/*
 	 * With MAC compiled in, devfs automatically supports trivial
 	 * non-persistent label storage.
 	 */
 	*ap->a_retval = 1;
 #else
 	*ap->a_retval = 0;
 #endif
 	return (0);
 }
 
 /*
  * poll on a file opened on a devfs device.  If devfs_fp_check() fails the
  * request is passed to the generic vnode poll (vnops.fo_poll); otherwise
  * the driver's d_poll result is returned.
  */
 /* ARGSUSED */
 static int
 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
 {
 	struct file *saved_fpop;
 	struct cdevsw *dsw;
 	struct cdev *dev;
 	int ref, rv;
 
 	/* Saved before devfs_fp_check() so it can be restored afterwards. */
 	saved_fpop = td->td_fpop;
 
 	if (devfs_fp_check(fp, &dev, &dsw, &ref) != 0)
 		return (vnops.fo_poll(fp, events, cred, td));
 
 	rv = dsw->d_poll(dev, events, td);
 	td->td_fpop = saved_fpop;
 	dev_relthread(dev, ref);
 	return (rv);
 }
 
 /*
  * Print out the contents of a special device vnode.
  */
 static int
 devfs_print(struct vop_print_args *ap)
 {
 
 	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
 	return (0);
 }
 
 /*
  * read on a file opened on a devfs device.
  *
  * Requests larger than DEVFS_IOSIZE_MAX are rejected with EINVAL.  If
  * devfs_fp_check() fails, the read is passed to the generic vnode read
  * (vnops.fo_read).  Otherwise the driver's d_read is called and the
  * device access time is updated when data moved or a non-empty read
  * succeeded.
  */
 static int
 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred,
     int flags, struct thread *td)
 {
 	struct cdev *dev;
 	int ioflag, error, ref;
 	ssize_t resid;
 	struct cdevsw *dsw;
 	struct file *fpop;
 
 	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
 		return (EINVAL);
 	/* Saved before devfs_fp_check() and restored after the driver call. */
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error != 0) {
 		error = vnops.fo_read(fp, uio, cred, flags, td);
 		return (error);
 	}
 	/* Remember the requested size to detect whether any data moved. */
 	resid = uio->uio_resid;
 	/* Pass the file's O_NONBLOCK/O_DIRECT bits (plus IO_DIRECT) down. */
 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
 	if (ioflag & O_DIRECT)
 		ioflag |= IO_DIRECT;
 
 	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
 	error = dsw->d_read(dev, uio, ioflag);
 	if (uio->uio_resid != resid || (error == 0 && resid != 0))
 		devfs_timestamp(&dev->si_atime);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 
 	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
 	return (error);
 }
 
 /*
  * Read directory entries from a devfs directory.
  *
  * Walks the directory's entry list, skipping covered and whited-out
  * entries and entries hidden by devfs_prison_check(), and emits each
  * remaining entry at or past uio_offset through vfs_read_dirent() until
  * the next record no longer fits in the caller's buffer.  On return
  * uio_offset is set to the byte offset reached in the walk.
  */
 static int
 devfs_readdir(struct vop_readdir_args *ap)
 {
 	int error;
 	struct uio *uio;
 	struct dirent *dp;
 	struct devfs_dirent *dd;
 	struct devfs_dirent *de;
 	struct devfs_mount *dmp;
 	off_t off;
 	int *tmp_ncookies = NULL;
 
 	if (ap->a_vp->v_type != VDIR)
 		return (ENOTDIR);
 
 	uio = ap->a_uio;
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 
 	/*
 	 * XXX: This is a temporary hack to get around this filesystem not
 	 * supporting cookies. We store the location of the ncookies pointer
 	 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
 	 * and set the number of cookies to 0. We then set the pointer to
 	 * NULL so that vfs_read_dirent doesn't try to call realloc() on 
 	 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies
 	 * pointer to its original location before returning to the caller.
 	 */
 	if (ap->a_ncookies != NULL) {
 		tmp_ncookies = ap->a_ncookies;
 		*ap->a_ncookies = 0;
 		ap->a_ncookies = NULL;
 	}
 
 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
 	if (devfs_populate_vp(ap->a_vp) != 0) {
 		if (tmp_ncookies != NULL)
 			ap->a_ncookies = tmp_ncookies;
 		return (EIO);
 	}
 	error = 0;
 	de = ap->a_vp->v_data;
 	off = 0;
 	/*
 	 * NOTE(review): "de" is reused as a scratch variable inside the loop;
 	 * this relies on TAILQ_FOREACH reading its head argument only when
 	 * the loop starts -- confirm against the queue macros.
 	 */
 	TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
 		KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__));
 		if (dd->de_flags & (DE_COVERED | DE_WHITEOUT))
 			continue;
 		if (devfs_prison_check(dd, uio->uio_td))
 			continue;
 		/* For a subdirectory the inode comes from its de_dir entry. */
 		if (dd->de_dirent->d_type == DT_DIR)
 			de = dd->de_dir;
 		else
 			de = dd;
 		dp = dd->de_dirent;
+		MPASS(dp->d_reclen == GENERIC_DIRSIZ(dp));
 		/* Stop once the next record would not fit in the buffer. */
 		if (dp->d_reclen > uio->uio_resid)
 			break;
 		dp->d_fileno = de->de_inode;
 		/* Entries before the requested offset are only counted. */
 		if (off >= uio->uio_offset) {
 			error = vfs_read_dirent(ap, dp, off);
 			if (error)
 				break;
 		}
 		off += dp->d_reclen;
 	}
 	sx_xunlock(&dmp->dm_lock);
 	uio->uio_offset = off;
 
 	/*
 	 * Restore ap->a_ncookies if it wasn't originally NULL in the first
 	 * place.
 	 */
 	if (tmp_ncookies != NULL)
 		ap->a_ncookies = tmp_ncookies;
 
 	return (error);
 }
 
 /*
  * Copy the target string of a devfs symlink (without its terminating
  * NUL) into the caller's uio.
  */
 static int
 devfs_readlink(struct vop_readlink_args *ap)
 {
 	struct devfs_dirent *link;
 	char *target;
 
 	link = ap->a_vp->v_data;
 	target = link->de_symlink;
 	return (uiomove(target, strlen(target), ap->a_uio));
 }
 
 /*
  * Reclaim a devfs vnode: sever the link between the vnode and its
  * directory entry under devfs_de_interlock, then destroy the vnode's VM
  * object.  Always returns 0.
  */
 static int
 devfs_reclaim(struct vop_reclaim_args *ap)
 {
 	struct devfs_dirent *dirent;
 	struct vnode *vnode;
 
 	vnode = ap->a_vp;
 
 	mtx_lock(&devfs_de_interlock);
 	dirent = vnode->v_data;
 	if (dirent != NULL) {
 		/* Clear both directions of the dirent <-> vnode link. */
 		dirent->de_vnode = NULL;
 		vnode->v_data = NULL;
 	}
 	mtx_unlock(&devfs_de_interlock);
 
 	vnode_destroy_vobject(vnode);
 	return (0);
 }
 
 /*
  * Reclaim a VCHR devfs vnode.  Performs the common devfs_reclaim() work,
  * then detaches the vnode from its cdev: v_rdev is cleared, the vnode's
  * use count is subtracted from the device's si_usecount, and the device
  * is released with dev_rel().  Always returns 0.
  */
 static int
 devfs_reclaim_vchr(struct vop_reclaim_args *ap)
 {
 	struct vnode *vp;
 	struct cdev *dev;
 
 	vp = ap->a_vp;
 	MPASS(vp->v_type == VCHR);
 
 	devfs_reclaim(ap);
 
 	/* Detach under both the vnode interlock and the dev lock. */
 	VI_LOCK(vp);
 	dev_lock();
 	dev = vp->v_rdev;
 	vp->v_rdev = NULL;
 	if (dev != NULL)
 		dev->si_usecount -= vp->v_usecount;
 	dev_unlock();
 	VI_UNLOCK(vp);
 	/* dev_rel() is called only after both locks have been dropped. */
 	if (dev != NULL)
 		dev_rel(dev);
 	return (0);
 }
 
 /*
  * Remove an entry from a devfs directory.
  *
  * Entries without a cdev (de_cdp == NULL) are unlinked from the directory
  * list and deleted; if the entry was a symlink, an entry of the same name
  * that it had been covering is uncovered again.  Entries backed by a cdev
  * are not deleted but marked as whiteouts.  Always returns 0.
  */
 static int
 devfs_remove(struct vop_remove_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode *vp = ap->a_vp;
 	struct devfs_dirent *dd;
 	struct devfs_dirent *de, *de_covered;
 	struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount);
 
 	ASSERT_VOP_ELOCKED(dvp, "devfs_remove");
 	ASSERT_VOP_ELOCKED(vp, "devfs_remove");
 
 	sx_xlock(&dmp->dm_lock);
 	dd = ap->a_dvp->v_data;
 	de = vp->v_data;
 	if (de->de_cdp == NULL) {
 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
 		if (de->de_dirent->d_type == DT_LNK) {
 			/* Uncover any same-named entry behind the link. */
 			de_covered = devfs_find(dd, de->de_dirent->d_name,
 			    de->de_dirent->d_namlen, 0);
 			if (de_covered != NULL)
 				de_covered->de_flags &= ~DE_COVERED;
 		}
 		/* We need to unlock dvp because devfs_delete() may lock it. */
 		VOP_UNLOCK(vp, 0);
 		if (dvp != vp)
 			VOP_UNLOCK(dvp, 0);
 		devfs_delete(dmp, de, 0);
 		sx_xunlock(&dmp->dm_lock);
 		/* Retake the vnode locks: directory first, then the entry. */
 		if (dvp != vp)
 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	} else {
 		de->de_flags |= DE_WHITEOUT;
 		sx_xunlock(&dmp->dm_lock);
 	}
 	return (0);
 }
 
 /*
  * Revoke is called on a tty when a terminal session ends.  The vnode
  * is orphaned by setting v_op to deadfs so we need to let go of it
  * as well so that we create a new one next time around.
  *
  * Besides the vnode passed in, every other vnode that refers to the same
  * device through any entry in the device's cdp_dirents array is also
  * vgone()'d.  cdp_inuse is raised for the duration of the scan; if the
  * device is no longer active and this was the last user, it is taken off
  * cdevp_list and released.
  */
 static int
 devfs_revoke(struct vop_revoke_args *ap)
 {
 	struct vnode *vp = ap->a_vp, *vp2;
 	struct cdev *dev;
 	struct cdev_priv *cdp;
 	struct devfs_dirent *de;
 	u_int i;
 
 	KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL"));
 
 	dev = vp->v_rdev;
 	cdp = cdev2priv(dev);
  
 	dev_lock();
 	cdp->cdp_inuse++;
 	dev_unlock();
 
 	/* Hold the vnode across vgone(). */
 	vhold(vp);
 	vgone(vp);
 	vdrop(vp);
 
 	VOP_UNLOCK(vp,0);
  loop:
 	/*
 	 * Repeatedly scan the dirent array; each pass disposes of at most
 	 * one vnode and then rescans from the start, because the locks are
 	 * dropped while that vnode is being handled.
 	 */
 	for (;;) {
 		mtx_lock(&devfs_de_interlock);
 		dev_lock();
 		vp2 = NULL;
 		for (i = 0; i <= cdp->cdp_maxdirent; i++) {
 			de = cdp->cdp_dirents[i];
 			if (de == NULL)
 				continue;
 
 			vp2 = de->de_vnode;
 			if (vp2 != NULL) {
 				dev_unlock();
 				VI_LOCK(vp2);
 				mtx_unlock(&devfs_de_interlock);
 				/* If vget() fails, restart the whole scan. */
 				if (vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK,
 				    curthread))
 					goto loop;
 				vhold(vp2);
 				vgone(vp2);
 				vdrop(vp2);
 				vput(vp2);
 				break;
 			} 
 		}
 		/* A vnode was handled (locks already dropped): rescan. */
 		if (vp2 != NULL) {
 			continue;
 		}
 		dev_unlock();
 		mtx_unlock(&devfs_de_interlock);
 		break;
 	}
 	dev_lock();
 	cdp->cdp_inuse--;
 	if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) {
 		TAILQ_REMOVE(&cdevp_list, cdp, cdp_list);
 		dev_unlock();
 		dev_rel(&cdp->cdp_c);
 	} else
 		dev_unlock();
 
 	/* Return with the original vnode locked again. */
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	return (0);
 }
 
 /*
  * ioctl on a non-device devfs vnode: the command is handed to
  * devfs_rules_ioctl() for the mount the vnode belongs to.
  *
  * Returns EBADF if the vnode is already doomed, and ENOENT if dropping
  * the mount hold after devfs_populate() triggers the final unmount.
  */
 static int
 devfs_rioctl(struct vop_ioctl_args *ap)
 {
 	struct vnode *vp;
 	struct devfs_mount *dmp;
 	int error;
 
 	vp = ap->a_vp;
 	/* The vnode lock is only needed to safely read v_mount. */
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	if (vp->v_iflag & VI_DOOMED) {
 		VOP_UNLOCK(vp, 0);
 		return (EBADF);
 	}
 	dmp = VFSTODEVFS(vp->v_mount);
 	sx_xlock(&dmp->dm_lock);
 	VOP_UNLOCK(vp, 0);
 	/* Hold the mount across devfs_populate(). */
 	DEVFS_DMP_HOLD(dmp);
 	devfs_populate(dmp);
 	if (DEVFS_DMP_DROP(dmp)) {
 		sx_xunlock(&dmp->dm_lock);
 		devfs_unmount_final(dmp);
 		return (ENOENT);
 	}
 	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
 	sx_xunlock(&dmp->dm_lock);
 	return (error);
 }
 
 static int
 devfs_rread(struct vop_read_args *ap)
 {
 
 	if (ap->a_vp->v_type != VDIR)
 		return (EINVAL);
 	return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL));
 }
 
 /*
  * Set attributes on a devfs node.
  *
  * Only owner, group, mode, access time and modification time may be
  * changed; a request touching any other attribute is rejected with
  * EINVAL.  For directories the attributes live in the entry's de_dir.
  * For VCHR vnodes the timestamps are kept in the cdev (v_rdev), for all
  * other nodes in the directory entry.
  *
  * NOTE(review): the dm_lock released at "ret" is not taken in this
  * function; presumably devfs_populate_vp() returns with it held on
  * success -- confirm.
  */
 static int
 devfs_setattr(struct vop_setattr_args *ap)
 {
 	struct devfs_dirent *de;
 	struct vattr *vap;
 	struct vnode *vp;
 	struct thread *td;
 	int c, error;
 	uid_t uid;
 	gid_t gid;
 
 	vap = ap->a_vap;
 	vp = ap->a_vp;
 	td = curthread;
 	/* Reject any attempt to set an attribute devfs cannot change. */
 	if ((vap->va_type != VNON) ||
 	    (vap->va_nlink != VNOVAL) ||
 	    (vap->va_fsid != VNOVAL) ||
 	    (vap->va_fileid != VNOVAL) ||
 	    (vap->va_blocksize != VNOVAL) ||
 	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
 	    (vap->va_rdev != VNOVAL) ||
 	    ((int)vap->va_bytes != VNOVAL) ||
 	    (vap->va_gen != VNOVAL)) {
 		return (EINVAL);
 	}
 
 	error = devfs_populate_vp(vp);
 	if (error != 0)
 		return (error);
 
 	de = vp->v_data;
 	if (vp->v_type == VDIR)
 		de = de->de_dir;
 
 	/* c records whether anything was changed. */
 	c = 0;
 	/* VNOVAL means "leave unchanged": fall back to the current value. */
 	if (vap->va_uid == (uid_t)VNOVAL)
 		uid = de->de_uid;
 	else
 		uid = vap->va_uid;
 	if (vap->va_gid == (gid_t)VNOVAL)
 		gid = de->de_gid;
 	else
 		gid = vap->va_gid;
 	if (uid != de->de_uid || gid != de->de_gid) {
 		/*
 		 * Without PRIV_VFS_CHOWN only the owner may change the
 		 * group, and only to a group the credential belongs to.
 		 */
 		if ((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
 		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) {
 			error = priv_check(td, PRIV_VFS_CHOWN);
 			if (error != 0)
 				goto ret;
 		}
 		de->de_uid = uid;
 		de->de_gid = gid;
 		c = 1;
 	}
 
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		/* Non-owners need PRIV_VFS_ADMIN to change the mode. */
 		if (ap->a_cred->cr_uid != de->de_uid) {
 			error = priv_check(td, PRIV_VFS_ADMIN);
 			if (error != 0)
 				goto ret;
 		}
 		de->de_mode = vap->va_mode;
 		c = 1;
 	}
 
 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
 		error = vn_utimes_perm(vp, vap, ap->a_cred, td);
 		if (error != 0)
 			goto ret;
 		if (vap->va_atime.tv_sec != VNOVAL) {
 			if (vp->v_type == VCHR)
 				vp->v_rdev->si_atime = vap->va_atime;
 			else
 				de->de_atime = vap->va_atime;
 		}
 		if (vap->va_mtime.tv_sec != VNOVAL) {
 			if (vp->v_type == VCHR)
 				vp->v_rdev->si_mtime = vap->va_mtime;
 			else
 				de->de_mtime = vap->va_mtime;
 		}
 		c = 1;
 	}
 
 	/*
 	 * Something changed: stamp the device's ctime for VCHR vnodes, or
 	 * the entry's de_mtime for everything else.
 	 */
 	if (c) {
 		if (vp->v_type == VCHR)
 			vfs_timestamp(&vp->v_rdev->si_ctime);
 		else
 			vfs_timestamp(&de->de_mtime);
 	}
 
 ret:
 	sx_xunlock(&VFSTODEVFS(vp->v_mount)->dm_lock);
 	return (error);
 }
 
 #ifdef MAC
 /*
  * Apply a new MAC label to a devfs vnode and then push the update to the
  * backing directory entry via mac_devfs_update().  Always returns 0.
  */
 static int
 devfs_setlabel(struct vop_setlabel_args *ap)
 {
 	struct devfs_dirent *dirent;
 	struct vnode *vnode;
 
 	vnode = ap->a_vp;
 	dirent = vnode->v_data;
 
 	mac_vnode_relabel(ap->a_cred, vnode, ap->a_label);
 	mac_devfs_update(vnode->v_mount, dirent, vnode);
 
 	return (0);
 }
 #endif
 
 static int
 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td)
 {
 
 	return (vnops.fo_stat(fp, sb, cred, td));
 }
 
 /*
  * Create a user symlink in a devfs directory.
  *
  * Requires PRIV_DEVFS_SYMLINK.  A new DE_USER entry of type DT_LNK is
  * built with a private copy of the target string.  If an entry of the
  * same name already exists, the call fails with EEXIST when that entry is
  * itself user-created; otherwise the existing entry is marked DE_COVERED.
  * The new entry is inserted right after ".." in the directory list and a
  * vnode for it is returned through ap->a_vpp.
  */
 static int
 devfs_symlink(struct vop_symlink_args *ap)
 {
 	int i, error;
 	struct devfs_dirent *dd;
 	struct devfs_dirent *de, *de_covered, *de_dotdot;
 	struct devfs_mount *dmp;
 
 	error = priv_check(curthread, PRIV_DEVFS_SYMLINK);
 	if (error)
 		return(error);
 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
 	if (devfs_populate_vp(ap->a_dvp) != 0)
 		return (ENOENT);
 
 	dd = ap->a_dvp->v_data;
 	de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
 	de->de_flags = DE_USER;
 	de->de_uid = 0;
 	de->de_gid = 0;
 	de->de_mode = 0755;
 	de->de_inode = alloc_unr(devfs_inos);
 	de->de_dir = dd;
 	de->de_dirent->d_type = DT_LNK;
 	/* Keep a private, NUL-terminated copy of the link target. */
 	i = strlen(ap->a_target) + 1;
 	de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
 	bcopy(ap->a_target, de->de_symlink, i);
 #ifdef MAC
 	mac_devfs_create_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
 #endif
 	de_covered = devfs_find(dd, de->de_dirent->d_name,
 	    de->de_dirent->d_namlen, 0);
 	if (de_covered != NULL) {
 		/* A user-created entry of that name already exists. */
 		if ((de_covered->de_flags & DE_USER) != 0) {
 			devfs_delete(dmp, de, DEVFS_DEL_NORECURSE);
 			sx_xunlock(&dmp->dm_lock);
 			return (EEXIST);
 		}
 		KASSERT((de_covered->de_flags & DE_COVERED) == 0,
 		    ("devfs_symlink: entry %p already covered", de_covered));
 		de_covered->de_flags |= DE_COVERED;
 	}
 
 	de_dotdot = TAILQ_FIRST(&dd->de_dlist);		/* "." */
 	de_dotdot = TAILQ_NEXT(de_dotdot, de_list);	/* ".." */
 	TAILQ_INSERT_AFTER(&dd->de_dlist, de_dotdot, de, de_list);
 	devfs_dir_ref_de(dmp, dd);
 	devfs_rules_apply(dmp, de);
 
 	/*
 	 * NOTE(review): dm_lock is not released here; presumably
 	 * devfs_allocv() drops it -- confirm.
 	 */
 	return (devfs_allocv(de, ap->a_dvp->v_mount, LK_EXCLUSIVE, ap->a_vpp));
 }
 
 /*
  * truncate on a devfs file: handled entirely by the generic vnode
  * truncate.
  */
 static int
 devfs_truncate_f(struct file *fp, off_t length, struct ucred *cred, struct thread *td)
 {
 	int error;
 
 	error = vnops.fo_truncate(fp, length, cred, td);
 	return (error);
 }
 
 /*
  * write on a file opened on a devfs device.
  *
  * Requests larger than DEVFS_IOSIZE_MAX are rejected with EINVAL.  If
  * devfs_fp_check() fails, the write is passed to the generic vnode write
  * (vnops.fo_write).  Otherwise the driver's d_write is called and the
  * device's ctime and mtime are updated when data moved or a non-empty
  * write succeeded.
  */
 static int
 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred,
     int flags, struct thread *td)
 {
 	struct cdev *dev;
 	int error, ioflag, ref;
 	ssize_t resid;
 	struct cdevsw *dsw;
 	struct file *fpop;
 
 	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
 		return (EINVAL);
 	/* Saved before devfs_fp_check() and restored after the driver call. */
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error != 0) {
 		error = vnops.fo_write(fp, uio, cred, flags, td);
 		return (error);
 	}
 	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
 	/* Pass the file's O_NONBLOCK/O_DIRECT/O_FSYNC bits to the driver. */
 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
 	if (ioflag & O_DIRECT)
 		ioflag |= IO_DIRECT;
 	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
 
 	/* Remember the requested size to detect whether any data moved. */
 	resid = uio->uio_resid;
 
 	error = dsw->d_write(dev, uio, ioflag);
 	if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
 		devfs_timestamp(&dev->si_ctime);
 		dev->si_mtime = dev->si_ctime;
 	}
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 
 	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
 	return (error);
 }
 
 /*
  * mmap on a file opened on a devfs device.
  *
  * Computes the maximum protection allowed by the mount flags, the file's
  * open mode and cap_maxprot, rejecting with EACCES any request that asks
  * for more.  The VM object is then obtained from vm_mmap_cdev() and
  * mapped with vm_mmap_object(); if the mapping step fails, the object
  * reference is dropped again.
  */
 static int
 devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
     vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
     struct thread *td)
 {
 	struct cdev *dev;
 	struct cdevsw *dsw;
 	struct mount *mp;
 	struct vnode *vp;
 	struct file *fpop;
 	vm_object_t object;
 	vm_prot_t maxprot;
 	int error, ref;
 
 	vp = fp->f_vnode;
 
 	/*
 	 * Ensure that file and memory protections are
 	 * compatible.
 	 */
 	mp = vp->v_mount;
 	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) {
 		maxprot = VM_PROT_NONE;
 		if ((prot & VM_PROT_EXECUTE) != 0)
 			return (EACCES);
 	} else
 		maxprot = VM_PROT_EXECUTE;
 	if ((fp->f_flag & FREAD) != 0)
 		maxprot |= VM_PROT_READ;
 	else if ((prot & VM_PROT_READ) != 0)
 		return (EACCES);
 
 	/*
 	 * If we are sharing potential changes via MAP_SHARED and we
 	 * are trying to get write permission although we opened it
 	 * without asking for it, bail out.
 	 *
 	 * Note that most character devices always share mappings.
 	 * The one exception is that D_MMAP_ANON devices
 	 * (i.e. /dev/zero) permit private writable mappings.
 	 *
 	 * Rely on vm_mmap_cdev() to fail invalid MAP_PRIVATE requests
 	 * as well as updating maxprot to permit writing for
 	 * D_MMAP_ANON devices rather than doing that here.
 	 */
 	if ((flags & MAP_SHARED) != 0) {
 		if ((fp->f_flag & FWRITE) != 0)
 			maxprot |= VM_PROT_WRITE;
 		else if ((prot & VM_PROT_WRITE) != 0)
 			return (EACCES);
 	}
 	/* Never exceed the protection cap passed in by the caller. */
 	maxprot &= cap_maxprot;
 
 	/* Saved before devfs_fp_check() and restored after vm_mmap_cdev(). */
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error != 0)
 		return (error);
 
 	error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff,
 	    &object);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 	if (error != 0)
 		return (error);
 
 	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
 	    foff, FALSE, td);
 	if (error != 0)
 		vm_object_deallocate(object);
 	return (error);
 }
 
 /*
  * Map a cdev to its dev_t number, which is the cdp_inode of the device's
  * private structure.  A NULL device maps to NODEV.
  */
 dev_t
 dev2udev(struct cdev *x)
 {
 
 	return (x == NULL ? NODEV : cdev2priv(x)->cdp_inode);
 }
 
 /*
  * File operations for files opened on devfs devices (installed by
  * finit() in devfs_open()).  I/O-style operations go through the devfs_*_f
  * wrappers; chmod, chown, sendfile, seek and fill_kinfo use the generic
  * vnode implementations directly.
  */
 static struct fileops devfs_ops_f = {
 	.fo_read =	devfs_read_f,
 	.fo_write =	devfs_write_f,
 	.fo_truncate =	devfs_truncate_f,
 	.fo_ioctl =	devfs_ioctl_f,
 	.fo_poll =	devfs_poll_f,
 	.fo_kqfilter =	devfs_kqfilter_f,
 	.fo_stat =	devfs_stat_f,
 	.fo_close =	devfs_close_f,
 	.fo_chmod =	vn_chmod,
 	.fo_chown =	vn_chown,
 	.fo_sendfile =	vn_sendfile,
 	.fo_seek =	vn_seek,
 	.fo_fill_kinfo = vn_fill_kinfo,
 	.fo_mmap =	devfs_mmap_f,
 	.fo_flags =	DFLAG_PASSABLE | DFLAG_SEEKABLE
 };
 
 /*
  * Vops for non-CHR vnodes in /dev.
  * Operations not listed here fall through to default_vnodeops.
  */
 static struct vop_vector devfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		devfs_access,
 	.vop_getattr =		devfs_getattr,
 	.vop_ioctl =		devfs_rioctl,
 	.vop_lookup =		devfs_lookup,
 	.vop_mknod =		devfs_mknod,
 	.vop_pathconf =		devfs_pathconf,
 	.vop_read =		devfs_rread,
 	.vop_readdir =		devfs_readdir,
 	.vop_readlink =		devfs_readlink,
 	.vop_reclaim =		devfs_reclaim,
 	.vop_remove =		devfs_remove,
 	.vop_revoke =		devfs_revoke,
 	.vop_setattr =		devfs_setattr,
 #ifdef MAC
 	.vop_setlabel =		devfs_setlabel,
 #endif
 	.vop_symlink =		devfs_symlink,
 	.vop_vptocnp =		devfs_vptocnp,
 };
 
 /*
  * Vops for VCHR vnodes in /dev.
  * Operations that make no sense on a character device are wired to
  * VOP_PANIC, and the vnode-level read, write and poll entries use the
  * dead_* handlers.  Operations not listed fall through to
  * default_vnodeops.
  */
 static struct vop_vector devfs_specops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		devfs_access,
 	.vop_bmap =		VOP_PANIC,
 	.vop_close =		devfs_close,
 	.vop_create =		VOP_PANIC,
 	.vop_fsync =		vop_stdfsync,
 	.vop_getattr =		devfs_getattr,
 	.vop_ioctl =		devfs_ioctl,
 	.vop_link =		VOP_PANIC,
 	.vop_mkdir =		VOP_PANIC,
 	.vop_mknod =		VOP_PANIC,
 	.vop_open =		devfs_open,
 	.vop_pathconf =		devfs_pathconf,
 	.vop_poll =		dead_poll,
 	.vop_print =		devfs_print,
 	.vop_read =		dead_read,
 	.vop_readdir =		VOP_PANIC,
 	.vop_readlink =		VOP_PANIC,
 	.vop_reallocblks =	VOP_PANIC,
 	.vop_reclaim =		devfs_reclaim_vchr,
 	.vop_remove =		devfs_remove,
 	.vop_rename =		VOP_PANIC,
 	.vop_revoke =		devfs_revoke,
 	.vop_rmdir =		VOP_PANIC,
 	.vop_setattr =		devfs_setattr,
 #ifdef MAC
 	.vop_setlabel =		devfs_setlabel,
 #endif
 	.vop_strategy =		VOP_PANIC,
 	.vop_symlink =		VOP_PANIC,
 	.vop_vptocnp =		devfs_vptocnp,
 	.vop_write =		dead_write,
 };
 
 /*
  * Our calling convention to the device drivers used to be that we passed
  * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_*
  * flags instead since that's what open(), close() and ioctl() take and
  * we don't really want vnode.h in device drivers.
  * We solved the source compatibility by redefining some vnode flags to
  * be the same as the fcntl ones and by sending down the bitwise OR of
  * the respective fcntl/vnode flags.  These CTASSERTs make sure nobody
  * pulls the rug out from under this: compilation fails if the paired
  * flag values ever diverge.
  */
 CTASSERT(O_NONBLOCK == IO_NDELAY);
 CTASSERT(O_FSYNC == IO_SYNC);
Index: head/sys/fs/fdescfs/fdesc_vnops.c
===================================================================
--- head/sys/fs/fdescfs/fdesc_vnops.c	(revision 318735)
+++ head/sys/fs/fdescfs/fdesc_vnops.c	(revision 318736)
@@ -1,569 +1,569 @@
 /*-
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software donated to Berkeley by
  * Jan-Simon Pendry.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)fdesc_vnops.c	8.9 (Berkeley) 1/21/94
  *
  * $FreeBSD$
  */
 
 /*
  * /dev/fd Filesystem
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/capsicum.h>
 #include <sys/conf.h>
 #include <sys/dirent.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>	/* boottime */
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/file.h>	/* Must come after sys/malloc.h */
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/stat.h>
 #include <sys/vnode.h>
 
 #include <fs/fdescfs/fdesc.h>
 
 /* Size hint handed to hashinit() when the node hash is created. */
 #define	NFDCACHE 4
 /* Bucket head for hash index ix; fdhash is the mask filled in by hashinit(). */
 #define FD_NHASH(ix) \
 	(&fdhashtbl[(ix) & fdhash])
 static LIST_HEAD(fdhashhead, fdescnode) *fdhashtbl;
 static u_long fdhash;
 
 /* Held around every walk or modification of fdhashtbl in this file. */
 struct mtx fdesc_hashmtx;
 
 static vop_getattr_t	fdesc_getattr;
 static vop_lookup_t	fdesc_lookup;
 static vop_open_t	fdesc_open;
 static vop_readdir_t	fdesc_readdir;
 static vop_reclaim_t	fdesc_reclaim;
 static vop_setattr_t	fdesc_setattr;
 
 /*
  * Vnode operations shared by the fdescfs root directory and the
  * per-descriptor nodes; everything not listed falls through to
  * default_vnodeops.
  */
 static struct vop_vector fdesc_vnodeops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		VOP_NULL,
 	.vop_getattr =		fdesc_getattr,
 	.vop_lookup =		fdesc_lookup,
 	.vop_open =		fdesc_open,
 	.vop_pathconf =		vop_stdpathconf,
 	.vop_readdir =		fdesc_readdir,
 	.vop_reclaim =		fdesc_reclaim,
 	.vop_setattr =		fdesc_setattr,
 };
 
 static void fdesc_insmntque_dtr(struct vnode *, void *);
 static void fdesc_remove_entry(struct fdescnode *);
 
 /*
  * Initialise cache headers: set up the mutex guarding the node hash and
  * allocate the hash table itself (hashinit() also fills in the fdhash
  * mask used by FD_NHASH).  vfsp is unused.  Always returns 0.
  */
 int
 fdesc_init(struct vfsconf *vfsp)
 {
 
 	mtx_init(&fdesc_hashmtx, "fdescfs_hash", NULL, MTX_DEF);
 	fdhashtbl = hashinit(NFDCACHE, M_CACHE, &fdhash);
 	return (0);
 }
 
 /*
  * Uninit ready for unload: release what fdesc_init() set up, i.e. the
  * hash table and then its mutex.  vfsp is unused.  Always returns 0.
  */
 int
 fdesc_uninit(struct vfsconf *vfsp)
 {
 
 	hashdestroy(fdhashtbl, M_CACHE, fdhash);
 	mtx_destroy(&fdesc_hashmtx);
 	return (0);
 }
 
 /*
  * If allocating vnode fails, call this.
  *
  * Passed to insmntque1() by fdesc_allocvp() as the destructor for a vnode
  * that could not be put on the mount's vnode list.  arg is unused.
  * NOTE(review): vgone() presumably reaches fdesc_reclaim(), which frees
  * the fdescnode hung off v_data -- confirm.
  */
 static void
 fdesc_insmntque_dtr(struct vnode *vp, void *arg)
 {
 
 	vgone(vp);
 	vput(vp);
 }
 
 /*
  * Unlink a node from the fdescfs hash, but only if it is actually linked.
  *
  * The bucket selected by the node's fd_ix is scanned under fdesc_hashmtx
  * and the node is removed when it is found on that chain.  A node that
  * never made it into the table is left alone.
  */
 static void
 fdesc_remove_entry(struct fdescnode *fd)
 {
 	struct fdhashhead *bucket;
 	struct fdescnode *cur;
 
 	bucket = FD_NHASH(fd->fd_ix);
 	mtx_lock(&fdesc_hashmtx);
 	LIST_FOREACH(cur, bucket, fd_hash) {
 		if (cur != fd)
 			continue;
 		/* Found ourselves on the chain: unlink and stop looking. */
 		LIST_REMOVE(fd, fd_hash);
 		break;
 	}
 	mtx_unlock(&fdesc_hashmtx);
 }
 
 /*
  * Find or create the fdescfs vnode with hash index ix on mount mp.
  *
  * ftype and fd_fd are only used to initialise a newly created node.
  * On success 0 is returned and *vpp holds the vnode, obtained either via
  * vget(LK_EXCLUSIVE) for an existing node or via vn_lock(LK_EXCLUSIVE)
  * for a new one.
  *
  * Returns -1 when the mount has no fdescmount data or FMNT_UNMOUNTF is
  * set, otherwise the error from getnewvnode(), insmntque1() or vget().
  * Note that *vpp is only reset to NULLVP on the failure paths that follow
  * vnode creation; the early returns leave it untouched.
  */
 int
 fdesc_allocvp(fdntype ftype, unsigned fd_fd, int ix, struct mount *mp,
     struct vnode **vpp)
 {
 	struct fdescmount *fmp;
 	struct fdhashhead *fc;
 	struct fdescnode *fd, *fd2;
 	struct vnode *vp, *vp2;
 	struct thread *td;
 	int error = 0;
 
 	td = curthread;
 	fc = FD_NHASH(ix);
 loop:
 	mtx_lock(&fdesc_hashmtx);
 	/*
 	 * If a forced unmount is progressing, we need to drop it. The flags are
 	 * protected by the hashmtx.
 	 */
 	fmp = (struct fdescmount *)mp->mnt_data;
 	if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) {
 		mtx_unlock(&fdesc_hashmtx);
 		return (-1);
 	}
 
 	/* First pass: reuse a node already hashed for this (ix, mount). */
 	LIST_FOREACH(fd, fc, fd_hash) {
 		if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) {
 			/* Get reference to vnode in case it's being free'd */
 			vp = fd->fd_vnode;
 			/*
 			 * Take the vnode interlock before dropping the hash
 			 * mutex; vget() consumes it via LK_INTERLOCK.
 			 */
 			VI_LOCK(vp);
 			mtx_unlock(&fdesc_hashmtx);
 			/* vget() failed: rescan the bucket from scratch. */
 			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td))
 				goto loop;
 			*vpp = vp;
 			return (0);
 		}
 	}
 	mtx_unlock(&fdesc_hashmtx);
 
 	/* Not found: build a new node with the hash mutex dropped. */
 	fd = malloc(sizeof(struct fdescnode), M_TEMP, M_WAITOK);
 
 	error = getnewvnode("fdescfs", mp, &fdesc_vnodeops, &vp);
 	if (error) {
 		free(fd, M_TEMP);
 		return (error);
 	}
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	vp->v_data = fd;
 	fd->fd_vnode = vp;
 	fd->fd_type = ftype;
 	fd->fd_fd = fd_fd;
 	fd->fd_ix = ix;
 	/* On failure fdesc_insmntque_dtr() disposes of vp. */
 	error = insmntque1(vp, mp, fdesc_insmntque_dtr, NULL);
 	if (error != 0) {
 		*vpp = NULLVP;
 		return (error);
 	}
 
 	/* Make sure that someone didn't beat us when inserting the vnode. */
 	mtx_lock(&fdesc_hashmtx);
 	/*
 	 * If a forced unmount is progressing, we need to drop it. The flags are
 	 * protected by the hashmtx.
 	 */
 	fmp = (struct fdescmount *)mp->mnt_data;
 	if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) {
 		mtx_unlock(&fdesc_hashmtx);
 		vgone(vp);
 		vput(vp);
 		*vpp = NULLVP;
 		return (-1);
 	}
 
 	/* Second pass: the mutex was dropped, so look for a racing insert. */
 	LIST_FOREACH(fd2, fc, fd_hash) {
 		if (fd2->fd_ix == ix && fd2->fd_vnode->v_mount == mp) {
 			/* Get reference to vnode in case it's being free'd */
 			vp2 = fd2->fd_vnode;
 			VI_LOCK(vp2);
 			mtx_unlock(&fdesc_hashmtx);
 			error = vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK, td);
 			/* Someone beat us, dec use count and wait for reclaim */
 			vgone(vp);
 			vput(vp);
 			/* If we didn't get it, return no vnode. */
 			if (error)
 				vp2 = NULLVP;
 			*vpp = vp2;
 			return (error);
 		}
 	}
 
 	/* If we came here, we can insert it safely. */
 	LIST_INSERT_HEAD(fc, fd, fd_hash);
 	mtx_unlock(&fdesc_hashmtx);
 	*vpp = vp;
 	return (0);
 }
 
 /*
  * Argument bundle that fdesc_lookup() hands to fdesc_get_ino_alloc()
  * through vn_vget_ino_gen().
  */
 struct fdesc_get_ino_args {
 	fdntype ftype;		/* node type passed to fdesc_allocvp() */
 	unsigned fd_fd;		/* descriptor number the node stands for */
 	int ix;			/* hash index / file id of the node */
 	struct file *fp;	/* file reference dropped by the callback */
 	struct thread *td;	/* thread used for that fdrop() */
 };
 
 /*
  * Allocation callback used with vn_vget_ino_gen().
  *
  * Unpacks the fdesc_get_ino_args bundle, asks fdesc_allocvp() for the
  * node and stores the vnode in *rvp.  The file reference carried in the
  * bundle is released whether or not the allocation worked.  lkflags is
  * not consulted.  Returns whatever fdesc_allocvp() returned.
  */
 static int
 fdesc_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
     struct vnode **rvp)
 {
 	struct fdesc_get_ino_args *args = arg;
 	int rv;
 
 	rv = fdesc_allocvp(args->ftype, args->fd_fd, args->ix, mp, rvp);
 	/* Drop the lookup's file reference on success and failure alike. */
 	fdrop(args->fp, args->td);
 	return (rv);
 }
 
 
 /*
  * Look up one name component in the fdescfs directory.
  *
  * ap->a_dvp is the current namei directory, ap->a_cnp the name to locate
  * in it and ap->a_vpp receives the result.  "." yields the directory
  * itself; any other name must be the canonical decimal spelling of a
  * descriptor that fget() accepts for the calling thread.
  *
  * Returns 0 with *ap->a_vpp set, or an errno with *ap->a_vpp == NULL:
  * EROFS for a DELETE/RENAME of the last component, ENOTDIR when dvp is
  * not the root node, ENOENT for a malformed or out-of-range name, or the
  * error from fget() / vn_vget_ino_gen().
  */
 static int
 fdesc_lookup(struct vop_lookup_args *ap)
 {
 	struct vnode **vpp = ap->a_vpp;
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	char *pname = cnp->cn_nameptr;
 	struct thread *td = cnp->cn_thread;
 	struct file *fp;
 	struct fdesc_get_ino_args arg;
 	cap_rights_t rights;
 	int nlen = cnp->cn_namelen;
 	u_int fd, fd1;
 	int error;
 	struct vnode *fvp;
 
 	/* Nothing below this directory can be removed or renamed. */
 	if ((cnp->cn_flags & ISLASTCN) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
 		error = EROFS;
 		goto bad;
 	}
 
 	if (cnp->cn_namelen == 1 && *pname == '.') {
 		*vpp = dvp;
 		VREF(dvp);
 		return (0);
 	}
 
 	if (VTOFDESC(dvp)->fd_type != Froot) {
 		error = ENOTDIR;
 		goto bad;
 	}
 
 	fd = 0;
 	/* the only time a leading 0 is acceptable is if it's "0" */
 	if (*pname == '0' && nlen != 1) {
 		error = ENOENT;
 		goto bad;
 	}
 	while (nlen--) {
 		if (*pname < '0' || *pname > '9') {
 			error = ENOENT;
 			goto bad;
 		}
 		fd1 = 10 * fd + *pname++ - '0';
 		/*
 		 * Reject names whose value does not fit in a u_int.  Just
 		 * comparing fd1 against fd is not enough: the product can
 		 * wrap and still land above the old value.  Dividing the
 		 * digit back out recovers fd only if nothing wrapped.
 		 */
 		if (fd1 / 10 != fd) {
 			error = ENOENT;
 			goto bad;
 		}
 		fd = fd1;
 	}
 
 	/*
 	 * No rights to check since 'fp' isn't actually used.
 	 */
 	if ((error = fget(td, fd, cap_rights_init(&rights), &fp)) != 0)
 		goto bad;
 
 	/* Check if we're looking up ourselves. */
 	if (VTOFDESC(dvp)->fd_ix == FD_DESC + fd) {
 		/*
 		 * In case we're holding the last reference to the file, the dvp
 		 * will be re-acquired.
 		 */
 		vhold(dvp);
 		VOP_UNLOCK(dvp, 0);
 		fdrop(fp, td);
 
 		/* Re-acquire the lock afterwards. */
 		vn_lock(dvp, LK_RETRY | LK_EXCLUSIVE);
 		vdrop(dvp);
 		fvp = dvp;
 		/* The vnode may have been reclaimed while it was unlocked. */
 		if ((dvp->v_iflag & VI_DOOMED) != 0)
 			error = ENOENT;
 	} else {
 		/*
 		 * The root node (dvp) must not stay locked while the target
 		 * vnode is obtained: that vnode might be locked by another
 		 * thread which then wants the root vnode lock (in case we're
 		 * looking up the fd of the root vnode), which would be the
 		 * opposite lock order.
 		 * NOTE(review): the unlock/relock of dvp is presumably done
 		 * inside vn_vget_ino_gen() -- confirm.  The file reference
 		 * in 'fp' is dropped by fdesc_get_ino_alloc().
 		 */
 		arg.ftype = Fdesc;
 		arg.fd_fd = fd;
 		arg.ix = FD_DESC + fd;
 		arg.fp = fp;
 		arg.td = td;
 		error = vn_vget_ino_gen(dvp, fdesc_get_ino_alloc, &arg,
 		    LK_EXCLUSIVE, &fvp);
 	}
 	
 	if (error)
 		goto bad;
 	*vpp = fvp;
 	return (0);
 
 bad:
 	*vpp = NULL;
 	return (error);
 }
 
 static int
 fdesc_open(struct vop_open_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	if (VTOFDESC(vp)->fd_type == Froot)
 		return (0);
 
 	/*
 	 * XXX Kludge: set td->td_proc->p_dupfd to contain the value of the file
 	 * descriptor being sought for duplication. The error return ensures
 	 * that the vnode for this device will be released by vn_open. Open
 	 * will detect this special error and take the actions in dupfdopen.
 	 * Other callers of vn_open or VOP_OPEN will simply report the
 	 * error.
 	 */
 	ap->a_td->td_dupfd = VTOFDESC(vp)->fd_fd;	/* XXX */
 	return (ENODEV);
 }
 
 static int
 fdesc_getattr(struct vop_getattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr *vap = ap->a_vap;
 	struct timeval boottime;
 
 	getboottime(&boottime);
 	vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
 	vap->va_fileid = VTOFDESC(vp)->fd_ix;
 	vap->va_uid = 0;
 	vap->va_gid = 0;
 	vap->va_blocksize = DEV_BSIZE;
 	vap->va_atime.tv_sec = boottime.tv_sec;
 	vap->va_atime.tv_nsec = 0;
 	vap->va_mtime = vap->va_atime;
 	vap->va_ctime = vap->va_mtime;
 	vap->va_gen = 0;
 	vap->va_flags = 0;
 	vap->va_bytes = 0;
 	vap->va_filerev = 0;
 
 	switch (VTOFDESC(vp)->fd_type) {
 	case Froot:
 		vap->va_type = VDIR;
 		vap->va_nlink = 2;
 		vap->va_size = DEV_BSIZE;
 		vap->va_rdev = NODEV;
 		break;
 
 	case Fdesc:
 		vap->va_type = VCHR;
 		vap->va_nlink = 1;
 		vap->va_size = 0;
 		vap->va_rdev = makedev(0, vap->va_fileid);
 		break;
 
 	default:
 		panic("fdesc_getattr");
 		break;
 	}
 
 	vp->v_type = vap->va_type;
 	return (0);
 }
 
 static int
 fdesc_setattr(struct vop_setattr_args *ap)
 {
 	struct vattr *vap = ap->a_vap;
 	struct vnode *vp;
 	struct mount *mp;
 	struct file *fp;
 	struct thread *td = curthread;
 	cap_rights_t rights;
 	unsigned fd;
 	int error;
 
 	/*
 	 * Can't mess with the root vnode
 	 */
 	if (VTOFDESC(ap->a_vp)->fd_type == Froot)
 		return (EACCES);
 
 	fd = VTOFDESC(ap->a_vp)->fd_fd;
 
 	/*
 	 * Allow setattr where there is an underlying vnode.
 	 */
 	error = getvnode(td, fd,
 	    cap_rights_init(&rights, CAP_EXTATTR_SET), &fp);
 	if (error) {
 		/*
 		 * getvnode() returns EINVAL if the file descriptor is not
 		 * backed by a vnode.  Silently drop all changes except
 		 * chflags(2) in this case.
 		 */
 		if (error == EINVAL) {
 			if (vap->va_flags != VNOVAL)
 				error = EOPNOTSUPP;
 			else
 				error = 0;
 		}
 		return (error);
 	}
 	vp = fp->f_vnode;
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) == 0) {
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		error = VOP_SETATTR(vp, ap->a_vap, ap->a_cred);
 		VOP_UNLOCK(vp, 0);
 		vn_finished_write(mp);
 	}
 	fdrop(fp, td);
 	return (error);
 }
 
-#define UIO_MX 16
+#define UIO_MX _GENERIC_DIRLEN(10) /* number of symbols in INT_MAX printout */
 
 /*
  * Read entries from the fdescfs root directory.
  *
  * Each entry occupies exactly UIO_MX bytes, so the uio offset divided by
  * UIO_MX is the entry index: 0 is ".", 1 is "..", and index n + 2 stands
  * for descriptor n of the calling process.  Slots without an open file
  * are skipped.  No cookies are produced.
  *
  * Returns EINVAL when the offset is negative, does not fit in an int, is
  * not a multiple of UIO_MX, or when the buffer cannot hold one entry;
  * otherwise 0 or the error from uiomove().  Panics when called on a
  * node other than the root.
  */
 static int
 fdesc_readdir(struct vop_readdir_args *ap)
 {
 	struct uio *uio = ap->a_uio;
 	struct filedesc *fdp;
 	struct dirent d;
 	struct dirent *dp = &d;
 	int error, i, off, fcnt;
 
 	if (VTOFDESC(ap->a_vp)->fd_type != Froot)
 		panic("fdesc_readdir: not dir");
 
 	if (ap->a_ncookies != NULL)
 		*ap->a_ncookies = 0;
 
 	off = (int)uio->uio_offset;
 	if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 ||
 	    uio->uio_resid < UIO_MX)
 		return (EINVAL);
 	i = (u_int)off / UIO_MX;
 	fdp = uio->uio_td->td_proc->p_fd;
 	error = 0;
 
 	fcnt = i - 2;		/* The first two nodes are `.' and `..' */
 
 	FILEDESC_SLOCK(fdp);
 	while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) {
 		/* A zero d_namlen after the switch means "emit nothing". */
 		bzero((caddr_t)dp, UIO_MX);
 		switch (i) {
 		case 0:	/* `.' */
 		case 1: /* `..' */
 			dp->d_fileno = i + FD_ROOT;
 			dp->d_namlen = i + 1;
 			dp->d_reclen = UIO_MX;
 			/* Copies one dot for `.', two for `..'. */
 			bcopy("..", dp->d_name, dp->d_namlen);
 			dp->d_name[i + 1] = '\0';
 			dp->d_type = DT_DIR;
 			break;
 		default:
 			if (fdp->fd_ofiles[fcnt].fde_file == NULL)
 				break;
 			dp->d_namlen = sprintf(dp->d_name, "%d", fcnt);
 			dp->d_reclen = UIO_MX;
 			dp->d_type = DT_CHR;
 			/*
 			 * Report the same file id that fdesc_lookup() assigns
 			 * to the node (FD_DESC + descriptor number) and that
 			 * fdesc_getattr() returns, so that readdir and stat
 			 * agree.  Using the entry index here would be off by
 			 * the two slots taken by `.' and `..'.
 			 */
 			dp->d_fileno = fcnt + FD_DESC;
 			break;
 		}
 		if (dp->d_namlen != 0) {
 			/*
 			 * And ship to userland.  The descriptor table lock is
 			 * not held across the copy-out.
 			 */
 			FILEDESC_SUNLOCK(fdp);
 			error = uiomove(dp, UIO_MX, uio);
 			if (error)
 				goto done;
 			FILEDESC_SLOCK(fdp);
 		}
 		i++;
 		fcnt++;
 	}
 	FILEDESC_SUNLOCK(fdp);
 
 done:
 	/* Resume point: the first entry that was not copied out. */
 	uio->uio_offset = i * UIO_MX;
 	return (error);
 }
 
 /*
  * Reclaim handler: take the node out of the hash (if it is there), free
  * the fdescnode attached to the vnode and clear v_data.  Always returns 0.
  */
 static int
 fdesc_reclaim(struct vop_reclaim_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	fdesc_remove_entry(VTOFDESC(vp));
 	free(vp->v_data, M_TEMP);
 	vp->v_data = NULL;
 	return (0);
 }
Index: head/sys/fs/nandfs/nandfs_fs.h
===================================================================
--- head/sys/fs/nandfs/nandfs_fs.h	(revision 318735)
+++ head/sys/fs/nandfs/nandfs_fs.h	(revision 318736)
@@ -1,565 +1,565 @@
 /*-
  * Copyright (c) 2010-2012 Semihalf
  * Copyright (c) 2008, 2009 Reinoud Zandijk
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Original definitions written by Koji Sato <koji@osrg.net>
  *                    and Ryusuke Konishi <ryusuke@osrg.net>
  * From: NetBSD: nandfs_fs.h,v 1.1 2009/07/18 16:31:42 reinoud
  *
  * $FreeBSD$
  */
 
 #ifndef _NANDFS_FS_H
 #define _NANDFS_FS_H
 
 #include <sys/uuid.h>
 
 /* Number of nandfs_daddr_t entries that fit in one block of fsdev. */
 #define	MNINDIR(fsdev)	((fsdev)->nd_blocksize / sizeof(nandfs_daddr_t))
 
 /*
  * Inode structure. There are a few dedicated inode numbers that are
  * defined here first.
  */
 #define	NANDFS_WHT_INO		1	/* Whiteout ino			*/
 #define	NANDFS_ROOT_INO		2	/* Root file inode		*/
 #define	NANDFS_DAT_INO		3	/* DAT file			*/
 #define	NANDFS_CPFILE_INO	4	/* checkpoint file		*/
 #define	NANDFS_SUFILE_INO	5	/* segment usage file		*/
 #define	NANDFS_IFILE_INO	6	/* ifile			*/
 #define	NANDFS_GC_INO		7	/* Cleanerd node		*/
 #define	NANDFS_ATIME_INO	8	/* Atime file (reserved)	*/
 #define	NANDFS_XATTR_INO	9	/* Xattribute file (reserved)	*/
 #define	NANDFS_SKETCH_INO	10	/* Sketch file (obsolete)	*/
 #define	NANDFS_USER_INO		11	/* First user's file inode number */
 
 /*
  * True for inode numbers in the closed range NANDFS_DAT_INO ..
  * NANDFS_GC_INO (3 .. 7).  Evaluates its argument twice.
  */
 #define	NANDFS_SYS_NODE(ino) \
 	(((ino) >= NANDFS_DAT_INO) && ((ino) <= NANDFS_GC_INO))
 
 /* Sizes of the i_db[] and i_ib[] arrays in struct nandfs_inode. */
 #define	NANDFS_NDADDR		12	/* Direct addresses in inode. */
 #define	NANDFS_NIADDR		3	/* Indirect addresses in inode. */
 
 /* Signed 64-bit block address and logical block number types. */
 typedef	int64_t		nandfs_daddr_t;
 typedef	int64_t		nandfs_lbn_t;
 
 /*
  * Inode record.  The number at the start of each field comment is the
  * field's byte offset; the kernel build asserts a total size of 256.
  */
 struct nandfs_inode {
 	uint64_t	i_blocks;	/* 0: size in device blocks		*/
 	uint64_t	i_size;		/* 8: size in bytes			*/
 	uint64_t	i_ctime;	/* 16: creation time in seconds		*/
 	uint64_t	i_mtime;	/* 24: modification time in seconds part*/
 	uint32_t	i_ctime_nsec;	/* 32: creation time nanoseconds part	*/
 	uint32_t	i_mtime_nsec;	/* 36: modification time in nanoseconds	*/
 	uint32_t	i_uid;		/* 40: user id				*/
 	uint32_t	i_gid;		/* 44: group id				*/
 	uint16_t	i_mode;		/* 48: file mode			*/
 	uint16_t	i_links_count;	/* 50: number of references to the inode*/
 	uint32_t	i_flags;	/* 52: NANDFS_*_FL flags		*/
 	nandfs_daddr_t	i_special;	/* 56: special				*/
 	nandfs_daddr_t	i_db[NANDFS_NDADDR]; /* 64: Direct disk blocks.		*/
 	nandfs_daddr_t	i_ib[NANDFS_NIADDR]; /* 160: Indirect disk blocks.	*/
 	uint64_t	i_xattr;	/* 184: reserved for extended attributes*/
 	uint32_t	i_generation;	/* 192: file generation for NFS		*/
 	uint32_t	i_pad[15];	/* 196: make it 64 bits aligned		*/
 };
 
 #ifdef _KERNEL
 /* 196 bytes of fields plus 15 * 4 bytes of padding. */
 CTASSERT(sizeof(struct nandfs_inode) == 256);
 #endif
 
 /*
  * Each checkpoint/snapshot has a super root.
  *
  * The super root holds the inodes of the three system files: `dat', `cp' and
  * 'su' files. All other FS state is defined by those.
  *
  * It is CRC checksum'ed and time stamped.
  *
  * The three inodes are laid out back to back after a 16-byte header
  * (4 + 2 + 2 + 8 bytes); NANDFS_SR_MDT_OFFSET relies on that ordering.
  */
 
 struct nandfs_super_root {
 	uint32_t	sr_sum;		/* check-sum				*/
 	uint16_t	sr_bytes;	/* byte count of this structure		*/
 	uint16_t	sr_flags;	/* reserved for flags			*/
 	uint64_t	sr_nongc_ctime;	/* timestamp, not for cleaner(?)	*/
 	struct nandfs_inode sr_dat;	/* DAT, virt->phys translation inode	*/
 	struct nandfs_inode sr_cpfile;	/* CP, checkpoints inode		*/
 	struct nandfs_inode sr_sufile;	/* SU, segment usage inode		*/
 };
 
 /*
  * Byte offset, from the start of the super root, of the i-th metadata
  * inode (0 = DAT, 1 = cpfile, 2 = sufile) when inodes occupy inode_size
  * bytes each.  The base is taken with offsetof() rather than by casting
  * a member address of a null pointer to uint32_t, which is not a valid
  * pointer-to-integer conversion where pointers are wider than 32 bits.
  * The uint32_t cast keeps the type of the expression unchanged.
  */
 #define	NANDFS_SR_MDT_OFFSET(inode_size, i)			\
 	((uint32_t)offsetof(struct nandfs_super_root, sr_dat) +	\
 	(inode_size) * (i))
 
 #define	NANDFS_SR_DAT_OFFSET(inode_size)	NANDFS_SR_MDT_OFFSET(inode_size, 0)
 #define	NANDFS_SR_CPFILE_OFFSET(inode_size)	NANDFS_SR_MDT_OFFSET(inode_size, 1)
 #define	NANDFS_SR_SUFILE_OFFSET(inode_size)	NANDFS_SR_MDT_OFFSET(inode_size, 2)
 /* Size of the in-memory super root structure. */
 #define	NANDFS_SR_BYTES			(sizeof(struct nandfs_super_root))
 
 /*
  * The superblock describes the basic structure and mount history. It also
  * records some sizes of structures found on the disc for sanity checks.
  *
  * The superblock is stored at two places: NANDFS_SB_OFFSET_BYTES and
  * NANDFS_SB2_OFFSET_BYTES.
  */
 
 /* File system states stored on media in superblock's sbp->s_state */
 #define	NANDFS_VALID_FS		0x0001	/* cleanly unmounted and all is ok  */
 #define	NANDFS_ERROR_FS		0x0002	/* there were errors detected, fsck */
 #define	NANDFS_RESIZE_FS	0x0004	/* resize required, XXX unknown flag*/
 /*
  * Names for bits 1..3 above, in the same order.
  * NOTE(review): the "\20\1NAME..." shape looks like a kernel printf "%b"
  * description string -- confirm at the use sites.
  */
 #define	NANDFS_MOUNT_STATE_BITS	"\20\1VALID_FS\2ERROR_FS\3RESIZE_FS"
 
 /*
  * Brief description of control structures:
  *
  * NANDFS_NFSAREAS first blocks contain fsdata and some amount of super blocks.
  * Simple round-robin policy is used in order to choose which block will
  * contain new super block.
  *
  * Simple case with 2 blocks:
  * 1: fsdata sblock1 [sblock3 [sblock5 ..]]
  * 2: fsdata sblock2 [sblock4 [sblock6 ..]]
  *
  * struct nandfs_fsdata is packed; the kernel build asserts that it is
  * exactly 512 bytes, with f_pad filling the space after the named fields.
  */
 struct nandfs_fsdata {
 	uint16_t	f_magic;
 	uint16_t	f_bytes;
 
 	uint32_t	f_sum;		/* checksum of fsdata		*/
 	uint32_t	f_rev_level;	/* major disk format revision	*/
 
 	uint64_t	f_ctime;	/* creation time (execution time
 					   of newfs)			*/
 	/* Block size represented as: blocksize = 1 << (f_log_block_size + 10)	*/
 	uint32_t	f_log_block_size;
 
 	uint16_t	f_inode_size;		/* size of an inode		*/
 	uint16_t	f_dat_entry_size;	/* size of a dat entry		*/
 	uint16_t	f_checkpoint_size;	/* size of a checkpoint		*/
 	uint16_t	f_segment_usage_size;	/* size of a segment usage	*/
 
 	uint16_t	f_sbbytes;		/* byte count of CRC calculation
 						   for super blocks. s_reserved
 						   is excluded!			*/
 
 	uint16_t	f_errors;		/* behaviour on detecting errors	*/
 
 	uint32_t	f_erasesize;
 	uint64_t	f_nsegments;		/* number of segm. in filesystem	*/
 	nandfs_daddr_t	f_first_data_block;	/* 1st seg disk block number		*/
 	uint32_t	f_blocks_per_segment;	/* number of blocks per segment		*/
 	uint32_t	f_r_segments_percentage;	/* reserved segments percentage		*/
 
 	struct uuid	f_uuid;			/* 128-bit uuid for volume		*/
 	char		f_volume_name[16];	/* volume name				*/
 	uint32_t	f_pad[104];		/* excluded from NANDFS_FSDATA_CRC_BYTES */
 } __packed;
 
 #ifdef _KERNEL
 CTASSERT(sizeof(struct nandfs_fsdata) == 512);
 #endif
 
 /*
  * Super block record.  The structure is packed, so s_sum directly follows
  * the 2-byte s_magic; the kernel build asserts a total size of 256 bytes.
  * NANDFS_SB_BYTES (everything up to s_reserved) is the part covered by
  * the CRC.
  */
 struct nandfs_super_block {
 	uint16_t	s_magic;		/* magic value for identification */
 
 	uint32_t	s_sum;			/* check sum of super block       */
 
 	uint64_t	s_last_cno;		/* last checkpoint number         */
 	uint64_t	s_last_pseg;		/* addr part. segm. written last  */
 	uint64_t	s_last_seq;		/* seq.number of seg written last */
 	uint64_t	s_free_blocks_count;	/* free blocks count              */
 
 	uint64_t	s_mtime;		/* mount time                     */
 	uint64_t	s_wtime;		/* write time                     */
 	uint16_t	s_state;		/* file system state              */
 
 	char		s_last_mounted[64];	/* directory where last mounted   */
 
 	uint32_t	s_c_interval;		/* commit interval of segment     */
 	uint32_t	s_c_block_max;		/* threshold of data amount for
 						   the segment construction */
 	uint32_t	s_reserved[32];		/* padding to end of the block    */
 } __packed;
 
 #ifdef _KERNEL
 CTASSERT(sizeof(struct nandfs_super_block) == 256);
 #endif
 
 /* Expected values of f_magic and s_magic respectively. */
 #define	NANDFS_FSDATA_MAGIC	0xf8da
 #define	NANDFS_SUPER_MAGIC	0x8008
 
 /* Number of leading erase blocks holding fsdata and super blocks. */
 #define	NANDFS_NFSAREAS		4
 /* Size in bytes of that leading area for an erase size of esize. */
 #define	NANDFS_DATA_OFFSET_BYTES(esize)	(NANDFS_NFSAREAS * (esize))
 
 /* Super blocks start right after the fsdata structure. */
 #define	NANDFS_SBLOCK_OFFSET_BYTES (sizeof(struct nandfs_fsdata))
 
 #define	NANDFS_DEF_BLOCKSIZE	4096
 #define	NANDFS_MIN_BLOCKSIZE	512
 
 /* 2 << 16 == 131072 bytes (128 KiB). */
 #define	NANDFS_DEF_ERASESIZE	(2 << 16)
 
 #define	NANDFS_MIN_SEGSIZE	NANDFS_DEF_ERASESIZE
 
 #define	NANDFS_CURRENT_REV	9	/* current major revision */
 
 /* Bytes count of fsdata for CRC-calculation: everything before f_pad */
 #define	NANDFS_FSDATA_CRC_BYTES offsetof(struct nandfs_fsdata, f_pad)
 /* Bytes count of super_block for CRC-calculation */
 #define	NANDFS_SB_BYTES  offsetof(struct nandfs_super_block, s_reserved)
 
 /* Maximal count of links to a file */
 #define	NANDFS_LINK_MAX		32000
 
 /*
  * Structure of a directory entry.
  *
  * Note that they can't span blocks; the rec_len fills out.
  *
  * The structure is declared with room for the longest possible name.
  * NOTE(review): on-disk entries presumably use only NANDFS_DIR_REC_LEN()
  * bytes, as the rec_len field suggests -- confirm.
  */
 
 #define	NANDFS_NAME_LEN 255
 struct nandfs_dir_entry {
 	uint64_t	inode;			/* inode number */
 	uint16_t	rec_len;		/* directory entry length */
 	uint8_t		name_len;		/* name length */
 	uint8_t		file_type;
 	char		name[NANDFS_NAME_LEN];	/* file name */
 	char		pad;
 };
 
 /*
  * NANDFS_DIR_PAD defines the directory entries boundaries
  *
  * NOTE: It must be a multiple of 8
  *
  * NANDFS_DIR_REC_LEN rounds with a "& ~(PAD - 1)" mask, which is only a
  * correct round-up when NANDFS_DIR_PAD is a power of two.
  */
 #define	NANDFS_DIR_PAD			8
 #define	NANDFS_DIR_ROUND		(NANDFS_DIR_PAD - 1)
 /* Offset of the name within an entry, i.e. the fixed header size. */
 #define	NANDFS_DIR_NAME_OFFSET		(offsetof(struct nandfs_dir_entry, name))
 /* Entry length for a name of name_len bytes, rounded up to NANDFS_DIR_PAD. */
 #define	NANDFS_DIR_REC_LEN(name_len)					\
 	(((name_len) + NANDFS_DIR_NAME_OFFSET + NANDFS_DIR_ROUND)	\
 	& ~NANDFS_DIR_ROUND)
 /* Space left for the name (including padding) in such an entry. */
 #define	NANDFS_DIR_NAME_LEN(name_len)	\
 	(NANDFS_DIR_REC_LEN(name_len) - NANDFS_DIR_NAME_OFFSET)
 
 /*
  * NiLFS/NANDFS divides the disc into fixed length segments. Each segment is
  * filled with one or more partial segments of variable lengths.
  *
  * Each partial segment has a segment summary header followed by updates of
  * files and optionally a super root.
  */
 
 /*
  * Virtual to physical block translation information. For data blocks it maps
  * logical block number bi_blkoff to virtual block nr bi_vblocknr. For non
  * datablocks it is the virtual block number assigned to an indirect block
  * and has no bi_blkoff. The physical block number is the next
  * available data block in the partial segment after all the binfo's.
  *
  * Must stay the same size as struct nandfs_binfo_dat (asserted for kernel
  * builds next to that structure).
  */
 struct nandfs_binfo_v {
 	uint64_t	bi_ino;		/* file's inode			     */
 	uint64_t	bi_vblocknr;	/* assigned virtual block number     */
 	uint64_t	bi_blkoff;	/* for file's logical block number   */
 };
 
 /*
  * DAT allocation. For data blocks just the logical block number that maps on
  * the next available data block in the partial segment after the binfo's.
  */
 struct nandfs_binfo_dat {
 	uint64_t	bi_ino;
 	uint64_t	bi_blkoff;	/* DAT file's logical block number */
 	uint8_t		bi_level;	/* whether this is meta block */
 	uint8_t		bi_pad[7];	/* pads the record to 24 bytes */
 };
 
 #ifdef _KERNEL
 /* Both binfo flavours share union nandfs_binfo and must match in size. */
 CTASSERT(sizeof(struct nandfs_binfo_v) == sizeof(struct nandfs_binfo_dat));
 #endif
 
 /*
  * Convenience union for both types of binfo's.  Both members start with
  * bi_ino; nothing in the record itself says which member is valid.
  */
 union nandfs_binfo {
 	struct nandfs_binfo_v bi_v;
 	struct nandfs_binfo_dat bi_dat;
 };
 
 /*
  * Indirect buffers path
  * NOTE(review): presumably one step on the way through the indirect
  * blocks -- in_lbn naming the indirect block and in_off the slot within
  * it; confirm against the users of this structure.
  */
 struct nandfs_indir {
 	nandfs_daddr_t	in_lbn;
 	int		in_off;
 };
 
 /*
  * The (partial) segment summary.  This fixed header is followed by
  * ss_nbinfos binfo records.
  */
 struct nandfs_segment_summary {
 	uint32_t	ss_datasum;	/* CRC of complete data block        */
 	uint32_t	ss_sumsum;	/* CRC of segment summary only       */
 	uint32_t	ss_magic;	/* magic to identify segment summary */
 	uint16_t	ss_bytes;	/* size of segment summary structure */
 	uint16_t	ss_flags;	/* NANDFS_SS_* flags                  */
 	uint64_t	ss_seq;		/* sequence number of this segm. sum */
 	uint64_t	ss_create;	/* creation timestamp in seconds     */
 	uint64_t	ss_next;	/* blocknumber of next segment       */
 	uint32_t	ss_nblocks;	/* number of blocks used by summary  */
 	uint32_t	ss_nbinfos;	/* number of binfo structures	     */
 	uint32_t	ss_sumbytes;	/* total size of segment summary     */
 	uint32_t	ss_pad;
 	/* stream of binfo structures */
 };
 
 /* Expected value of ss_magic. */
 #define	NANDFS_SEGSUM_MAGIC	0x8e680011	/* segment summary magic number */
 
 /* Segment summary flags */
 #define	NANDFS_SS_LOGBGN	0x0001	/* begins a logical segment */
 #define	NANDFS_SS_LOGEND	0x0002	/* ends a logical segment */
 #define	NANDFS_SS_SR		0x0004	/* has super root */
 #define	NANDFS_SS_SYNDT		0x0008	/* includes data only updates */
 #define	NANDFS_SS_GC		0x0010	/* segment written for cleaner operation */
 /* Names for bits 1..5 above, one per flag, in the same order. */
 #define	NANDFS_SS_FLAG_BITS	"\20\1LOGBGN\2LOGEND\3SR\4SYNDT\5GC"
 
 /* Segment summary constraints */
 #define	NANDFS_SEG_MIN_BLOCKS	16	/* minimum number of blocks in a
 					   full segment */
 #define	NANDFS_PSEG_MIN_BLOCKS	2	/* minimum number of blocks in a
 					   partial segment */
 #define	NANDFS_MIN_NRSVSEGS	8	/* minimum number of reserved
 					   segments */
 
 /*
  * Structure of DAT/inode file.
  *
  * A DAT file is divided into groups. The maximum number of groups is the
  * number of block group descriptors that fit into one block; this descriptor
  * only gives the number of free entries in the associated group.
  *
  * Each group has a block sized bitmap indicating if an entry is taken or
  * empty. Each bit stands for a DAT entry.
  *
  * The inode file has exactly the same format only the entries are inode
  * entries.
  */
 
 /* Per-group descriptor: a single 32-bit free-entry counter. */
 struct nandfs_block_group_desc {
 	uint32_t	bg_nfrees;	/* num. free entries in block group  */
 };
 
 /*
  * DAT entry in a super root's DAT file: a block number together with the
  * checkpoint range (de_start .. de_end) for which it is valid.
  */
 struct nandfs_dat_entry {
 	uint64_t	de_blocknr;	/* block number                      */
 	uint64_t	de_start;	/* valid from checkpoint             */
 	uint64_t	de_end;		/* valid till checkpoint             */
 	uint64_t	de_rsv;		/* reserved for future use           */
 };
 
 /*
  * Structure of CP file.
  *
  * A snapshot is just a checkpoint only it's protected against removal by the
  * cleaner. The snapshots are kept on a double linked list of checkpoints.
  *
  * The links are checkpoint numbers rather than pointers.
  */
 struct nandfs_snapshot_list {
 	uint64_t	ssl_next;	/* checkpoint nr. forward */
 	uint64_t	ssl_prev;	/* checkpoint nr. back    */
 };
 
 /*
  * Checkpoint entry structure.  Each entry embeds a complete inode
  * (cp_ifile_inode) for the inode file of that checkpoint.
  */
 struct nandfs_checkpoint {
 	uint32_t	cp_flags;		/* NANDFS_CHECKPOINT_* flags          */
 	uint32_t	cp_checkpoints_count;	/* ZERO, not used anymore?           */
 	struct nandfs_snapshot_list cp_snapshot_list; /* list of snapshots   */
 	uint64_t	cp_cno;			/* checkpoint number                 */
 	uint64_t	cp_create;		/* creation timestamp                */
 	uint64_t	cp_nblk_inc;		/* number of blocks incremented      */
 	uint64_t	cp_blocks_count;	/* reserved (might be deleted)       */
 	struct nandfs_inode cp_ifile_inode;	/* inode file inode          */
 };
 
 /* Checkpoint flags (values for cp_flags) */
 #define	NANDFS_CHECKPOINT_SNAPSHOT	1
 #define	NANDFS_CHECKPOINT_INVALID	2
 #define	NANDFS_CHECKPOINT_SKETCH	4
 #define	NANDFS_CHECKPOINT_MINOR		8
 /* Names for bits 1..4 above, one per flag, in the same order. */
 #define	NANDFS_CHECKPOINT_BITS		"\20\1SNAPSHOT\2INVALID\3SKETCH\4MINOR"
 
 /*
  * Header of the checkpoint file: the checkpoint and snapshot counts plus
  * the head of the snapshot list.
  */
 struct nandfs_cpfile_header {
 	uint64_t	ch_ncheckpoints;	/* number of checkpoints             */
 	uint64_t	ch_nsnapshots;	/* number of snapshots               */
 	struct nandfs_snapshot_list ch_snapshot_list;	/* snapshot list     */
 };
 
 #define	NANDFS_CPFILE_FIRST_CHECKPOINT_OFFSET		\
 	((sizeof(struct nandfs_cpfile_header) +		\
 	sizeof(struct nandfs_checkpoint) - 1) /		\
 	sizeof(struct nandfs_checkpoint))
 
 
 #define NANDFS_NOSEGMENT        0xffffffff
 
 /*
  * Structure of SU file.
  *
  * The segment usage file sums up how each of the segments are used. They are
  * indexed by their segment number.
  */
 
 /* Segment usage entry: one 16-byte record per segment. */
 struct nandfs_segment_usage {
 	uint64_t	su_lastmod;	/* last modified timestamp           */
 	uint32_t	su_nblocks;	/* number of blocks in segment       */
 	uint32_t	su_flags;	/* NANDFS_SEGMENT_USAGE_* flags       */
 };
 
 /* Segment usage flags (values for su_flags) */
 #define	NANDFS_SEGMENT_USAGE_ACTIVE	1
 #define	NANDFS_SEGMENT_USAGE_DIRTY	2
 #define	NANDFS_SEGMENT_USAGE_ERROR	4
 #define	NANDFS_SEGMENT_USAGE_GC		8
 /*
  * Names for bits 1..4 above, one per flag, in the same order.  The GC
  * bit is listed too, so that it is not silently dropped when the flags
  * are decoded with this string.
  */
 #define	NANDFS_SEGMENT_USAGE_BITS	"\20\1ACTIVE\2DIRTY\3ERROR\4GC"
 
 /* Header of the segment usage file: running totals and allocation hint. */
 struct nandfs_sufile_header {
 	uint64_t	sh_ncleansegs;	/* number of segments marked clean   */
 	uint64_t	sh_ndirtysegs;	/* number of segments marked dirty   */
 	uint64_t	sh_last_alloc;	/* last allocated segment number     */
 };
 
 /*
  * Size of the sufile header expressed in segment-usage-entry-sized slots,
  * rounded up.
  */
 #define	NANDFS_SUFILE_FIRST_SEGMENT_USAGE_OFFSET	\
 	((sizeof(struct nandfs_sufile_header) +		\
 	sizeof(struct nandfs_segment_usage) - 1) /	\
 	sizeof(struct nandfs_segment_usage))
 
 /*
  * Payload of NANDFS_IOCTL_GET_SUSTAT.
  * NOTE(review): field meanings below are inferred from the names and the
  * similarly named on-disk fields -- confirm against the ioctl handler.
  */
 struct nandfs_seg_stat {
 	uint64_t	nss_nsegs;	/* presumably total segment count */
 	uint64_t	nss_ncleansegs;	/* cf. sh_ncleansegs */
 	uint64_t	nss_ndirtysegs;	/* cf. sh_ndirtysegs */
 	uint64_t	nss_ctime;
 	uint64_t	nss_nongc_ctime;	/* cf. sr_nongc_ctime */
 	uint64_t	nss_prot_seq;
 };
 
 /*
  * Checkpoint kinds (NANDFS_CHECKPOINT == 0, NANDFS_SNAPSHOT == 1).
  * NOTE(review): presumably the values carried in nandfs_cpmode.ncpm_mode
  * -- confirm.
  */
 enum {
 	NANDFS_CHECKPOINT,
 	NANDFS_SNAPSHOT
 };
 
 /* NOTE(review): presumably the per-request cap on nandfs_cpinfo records. */
 #define	NANDFS_CPINFO_MAX		512
 
 /*
  * Checkpoint description; the nci_* fields mirror the cp_* fields of
  * struct nandfs_checkpoint by name.
  * NOTE(review): presumably the element type returned through struct
  * nandfs_argv by NANDFS_IOCTL_GET_CPINFO -- confirm.
  */
 struct nandfs_cpinfo {
 	uint32_t	nci_flags;
 	uint32_t	nci_pad;
 	uint64_t	nci_cno;
 	uint64_t	nci_create;
 	uint64_t	nci_nblk_inc;
 	uint64_t	nci_blocks_count;
 	uint64_t	nci_next;
 };
 
 /* NOTE(review): presumably the per-request cap on nandfs_suinfo records. */
 #define	NANDFS_SEGMENTS_MAX	512
 
 /*
  * Segment usage description; apart from nsi_num the fields mirror struct
  * nandfs_segment_usage by name.
  * NOTE(review): presumably the element type returned through struct
  * nandfs_argv by NANDFS_IOCTL_GET_SUINFO -- confirm.
  */
 struct nandfs_suinfo {
 	uint64_t	nsi_num;
 	uint64_t	nsi_lastmod;
 	uint32_t	nsi_blocks;
 	uint32_t	nsi_flags;
 };
 
 /* NOTE(review): presumably the per-request cap on nandfs_vinfo records. */
 #define	NANDFS_VINFO_MAX	512
 
 /*
  * Virtual block description.
  * NOTE(review): presumably the element type exchanged through struct
  * nandfs_argv by NANDFS_IOCTL_GET_VINFO -- confirm.  The trailing int
  * leaves the structure with compiler-inserted tail padding on ABIs that
  * align uint64_t to 8 bytes.
  */
 struct nandfs_vinfo {
 	uint64_t	nvi_ino;
 	uint64_t	nvi_vblocknr;
 	uint64_t	nvi_start;
 	uint64_t	nvi_end;
 	uint64_t	nvi_blocknr;
 	int		nvi_alive;
 };
 
 /* Payload of NANDFS_IOCTL_CHANGE_CPMODE. */
 struct nandfs_cpmode {
 	uint64_t	ncpm_cno;
 	uint32_t	ncpm_mode;
 	uint32_t	ncpm_pad;
 };
 
 /*
  * Payload of the NANDFS_IOCTL_GET_CPINFO, _GET_SUINFO, _GET_VINFO and
  * _GET_BDESCS requests.
  * NOTE(review): looks like a descriptor for a user-space array (base
  * address, element count, element size) -- confirm against the handlers.
  */
 struct nandfs_argv {
 	uint64_t	nv_base;
 	uint32_t	nv_nmembs;
 	uint16_t	nv_size;
 	uint16_t	nv_flags;
 	uint64_t	nv_index;
 };
 
 /* Payload of NANDFS_IOCTL_GET_CPSTAT. */
 struct nandfs_cpstat {
 	uint64_t	ncp_cno;
 	uint64_t	ncp_ncps;
 	uint64_t	ncp_nss;
 };
 
 /* A start/end pair; embedded in struct nandfs_vdesc as vd_period. */
 struct nandfs_period {
 	uint64_t	p_start;
 	uint64_t	p_end;
 };
 
 /*
  * Virtual block descriptor.
  * NOTE(review): not referenced by any ioctl definition in this header;
  * presumably consumed by the cleaner interface -- confirm.
  */
 struct nandfs_vdesc {
 	uint64_t	vd_ino;
 	uint64_t	vd_cno;
 	uint64_t	vd_vblocknr;
 	struct nandfs_period	vd_period;
 	uint64_t	vd_blocknr;
 	uint64_t	vd_offset;
 	uint32_t	vd_flags;
 	uint32_t	vd_pad;
 };
 
 /*
  * Block descriptor.
  * NOTE(review): presumably the element type exchanged through struct
  * nandfs_argv by NANDFS_IOCTL_GET_BDESCS -- confirm.
  */
 struct nandfs_bdesc {
 	uint64_t	bd_ino;
 	uint64_t	bd_oblocknr;
 	uint64_t	bd_blocknr;
 	uint64_t	bd_offset;
 	uint32_t	bd_level;
 	uint32_t	bd_alive;
 };
 
 #ifndef _KERNEL
 #ifndef	MNAMELEN
-#define	MNAMELEN	88
+#define	MNAMELEN	1024
 #endif
 #endif
 
 /*
  * Payload of NANDFS_IOCTL_GET_FSINFO: the fsdata and super block records
  * plus a device name.  The size of fs_dev, and therefore of the whole
  * structure and of the ioctl request derived from it, follows MNAMELEN.
  */
 struct nandfs_fsinfo {
 	struct nandfs_fsdata		fs_fsdata;
 	struct nandfs_super_block	fs_super;
 	char				fs_dev[MNAMELEN];
 };
 
 #define	NANDFS_MAX_MOUNTS	65535
 
 /*
  * ioctl requests, all in group 'N', numbered 100..111.  The third macro
  * argument is the type of the data exchanged with the kernel.
  */
 #define	NANDFS_IOCTL_GET_SUSTAT		_IOR('N', 100, struct nandfs_seg_stat)
 #define	NANDFS_IOCTL_CHANGE_CPMODE	_IOWR('N', 101, struct nandfs_cpmode)
 #define	NANDFS_IOCTL_GET_CPINFO		_IOWR('N', 102, struct nandfs_argv)
 #define	NANDFS_IOCTL_DELETE_CP		_IOWR('N', 103, uint64_t[2])
 #define	NANDFS_IOCTL_GET_CPSTAT		_IOR('N', 104, struct nandfs_cpstat)
 #define	NANDFS_IOCTL_GET_SUINFO		_IOWR('N', 105, struct nandfs_argv)
 #define	NANDFS_IOCTL_GET_VINFO		_IOWR('N', 106, struct nandfs_argv)
 #define	NANDFS_IOCTL_GET_BDESCS		_IOWR('N', 107, struct nandfs_argv)
 #define	NANDFS_IOCTL_GET_FSINFO		_IOR('N', 108, struct nandfs_fsinfo)
 #define	NANDFS_IOCTL_MAKE_SNAP		_IOWR('N', 109, uint64_t)
 #define	NANDFS_IOCTL_DELETE_SNAP	_IOWR('N', 110, uint64_t)
 #define	NANDFS_IOCTL_SYNC		_IOWR('N', 111, uint64_t)
 
 #endif /* _NANDFS_FS_H */
Index: head/sys/fs/nfs/nfsport.h
===================================================================
--- head/sys/fs/nfs/nfsport.h	(revision 318735)
+++ head/sys/fs/nfs/nfsport.h	(revision 318736)
@@ -1,1048 +1,1042 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _NFS_NFSPORT_H_
 #define	_NFS_NFSPORT_H_
 
 /*
  * In general, I'm not fond of #includes in .h files, but this seems
  * to be the cleanest way to handle #include files for the ports.
  */
 #ifdef _KERNEL
 #include <sys/unistd.h>
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/dirent.h>
 #include <sys/domain.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lockf.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/reboot.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/stat.h>
 #include <sys/syslog.h>
 #include <sys/sysproto.h>
 #include <sys/time.h>
 #include <sys/uio.h>
 #include <sys/vnode.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/acl.h>
 #include <sys/module.h>
 #include <sys/sysent.h>
 #include <sys/syscall.h>
 #include <sys/priv.h>
 #include <sys/kthread.h>
 #include <sys/syscallsubr.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/radix.h>
 #include <net/route.h>
 #include <net/if_dl.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <machine/in_cksum.h>
 #include <crypto/des/des.h>
 #include <sys/md5.h>
 #include <rpc/rpc.h>
 #include <rpc/rpcsec_gss.h>
 
 /*
  * For Darwin, these functions should be "static" when built in a kext.
  * (This is always defined as nil otherwise.)
  */
 #define	APPLESTATIC
 #include <ufs/ufs/dir.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/ufsmount.h>
 #include <vm/uma.h>
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <nfs/nfssvc.h>
 #include "opt_nfs.h"
 #include "opt_ufs.h"
 
 /*
  * These types must be defined before the nfs includes.
  */
 #define	NFSSOCKADDR_T	struct sockaddr *
 #define	NFSPROC_T	struct thread
 #define	NFSDEV_T	dev_t
 #define	NFSSVCARGS	nfssvc_args
 #define	NFSACL_T	struct acl
 
 /*
  * These should be defined as the types used for the corresponding VOP's
  * argument type.
  */
 #define	NFS_ACCESS_ARGS		struct vop_access_args
 #define	NFS_OPEN_ARGS		struct vop_open_args
 #define	NFS_GETATTR_ARGS	struct vop_getattr_args
 #define	NFS_LOOKUP_ARGS		struct vop_lookup_args
 #define	NFS_READDIR_ARGS	struct vop_readdir_args
 
 /*
  * Allocate mbufs. Must succeed and never set the mbuf ptr to NULL.
  */
 /*
  * NFSMGET(m): MGET() a data mbuf into (m) with M_WAITOK; if (m) is still
  * NULL, sleep in nfs_catnap() and try again until it is not.
  */
 #define	NFSMGET(m)	do { 					\
 		MGET((m), M_WAITOK, MT_DATA); 			\
 		while ((m) == NULL ) { 				\
 			(void) nfs_catnap(PZERO, 0, "nfsmget");	\
 			MGET((m), M_WAITOK, MT_DATA); 		\
 		} 						\
 	} while (0)
 /*
  * NFSMGETHDR(m): same retry loop as NFSMGET(), but uses MGETHDR().
  */
 #define	NFSMGETHDR(m)	do { 					\
 		MGETHDR((m), M_WAITOK, MT_DATA);		\
 		while ((m) == NULL ) { 				\
 			(void) nfs_catnap(PZERO, 0, "nfsmget");	\
 			MGETHDR((m), M_WAITOK, MT_DATA); 	\
 		} 						\
 	} while (0)
 /*
  * NFSMCLGET(m, w): same as NFSMGET(), then MCLGET((m), (w)) is applied to
  * the mbuf.  (w) is passed only to MCLGET(); the MGET() calls always use
  * M_WAITOK.
  */
 #define	NFSMCLGET(m, w)	do { 					\
 		MGET((m), M_WAITOK, MT_DATA); 			\
 		while ((m) == NULL ) { 				\
 			(void) nfs_catnap(PZERO, 0, "nfsmget");	\
 			MGET((m), M_WAITOK, MT_DATA); 		\
 		} 						\
 		MCLGET((m), (w));				\
 	} while (0)
 /*
  * NFSMCLGETHDR(m, w): MGETHDR() an mbuf into (m) with M_WAITOK, sleeping in
  * nfs_catnap() and retrying while (m) is NULL, then apply MCLGET((m), (w))
  * to it, mirroring NFSMCLGET().  Previously (w) was accepted but ignored
  * and MCLGET() was never called, so this macro behaved exactly like
  * NFSMGETHDR().
  */
 #define	NFSMCLGETHDR(m, w) do { 				\
 		MGETHDR((m), M_WAITOK, MT_DATA);		\
 		while ((m) == NULL ) { 				\
 			(void) nfs_catnap(PZERO, 0, "nfsmget");	\
 			MGETHDR((m), M_WAITOK, MT_DATA); 	\
 		} 						\
 		MCLGET((m), (w));				\
 	} while (0)
 #define	NFSMTOD	mtod
 
 /*
  * Client side constant for size of a lockowner name.
  */
 #define	NFSV4CL_LOCKNAMELEN	12
 
 /*
  * Type for a mutex lock.
  */
 #define	NFSMUTEX_T		struct mtx
 
 #endif	/* _KERNEL */
 
 /*
  * NFSv4 Operation numbers.
  */
 #define	NFSV4OP_ACCESS		3
 #define	NFSV4OP_CLOSE		4
 #define	NFSV4OP_COMMIT		5
 #define	NFSV4OP_CREATE		6
 #define	NFSV4OP_DELEGPURGE	7
 #define	NFSV4OP_DELEGRETURN	8
 #define	NFSV4OP_GETATTR		9
 #define	NFSV4OP_GETFH		10
 #define	NFSV4OP_LINK		11
 #define	NFSV4OP_LOCK		12
 #define	NFSV4OP_LOCKT		13
 #define	NFSV4OP_LOCKU		14
 #define	NFSV4OP_LOOKUP		15
 #define	NFSV4OP_LOOKUPP		16
 #define	NFSV4OP_NVERIFY		17
 #define	NFSV4OP_OPEN		18
 #define	NFSV4OP_OPENATTR	19
 #define	NFSV4OP_OPENCONFIRM	20
 #define	NFSV4OP_OPENDOWNGRADE	21
 #define	NFSV4OP_PUTFH		22
 #define	NFSV4OP_PUTPUBFH	23
 #define	NFSV4OP_PUTROOTFH	24
 #define	NFSV4OP_READ		25
 #define	NFSV4OP_READDIR		26
 #define	NFSV4OP_READLINK	27
 #define	NFSV4OP_REMOVE		28
 #define	NFSV4OP_RENAME		29
 #define	NFSV4OP_RENEW		30
 #define	NFSV4OP_RESTOREFH	31
 #define	NFSV4OP_SAVEFH		32
 #define	NFSV4OP_SECINFO		33
 #define	NFSV4OP_SETATTR		34
 #define	NFSV4OP_SETCLIENTID	35
 #define	NFSV4OP_SETCLIENTIDCFRM	36
 #define	NFSV4OP_VERIFY		37
 #define	NFSV4OP_WRITE		38
 #define	NFSV4OP_RELEASELCKOWN	39
 
 /*
  * Must be one greater than the last Operation#.
  */
 #define	NFSV4OP_NOPS		40
 
 /*
  * Additional Ops for NFSv4.1.
  */
 #define	NFSV4OP_BACKCHANNELCTL	40
 #define	NFSV4OP_BINDCONNTOSESS	41
 #define	NFSV4OP_EXCHANGEID	42
 #define	NFSV4OP_CREATESESSION	43
 #define	NFSV4OP_DESTROYSESSION	44
 #define	NFSV4OP_FREESTATEID	45
 #define	NFSV4OP_GETDIRDELEG	46
 #define	NFSV4OP_GETDEVINFO	47
 #define	NFSV4OP_GETDEVLIST	48
 #define	NFSV4OP_LAYOUTCOMMIT	49
 #define	NFSV4OP_LAYOUTGET	50
 #define	NFSV4OP_LAYOUTRETURN	51
 #define	NFSV4OP_SECINFONONAME	52
 #define	NFSV4OP_SEQUENCE	53
 #define	NFSV4OP_SETSSV		54
 #define	NFSV4OP_TESTSTATEID	55
 #define	NFSV4OP_WANTDELEG	56
 #define	NFSV4OP_DESTROYCLIENTID	57
 #define	NFSV4OP_RECLAIMCOMPL	58
 
 /*
  * Must be one more than last op#.
  * NFSv4.2 isn't implemented yet, but define the op# limit for it.
  */
 #define	NFSV41_NOPS		59
 #define	NFSV42_NOPS		72
 
 /* Quirky case of the illegal op code */
 #define	NFSV4OP_OPILLEGAL	10044
 
 /*
  * Fake NFSV4OP_xxx used for nfsstat. Start at NFSV42_NOPS.
  */
 #define	NFSV4OP_SYMLINK		(NFSV42_NOPS)
 #define	NFSV4OP_MKDIR		(NFSV42_NOPS + 1)
 #define	NFSV4OP_RMDIR		(NFSV42_NOPS + 2)
 #define	NFSV4OP_READDIRPLUS	(NFSV42_NOPS + 3)
 #define	NFSV4OP_MKNOD		(NFSV42_NOPS + 4)
 #define	NFSV4OP_FSSTAT		(NFSV42_NOPS + 5)
 #define	NFSV4OP_FSINFO		(NFSV42_NOPS + 6)
 #define	NFSV4OP_PATHCONF	(NFSV42_NOPS + 7)
 #define	NFSV4OP_V3CREATE	(NFSV42_NOPS + 8)
 
 /*
  * This is the count of the fake operations listed above.
  */
 #define	NFSV4OP_FAKENOPS	9
 
 /*
  * and the Callback OPs
  */
 #define	NFSV4OP_CBGETATTR	3
 #define	NFSV4OP_CBRECALL	4
 
 /*
  * Must be one greater than the last Callback Operation# for NFSv4.0.
  */
 #define	NFSV4OP_CBNOPS		5
 
 /*
  * Additional Callback Ops for NFSv4.1 only.
  */
 #define	NFSV4OP_CBLAYOUTRECALL	5
 #define	NFSV4OP_CBNOTIFY	6
 #define	NFSV4OP_CBPUSHDELEG	7
 #define	NFSV4OP_CBRECALLANY	8
 #define	NFSV4OP_CBRECALLOBJAVAIL 9
 #define	NFSV4OP_CBRECALLSLOT	10
 #define	NFSV4OP_CBSEQUENCE	11
 #define	NFSV4OP_CBWANTCANCELLED	12
 #define	NFSV4OP_CBNOTIFYLOCK	13
 #define	NFSV4OP_CBNOTIFYDEVID	14
 
 #define	NFSV41_CBNOPS		15
 #define	NFSV42_CBNOPS		16
 
 /*
  * The lower numbers -> 21 are used by NFSv2 and v3. These define higher
  * numbers used by NFSv4.
  * NFS_V3NPROCS is one greater than the last V3 op and NFS_NPROCS is
  * one greater than the last number.
  */
 #ifndef	NFS_V3NPROCS
 #define	NFS_V3NPROCS		22
 
 #define	NFSPROC_LOOKUPP		22
 #define	NFSPROC_SETCLIENTID	23
 #define	NFSPROC_SETCLIENTIDCFRM	24
 #define	NFSPROC_LOCK		25
 #define	NFSPROC_LOCKU		26
 #define	NFSPROC_OPEN		27
 #define	NFSPROC_CLOSE		28
 #define	NFSPROC_OPENCONFIRM	29
 #define	NFSPROC_LOCKT		30
 #define	NFSPROC_OPENDOWNGRADE	31
 #define	NFSPROC_RENEW		32
 #define	NFSPROC_PUTROOTFH	33
 #define	NFSPROC_RELEASELCKOWN	34
 #define	NFSPROC_DELEGRETURN	35
 #define	NFSPROC_RETDELEGREMOVE	36
 #define	NFSPROC_RETDELEGRENAME1	37
 #define	NFSPROC_RETDELEGRENAME2	38
 #define	NFSPROC_GETACL		39
 #define	NFSPROC_SETACL		40
 
 /*
  * Must be defined as one higher than the last Proc# above.
  */
 #define	NFSV4_NPROCS		41
 
 /* Additional procedures for NFSv4.1. */
 #define	NFSPROC_EXCHANGEID	41
 #define	NFSPROC_CREATESESSION	42
 #define	NFSPROC_DESTROYSESSION	43
 #define	NFSPROC_DESTROYCLIENT	44
 #define	NFSPROC_FREESTATEID	45
 #define	NFSPROC_LAYOUTGET	46
 #define	NFSPROC_GETDEVICEINFO	47
 #define	NFSPROC_LAYOUTCOMMIT	48
 #define	NFSPROC_LAYOUTRETURN	49
 #define	NFSPROC_RECLAIMCOMPL	50
 #define	NFSPROC_WRITEDS		51
 #define	NFSPROC_READDS		52
 #define	NFSPROC_COMMITDS	53
 
 /*
  * Must be defined as one higher than the last NFSv4.1 Proc# above.
  */
 #define	NFSV41_NPROCS		54
 
 #endif	/* NFS_V3NPROCS */
 
 /*
  * New stats structure.
  * The vers field will be set to NFSSTATS_V1 by the caller.
  */
 #define	NFSSTATS_V1	1
 struct nfsstatsv1 {
 	int		vers;	/* Set to version requested by caller. */
 	uint64_t	attrcache_hits;
 	uint64_t	attrcache_misses;
 	uint64_t	lookupcache_hits;
 	uint64_t	lookupcache_misses;
 	uint64_t	direofcache_hits;
 	uint64_t	direofcache_misses;
 	uint64_t	accesscache_hits;
 	uint64_t	accesscache_misses;
 	uint64_t	biocache_reads;
 	uint64_t	read_bios;
 	uint64_t	read_physios;
 	uint64_t	biocache_writes;
 	uint64_t	write_bios;
 	uint64_t	write_physios;
 	uint64_t	biocache_readlinks;
 	uint64_t	readlink_bios;
 	uint64_t	biocache_readdirs;
 	uint64_t	readdir_bios;
 	/*
 	 * NOTE(review): this array is 15 slots larger than NFSV41_NPROCS; the
 	 * reason for the extra room is not visible here -- confirm.
 	 */
 	uint64_t	rpccnt[NFSV41_NPROCS + 15];
 	uint64_t	rpcretries;
 	/* One slot per op below NFSV42_NOPS plus the NFSV4OP_FAKENOPS fake ops. */
 	uint64_t	srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS];
 	uint64_t	srvrpc_errs;
 	uint64_t	srv_errs;
 	uint64_t	rpcrequests;
 	uint64_t	rpctimeouts;
 	uint64_t	rpcunexpected;
 	uint64_t	rpcinvalid;
 	uint64_t	srvcache_inproghits;
 	uint64_t	srvcache_idemdonehits;
 	uint64_t	srvcache_nonidemdonehits;
 	uint64_t	srvcache_misses;
 	uint64_t	srvcache_tcppeak;
 	int		srvcache_size;	/* Updated by atomic_xx_int(). */
 	uint64_t	srvclients;
 	uint64_t	srvopenowners;
 	uint64_t	srvopens;
 	uint64_t	srvlockowners;
 	uint64_t	srvlocks;
 	uint64_t	srvdelegates;
 	/* One slot per callback op below NFSV42_CBNOPS. */
 	uint64_t	cbrpccnt[NFSV42_CBNOPS];
 	uint64_t	clopenowners;
 	uint64_t	clopens;
 	uint64_t	cllockowners;
 	uint64_t	cllocks;
 	uint64_t	cldelegates;
 	uint64_t	cllocalopenowners;
 	uint64_t	cllocalopens;
 	uint64_t	cllocallockowners;
 	uint64_t	cllocallocks;
 	uint64_t	srvstartcnt;
 	uint64_t	srvdonecnt;
 	/* The next three arrays have the same bound as srvrpccnt[]. */
 	uint64_t	srvbytes[NFSV42_NOPS + NFSV4OP_FAKENOPS];
 	uint64_t	srvops[NFSV42_NOPS + NFSV4OP_FAKENOPS];
 	struct bintime	srvduration[NFSV42_NOPS + NFSV4OP_FAKENOPS];
 	struct bintime	busyfrom;
 	struct bintime	busytime;
 };
 
 /*
  * Old stats structure.
  */
 struct ext_nfsstats {
 	/*
 	 * Every member is a plain int.  The three arrays are bounded by the
 	 * NFSv4.0 limits (NFSV4_NPROCS, NFSV4OP_NOPS, NFSV4OP_CBNOPS), whereas
 	 * struct nfsstatsv1 uses the larger NFSv4.1/4.2 limits.
 	 */
 	int	attrcache_hits;
 	int	attrcache_misses;
 	int	lookupcache_hits;
 	int	lookupcache_misses;
 	int	direofcache_hits;
 	int	direofcache_misses;
 	int	accesscache_hits;
 	int	accesscache_misses;
 	int	biocache_reads;
 	int	read_bios;
 	int	read_physios;
 	int	biocache_writes;
 	int	write_bios;
 	int	write_physios;
 	int	biocache_readlinks;
 	int	readlink_bios;
 	int	biocache_readdirs;
 	int	readdir_bios;
 	int	rpccnt[NFSV4_NPROCS];
 	int	rpcretries;
 	int	srvrpccnt[NFSV4OP_NOPS + NFSV4OP_FAKENOPS];
 	int	srvrpc_errs;
 	int	srv_errs;
 	int	rpcrequests;
 	int	rpctimeouts;
 	int	rpcunexpected;
 	int	rpcinvalid;
 	int	srvcache_inproghits;
 	int	srvcache_idemdonehits;
 	int	srvcache_nonidemdonehits;
 	int	srvcache_misses;
 	int	srvcache_tcppeak;
 	int	srvcache_size;
 	int	srvclients;
 	int	srvopenowners;
 	int	srvopens;
 	int	srvlockowners;
 	int	srvlocks;
 	int	srvdelegates;
 	int	cbrpccnt[NFSV4OP_CBNOPS];
 	int	clopenowners;
 	int	clopens;
 	int	cllockowners;
 	int	cllocks;
 	int	cldelegates;
 	int	cllocalopenowners;
 	int	cllocalopens;
 	int	cllocallockowners;
 	int	cllocallocks;
 };
 
 #ifdef _KERNEL
 /*
  * Define NFS_NPROCS as NFSV4_NPROCS for the experimental kernel code.
  */
 #ifndef	NFS_NPROCS
 #define	NFS_NPROCS		NFSV4_NPROCS
 #endif
 
 #include <fs/nfs/nfskpiport.h>
 #include <fs/nfs/nfsdport.h>
 #include <fs/nfs/rpcv2.h>
 #include <fs/nfs/nfsproto.h>
 #include <fs/nfs/nfs.h>
 #include <fs/nfs/nfsclstate.h>
 #include <fs/nfs/nfs_var.h>
 #include <fs/nfs/nfsm_subs.h>
 #include <fs/nfs/nfsrvcache.h>
 #include <fs/nfs/nfsrvstate.h>
 #include <fs/nfs/xdr_subs.h>
 #include <fs/nfs/nfscl.h>
 #include <nfsclient/nfsargs.h>
 #include <fs/nfsclient/nfsmount.h>
 
 /*
  * Just to keep nfs_var.h happy.
  */
 struct nfs_vattr {
 	int	junk;
 };
 
 /*
  * A struct vattr plus three extra members.  The na_xxx macros that follow
  * this structure alias the corresponding va_xxx members of na_vattr, so
  * code can write nap->na_size instead of nap->na_vattr.va_size.
  */
 struct nfsvattr {
 	struct vattr	na_vattr;
 	nfsattrbit_t	na_suppattr;
 	u_int32_t	na_mntonfileno;
 	u_int64_t	na_filesid[2];
 };
 
 #define	na_type		na_vattr.va_type
 #define	na_mode		na_vattr.va_mode
 #define	na_nlink	na_vattr.va_nlink
 #define	na_uid		na_vattr.va_uid
 #define	na_gid		na_vattr.va_gid
 #define	na_fsid		na_vattr.va_fsid
 #define	na_fileid	na_vattr.va_fileid
 #define	na_size		na_vattr.va_size
 #define	na_blocksize	na_vattr.va_blocksize
 #define	na_atime	na_vattr.va_atime
 #define	na_mtime	na_vattr.va_mtime
 #define	na_ctime	na_vattr.va_ctime
 #define	na_gen		na_vattr.va_gen
 #define	na_flags	na_vattr.va_flags
 #define	na_rdev		na_vattr.va_rdev
 #define	na_bytes	na_vattr.va_bytes
 #define	na_filerev	na_vattr.va_filerev
 #define	na_vaflags	na_vattr.va_vaflags
 
 #include <fs/nfsclient/nfsnode.h>
 
 /*
  * This is the header structure used for the lists, etc. (It has the
  * above record in it.)
  */
 struct nfsrv_stablefirst {
 	LIST_HEAD(, nfsrv_stable) nsf_head;	/* Head of nfsrv_stable list */
 	time_t		nsf_eograce;	/* Time grace period ends */
 	time_t		*nsf_bootvals;	/* Previous boottime values */
 	struct file	*nsf_fp;	/* File table pointer */
 	u_char		nsf_flags;	/* NFSNSF_ flags */
 	struct nfsf_rec	nsf_rec;	/* and above first record */
 };
 #define	nsf_lease	nsf_rec.lease
 #define	nsf_numboots	nsf_rec.numboots
 
 /* NFSNSF_xxx flags */
 #define	NFSNSF_UPDATEDONE	0x01
 #define	NFSNSF_GRACEOVER	0x02
 #define	NFSNSF_NEEDLOCK		0x04
 #define	NFSNSF_EXPIREDCLIENT	0x08
 #define	NFSNSF_NOOPENS		0x10
 #define	NFSNSF_OK		0x20
 
 /*
  * Maximum number of boot times allowed in record. Although there is
  * really no need for a fixed upper bound, this serves as a sanity check
  * for a corrupted file.
  */
 #define	NFSNSF_MAXNUMBOOTS	10000
 
 /*
  * This structure defines the other records in the file. The
  * nst_client array is actually the size of the client string name.
  */
 struct nfst_rec {
 	u_int16_t	len;		/* NOTE(review): presumably the length of client[] -- confirm */
 	u_char		flag;		/* NFSNST_xxx values, defined just below */
 	u_char		client[1];	/* declared with one element; really as long as the client name */
 };
 /* and the values for flag */
 #define	NFSNST_NEWSTATE	0x1
 #define	NFSNST_REVOKE		0x2
 #define	NFSNST_GOTSTATE		0x4
 
 /*
  * This structure is linked onto nfsrv_stablefirst for the duration of
  * reclaim.
  */
 struct nfsrv_stable {
 	LIST_ENTRY(nfsrv_stable) nst_list;
 	struct nfsclient	*nst_clp;
 	/*
 	 * Kept as the last member: nst_rec.client[] is declared with one
 	 * element but holds the whole client name.
 	 */
 	struct nfst_rec		nst_rec;
 };
 /*
  * Shorthand for the members of the embedded nst_rec.
  * NOTE(review): struct nfst_rec as declared in this header has only the
  * members len, flag and client -- there is no "timestamp" member, so
  * nst_timestamp cannot compile if it is ever used.
  */
 #define	nst_timestamp	nst_rec.timestamp
 #define	nst_len		nst_rec.len
 #define	nst_flag	nst_rec.flag
 #define	nst_client	nst_rec.client
 
 /*
  * At some point the server will run out of kernel storage for
  * state structures. For FreeBSD5.2, this results in a panic
  * kmem_map is full. It happens at well over 1000000 opens plus
  * locks on a PIII-800 with 256Mbytes, so that is where I've set
  * the limit. If your server panics due to too many opens/locks,
  * decrease the size of NFSRV_V4STATELIMIT. If you find the server
  * returning NFS4ERR_RESOURCE a lot and have lots of memory, try
  * increasing it.
  */
 #define	NFSRV_V4STATELIMIT	500000	/* Max # of Opens + Locks */
 
 /*
  * The type required differs with BSDen (just the second arg).
  */
 void nfsrvd_rcv(struct socket *, void *, int);
 
 /*
  * Macros for handling socket addresses. (Hopefully this makes the code
  * more portable, since I've noticed some 'BSD don't have sockaddrs in
  * mbufs any more.)
  */
 #define	NFSSOCKADDR(a, t)	((t)(a))
 #define	NFSSOCKADDRALLOC(a) 					\
     do {							\
 	MALLOC((a), struct sockaddr *, sizeof (struct sockaddr), \
 	    M_SONAME, M_WAITOK); 				\
 	NFSBZERO((a), sizeof (struct sockaddr)); 		\
     } while (0)
 #define	NFSSOCKADDRSIZE(a, s)		((a)->sa_len = (s))
 #define	NFSSOCKADDRFREE(a) 					\
 	do { 							\
 		if (a) 						\
 			FREE((caddr_t)(a), M_SONAME); 		\
 	} while (0)
 
 /*
  * These should be defined as a process or thread structure, as required
  * for signal handling, etc.
  */
 #define	NFSNEWCRED(c)		(crdup(c))
 #define	NFSPROCCRED(p)		((p)->td_ucred)
 #define	NFSFREECRED(c)		(crfree(c))
 #define	NFSUIOPROC(u, p)	((u)->uio_td = NULL)
 #define	NFSPROCP(p)		((p)->td_proc)
 
 /*
  * Define these so that cn_hash and its length is ignored.
  */
 #define	NFSCNHASHZERO(c)
 #define	NFSCNHASH(c, v)
 #define	NCHNAMLEN	9999999
 
 /*
  * These macros are defined to initialize and set the timer routine.
  */
 #define	NFS_TIMERINIT \
 	newnfs_timer(NULL)
 
 /*
  * Handle SMP stuff:
  */
 #define	NFSSTATESPINLOCK	extern struct mtx nfs_state_mutex
 #define	NFSLOCKSTATE()		mtx_lock(&nfs_state_mutex)
 #define	NFSUNLOCKSTATE()	mtx_unlock(&nfs_state_mutex)
 #define	NFSSTATEMUTEXPTR	(&nfs_state_mutex)
 #define	NFSREQSPINLOCK		extern struct mtx nfs_req_mutex
 #define	NFSLOCKREQ()		mtx_lock(&nfs_req_mutex)
 #define	NFSUNLOCKREQ()		mtx_unlock(&nfs_req_mutex)
 #define	NFSSOCKMUTEX		extern struct mtx nfs_slock_mutex
 #define	NFSSOCKMUTEXPTR		(&nfs_slock_mutex)
 #define	NFSLOCKSOCK()		mtx_lock(&nfs_slock_mutex)
 #define	NFSUNLOCKSOCK()		mtx_unlock(&nfs_slock_mutex)
 #define	NFSNAMEIDMUTEX		extern struct mtx nfs_nameid_mutex
 #define	NFSLOCKNAMEID()		mtx_lock(&nfs_nameid_mutex)
 #define	NFSUNLOCKNAMEID()	mtx_unlock(&nfs_nameid_mutex)
 #define	NFSNAMEIDREQUIRED()	mtx_assert(&nfs_nameid_mutex, MA_OWNED)
 #define	NFSCLSTATEMUTEX		extern struct mtx nfs_clstate_mutex
 #define	NFSCLSTATEMUTEXPTR	(&nfs_clstate_mutex)
 #define	NFSLOCKCLSTATE()	mtx_lock(&nfs_clstate_mutex)
 #define	NFSUNLOCKCLSTATE()	mtx_unlock(&nfs_clstate_mutex)
 #define	NFSDLOCKMUTEX		extern struct mtx newnfsd_mtx
 #define	NFSDLOCKMUTEXPTR	(&newnfsd_mtx)
 #define	NFSD_LOCK()		mtx_lock(&newnfsd_mtx)
 #define	NFSD_UNLOCK()		mtx_unlock(&newnfsd_mtx)
 #define	NFSD_LOCK_ASSERT()	mtx_assert(&newnfsd_mtx, MA_OWNED)
 #define	NFSD_UNLOCK_ASSERT()	mtx_assert(&newnfsd_mtx, MA_NOTOWNED)
 #define	NFSV4ROOTLOCKMUTEX	extern struct mtx nfs_v4root_mutex
 #define	NFSV4ROOTLOCKMUTEXPTR	(&nfs_v4root_mutex)
 #define	NFSLOCKV4ROOTMUTEX()	mtx_lock(&nfs_v4root_mutex)
 #define	NFSUNLOCKV4ROOTMUTEX()	mtx_unlock(&nfs_v4root_mutex)
 #define	NFSLOCKNODE(n)		mtx_lock(&((n)->n_mtx))
 #define	NFSUNLOCKNODE(n)	mtx_unlock(&((n)->n_mtx))
 #define	NFSLOCKMNT(m)		mtx_lock(&((m)->nm_mtx))
 #define	NFSUNLOCKMNT(m)		mtx_unlock(&((m)->nm_mtx))
 #define	NFSLOCKREQUEST(r)	mtx_lock(&((r)->r_mtx))
 #define	NFSUNLOCKREQUEST(r)	mtx_unlock(&((r)->r_mtx))
 #define	NFSPROCLISTLOCK()	sx_slock(&allproc_lock)
 #define	NFSPROCLISTUNLOCK()	sx_sunlock(&allproc_lock)
 #define	NFSLOCKSOCKREQ(r)	mtx_lock(&((r)->nr_mtx))
 #define	NFSUNLOCKSOCKREQ(r)	mtx_unlock(&((r)->nr_mtx))
 #define	NFSLOCKDS(d)		mtx_lock(&((d)->nfsclds_mtx))
 #define	NFSUNLOCKDS(d)		mtx_unlock(&((d)->nfsclds_mtx))
 #define	NFSSESSIONMUTEXPTR(s)	(&((s)->mtx))
 #define	NFSLOCKSESSION(s)	mtx_lock(&((s)->mtx))
 #define	NFSUNLOCKSESSION(s)	mtx_unlock(&((s)->mtx))
 
 /*
  * Use these macros to initialize/free a mutex.
  */
 #define	NFSINITSOCKMUTEX(m)	mtx_init((m), "nfssock", NULL, MTX_DEF)
 #define	NFSFREEMUTEX(m)		mtx_destroy((m))
 
 int nfsmsleep(void *, void *, int, const char *, struct timespec *);
 
 /*
  * And weird vm stuff in the nfs server.
  */
 #define	PDIRUNLOCK	0x0
 #define	MAX_COMMIT_COUNT	(1024 * 1024)
 
 /*
  * Define these to handle the type of va_rdev.
  */
 #define	NFSMAKEDEV(m, n)	makedev((m), (n))
 #define	NFSMAJOR(d)		major(d)
 #define	NFSMINOR(d)		minor(d)
 
 /*
- * Define this to be the macro that returns the minimum size required
- * for a directory entry.
- */
-#define	DIRENT_SIZE(dp)		GENERIC_DIRSIZ(dp)
-
-/*
  * The vnode tag for nfsv4root.
  */
 #define	VT_NFSV4ROOT		"nfsv4root"
 
 /*
  * Define whatever it takes to do a vn_rdwr().
  */
 #define	NFSD_RDWR(r, v, b, l, o, s, i, c, a, p) \
 	vn_rdwr((r), (v), (b), (l), (o), (s), (i), (c), NULL, (a), (p))
 
 /*
  * Macros for handling memory for different BSDen.
  * NFSBCOPY(src, dst, len) - copies len bytes, non-overlapping
  * NFSOVBCOPY(src, dst, len) - ditto, but data areas might overlap
  * NFSBCMP(cp1, cp2, len) - compare len bytes, return 0 if same
  * NFSBZERO(cp, len) - set len bytes to 0x0
  */
 #define	NFSBCOPY(s, d, l)	bcopy((s), (d), (l))
 #define	NFSOVBCOPY(s, d, l)	ovbcopy((s), (d), (l))
 #define	NFSBCMP(s, d, l)	bcmp((s), (d), (l))
 #define	NFSBZERO(s, l)		bzero((s), (l))
 
 /*
  * Some queue.h files don't have these defined in them.
  */
 #define	LIST_END(head)		NULL
 #define	SLIST_END(head)		NULL
 #define	TAILQ_END(head)		NULL
 
 /*
  * This must be defined to be a global variable that increments once
  * per second, but never stops or goes backwards, even when a "date"
  * command changes the TOD clock. It is used for delta times for
  * leases, etc.
  */
 #define	NFSD_MONOSEC		time_uptime
 
 /*
  * Declare the malloc types.
  */
 MALLOC_DECLARE(M_NEWNFSRVCACHE);
 MALLOC_DECLARE(M_NEWNFSDCLIENT);
 MALLOC_DECLARE(M_NEWNFSDSTATE);
 MALLOC_DECLARE(M_NEWNFSDLOCK);
 MALLOC_DECLARE(M_NEWNFSDLOCKFILE);
 MALLOC_DECLARE(M_NEWNFSSTRING);
 MALLOC_DECLARE(M_NEWNFSUSERGROUP);
 MALLOC_DECLARE(M_NEWNFSDREQ);
 MALLOC_DECLARE(M_NEWNFSFH);
 MALLOC_DECLARE(M_NEWNFSCLOWNER);
 MALLOC_DECLARE(M_NEWNFSCLOPEN);
 MALLOC_DECLARE(M_NEWNFSCLDELEG);
 MALLOC_DECLARE(M_NEWNFSCLCLIENT);
 MALLOC_DECLARE(M_NEWNFSCLLOCKOWNER);
 MALLOC_DECLARE(M_NEWNFSCLLOCK);
 MALLOC_DECLARE(M_NEWNFSDIROFF);
 MALLOC_DECLARE(M_NEWNFSV4NODE);
 MALLOC_DECLARE(M_NEWNFSDIRECTIO);
 MALLOC_DECLARE(M_NEWNFSMNT);
 MALLOC_DECLARE(M_NEWNFSDROLLBACK);
 MALLOC_DECLARE(M_NEWNFSLAYOUT);
 MALLOC_DECLARE(M_NEWNFSFLAYOUT);
 MALLOC_DECLARE(M_NEWNFSDEVINFO);
 MALLOC_DECLARE(M_NEWNFSSOCKREQ);
 MALLOC_DECLARE(M_NEWNFSCLDS);
 MALLOC_DECLARE(M_NEWNFSLAYRECALL);
 MALLOC_DECLARE(M_NEWNFSDSESSION);
 #define	M_NFSRVCACHE	M_NEWNFSRVCACHE
 #define	M_NFSDCLIENT	M_NEWNFSDCLIENT
 #define	M_NFSDSTATE	M_NEWNFSDSTATE
 #define	M_NFSDLOCK	M_NEWNFSDLOCK
 #define	M_NFSDLOCKFILE	M_NEWNFSDLOCKFILE
 #define	M_NFSSTRING	M_NEWNFSSTRING
 #define	M_NFSUSERGROUP	M_NEWNFSUSERGROUP
 #define	M_NFSDREQ	M_NEWNFSDREQ
 #define	M_NFSFH		M_NEWNFSFH
 #define	M_NFSCLOWNER	M_NEWNFSCLOWNER
 #define	M_NFSCLOPEN	M_NEWNFSCLOPEN
 #define	M_NFSCLDELEG	M_NEWNFSCLDELEG
 #define	M_NFSCLCLIENT	M_NEWNFSCLCLIENT
 #define	M_NFSCLLOCKOWNER M_NEWNFSCLLOCKOWNER
 #define	M_NFSCLLOCK	M_NEWNFSCLLOCK
 #define	M_NFSDIROFF	M_NEWNFSDIROFF
 #define	M_NFSV4NODE	M_NEWNFSV4NODE
 #define	M_NFSDIRECTIO	M_NEWNFSDIRECTIO
 #define	M_NFSDROLLBACK	M_NEWNFSDROLLBACK
 #define	M_NFSLAYOUT	M_NEWNFSLAYOUT
 #define	M_NFSFLAYOUT	M_NEWNFSFLAYOUT
 #define	M_NFSDEVINFO	M_NEWNFSDEVINFO
 #define	M_NFSSOCKREQ	M_NEWNFSSOCKREQ
 #define	M_NFSCLDS	M_NEWNFSCLDS
 #define	M_NFSLAYRECALL	M_NEWNFSLAYRECALL
 #define	M_NFSDSESSION	M_NEWNFSDSESSION
 
 #define	NFSINT_SIGMASK(set) 						\
 	(SIGISMEMBER(set, SIGINT) || SIGISMEMBER(set, SIGTERM) ||	\
 	 SIGISMEMBER(set, SIGHUP) || SIGISMEMBER(set, SIGKILL) ||	\
 	 SIGISMEMBER(set, SIGQUIT))
 
 /*
  * Convert a quota block count to byte count.
  * Multiplies (q) in place by (b) and yields the new value of (q).  The
  * whole expansion is parenthesized so the macro is also safe when used as
  * an operand of a larger expression.
  */
 #define	NFSQUOTABLKTOBYTE(q, b)	((q) *= (b))
 
 /*
  * Define this as the largest file size supported. (It should probably
  * be available via a VFS_xxx Op, but it isn't.)
  */
 #define	NFSRV_MAXFILESIZE	((u_int64_t)0x800000000000)
 
 /*
  * Set this macro to index() or strchr(), whichever is supported.
  */
 #define	STRCHR(s, c)		strchr((s), (c))
 
 /*
  * Set the n_time in the client write rpc, as required.
  */
 /*
  * When (w) is non-zero: take (n)->n_mtx, copy (a)->na_mtime into
  * (n)->n_mtime and, if (v4) is also non-zero, copy (a)->na_filerev into
  * (n)->n_change, then drop the mutex.  Does nothing when (w) is zero.
  */
 #define	NFSWRITERPC_SETTIME(w, n, a, v4)				\
 	do {								\
 		if (w) {						\
 			mtx_lock(&((n)->n_mtx));			\
 			(n)->n_mtime = (a)->na_mtime;			\
 			if (v4)						\
 				(n)->n_change = (a)->na_filerev;	\
 			mtx_unlock(&((n)->n_mtx));			\
 		}							\
 	} while (0)
 
 /*
  * Fake value, just to make the client work.
  */
 #define	NFS_LATTR_NOSHRINK	1
 
 /*
  * Prototypes for functions where the arguments vary for different ports.
  */
 int nfscl_loadattrcache(struct vnode **, struct nfsvattr *, void *, void *,
     int, int);
 int newnfs_realign(struct mbuf **, int);
 
 /*
  * If the port runs on an SMP box that can enforce Atomic ops with low
  * overheads, define these as atomic increments/decrements. If not,
  * don't worry about it, since these are used for stats that can be
  * "out by one" without disastrous consequences.
  */
 #define	NFSINCRGLOBAL(a)	((a)++)
 
 /*
  * Assorted funky stuff to make things work under Darwin8.
  */
 /*
  * These macros check whether a field in vattr is set.
  */
 #define	NFSATTRISSET(t, v, a)	((v)->a != (t)VNOVAL)
 #define	NFSATTRISSETTIME(v, a)	((v)->a.tv_sec != VNOVAL)
 
 /*
  * Manipulate mount flags.
  */
 #define	NFSSTA_HASWRITEVERF	0x00040000  /* Has write verifier */
 #define	NFSSTA_GOTFSINFO	0x00100000  /* Got the fsinfo */
 #define	NFSSTA_OPENMODE		0x00200000  /* Must use correct open mode */
 #define	NFSSTA_NOLAYOUTCOMMIT	0x04000000  /* Don't do LayoutCommit */
 #define	NFSSTA_SESSPERSIST	0x08000000  /* Has a persistent session */
 #define	NFSSTA_TIMEO		0x10000000  /* Experiencing a timeout */
 #define	NFSSTA_LOCKTIMEO	0x20000000  /* Experiencing a lockd timeout */
 #define	NFSSTA_HASSETFSID	0x40000000  /* Has set the fsid */
 #define	NFSSTA_PNFS		0x80000000  /* pNFS is enabled */
 
 #define	NFSHASNFSV3(n)		((n)->nm_flag & NFSMNT_NFSV3)
 #define	NFSHASNFSV4(n)		((n)->nm_flag & NFSMNT_NFSV4)
 #define	NFSHASNFSV4N(n)		((n)->nm_minorvers > 0)
 #define	NFSHASNFSV3OR4(n)	((n)->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4))
 #define	NFSHASGOTFSINFO(n)	((n)->nm_state & NFSSTA_GOTFSINFO)
 #define	NFSHASHASSETFSID(n)	((n)->nm_state & NFSSTA_HASSETFSID)
 #define	NFSHASSTRICT3530(n)	((n)->nm_flag & NFSMNT_STRICT3530)
 #define	NFSHASWRITEVERF(n)	((n)->nm_state & NFSSTA_HASWRITEVERF)
 #define	NFSHASINT(n)		((n)->nm_flag & NFSMNT_INT)
 #define	NFSHASSOFT(n)		((n)->nm_flag & NFSMNT_SOFT)
 #define	NFSHASINTORSOFT(n)	((n)->nm_flag & (NFSMNT_INT | NFSMNT_SOFT))
 #define	NFSHASDUMBTIMR(n)	((n)->nm_flag & NFSMNT_DUMBTIMR)
 #define	NFSHASNOCONN(n)		((n)->nm_flag & NFSMNT_MNTD)
 #define	NFSHASKERB(n)		((n)->nm_flag & NFSMNT_KERB)
 #define	NFSHASALLGSSNAME(n)	((n)->nm_flag & NFSMNT_ALLGSSNAME)
 #define	NFSHASINTEGRITY(n)	((n)->nm_flag & NFSMNT_INTEGRITY)
 #define	NFSHASPRIVACY(n)	((n)->nm_flag & NFSMNT_PRIVACY)
 #define	NFSSETWRITEVERF(n)	((n)->nm_state |= NFSSTA_HASWRITEVERF)
 #define	NFSSETHASSETFSID(n)	((n)->nm_state |= NFSSTA_HASSETFSID)
 #define	NFSHASPNFSOPT(n)	((n)->nm_flag & NFSMNT_PNFS)
 #define	NFSHASNOLAYOUTCOMMIT(n)	((n)->nm_state & NFSSTA_NOLAYOUTCOMMIT)
 #define	NFSHASSESSPERSIST(n)	((n)->nm_state & NFSSTA_SESSPERSIST)
 #define	NFSHASPNFS(n)		((n)->nm_state & NFSSTA_PNFS)
 #define	NFSHASOPENMODE(n)	((n)->nm_state & NFSSTA_OPENMODE)
 #define	NFSHASONEOPENOWN(n)	(((n)->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&	\
 				    (n)->nm_minorvers > 0)
 
 /*
  * Gets the stats field out of the mount structure.
  */
 #define	vfs_statfs(m)	(&((m)->mnt_stat))
 
 /*
  * Set boottime.
  */
 #define	NFSSETBOOTTIME(b)	(getboottime(&b))
 
 /*
  * The size of directory blocks in the buffer cache.
  * MUST BE in the range of PAGE_SIZE <= NFS_DIRBLKSIZ <= MAXBSIZE!!
  */
 #define	NFS_DIRBLKSIZ	(16 * DIRBLKSIZ) /* Must be a multiple of DIRBLKSIZ */
 
 /*
  * Define these macros to access mnt_flag fields.
  */
 #define	NFSMNT_RDONLY(m)	((m)->mnt_flag & MNT_RDONLY)
 #endif	/* _KERNEL */
 
 /*
  * Define a structure similar to ufs_args for use in exporting the V4 root.
  */
 struct nfsex_args {
 	char	*fspec;
 	struct export_args	export;
 };
 
 /*
  * These export flags should be defined, but there are no bits left.
  * Maybe a separate mnt_exflag field could be added or the mnt_flag
  * field increased to 64 bits?
  */
 #ifndef	MNT_EXSTRICTACCESS
 #define	MNT_EXSTRICTACCESS	0x0
 #endif
 #ifndef MNT_EXV4ONLY
 #define	MNT_EXV4ONLY		0x0
 #endif
 
 #ifdef _KERNEL
 /*
  * Define this to invalidate the attribute cache for the nfs node.
  */
 #define	NFSINVALATTRCACHE(n)	((n)->n_attrstamp = 0)
 
 /* Used for FreeBSD only */
 void nfsd_mntinit(void);
 
 /*
  * Define these for vnode lock/unlock ops.
  *
  * These are good abstractions to macro out, so that they can be added to
  * later, for debugging or stats, etc.
  */
 #define	NFSVOPLOCK(v, f)	vn_lock((v), (f))
 #define	NFSVOPUNLOCK(v, f)	VOP_UNLOCK((v), (f))
 #define	NFSVOPISLOCKED(v)	VOP_ISLOCKED((v))
 
 /*
  * Define ncl_hash().
  */
 #define	ncl_hash(f, l)	(fnv_32_buf((f), (l), FNV1_32_INIT))
 
 int newnfs_iosize(struct nfsmount *);
 
 int newnfs_vncmpf(struct vnode *, void *);
 
 #ifndef NFS_MINDIRATTRTIMO
 #define	NFS_MINDIRATTRTIMO 3		/* VDIR attrib cache timeout in sec */
 #endif
 #ifndef NFS_MAXDIRATTRTIMO
 #define	NFS_MAXDIRATTRTIMO 60
 #endif
 
 /*
  * Nfs outstanding request list element
  */
 struct nfsreq {
 	TAILQ_ENTRY(nfsreq) r_chain;
 	u_int32_t	r_flags;	/* flags on request, see below */
 	struct nfsmount *r_nmp;		/* Client mnt ptr */
 	struct mtx	r_mtx;		/* Mutex lock for this structure */
 };
 
 #ifndef NFS_MAXBSIZE
 #define	NFS_MAXBSIZE	MAXBCACHEBUF
 #endif
 
 /*
  * This macro checks to see if issuing of delegations is allowed for this
  * vnode.
  */
 #ifdef VV_DISABLEDELEG
 #define	NFSVNO_DELEGOK(v)						\
 	((v) == NULL || ((v)->v_vflag & VV_DISABLEDELEG) == 0)
 #else
 #define	NFSVNO_DELEGOK(v)	(1)
 #endif
 
 /*
  * Name used by getnewvnode() to describe filesystem, "nfs".
  * For performance reasons it is useful to have the same string
  * used in both places that call getnewvnode().
  */
 extern const char nfs_vnode_tag[];
 
 #endif	/* _KERNEL */
 
 #endif	/* _NFS_NFSPORT_H_ */
Index: head/sys/fs/nfsclient/nfs_clrpcops.c
===================================================================
--- head/sys/fs/nfsclient/nfs_clrpcops.c	(revision 318735)
+++ head/sys/fs/nfsclient/nfs_clrpcops.c	(revision 318736)
@@ -1,6005 +1,6015 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Rpc op calls, generally called from the vnode op calls or through the
  * buffer cache, for NFS v2, 3 and 4.
  * These do not normally make any changes to vnode arguments or use
  * structures that might change between the VFS variants. The returned
  * arguments are all at the end, after the NFSPROC_T *p one.
  */
 
 #ifndef APPLEKEXT
 #include "opt_inet6.h"
 
 #include <fs/nfs/nfsport.h>
 #include <sys/sysctl.h>
 
 SYSCTL_DECL(_vfs_nfs);
 
 static int	nfsignore_eexist = 0;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
     &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");
 
 /*
  * Global variables
  */
 extern int nfs_numnfscbd;
 extern struct timeval nfsboottime;
 extern u_int32_t newnfs_false, newnfs_true;
 extern nfstype nfsv34_type[9];
 extern int nfsrv_useacl;
 extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
 extern int nfscl_debuglevel;
 NFSCLSTATEMUTEX;
 int nfstest_outofseq = 0;
 int nfscl_assumeposixlocks = 1;
 int nfscl_enablecallb = 0;
 short nfsv4_cbport = NFSV4_CBPORT;
 int nfstest_openallsetattr = 0;
 #endif	/* !APPLEKEXT */
 
-#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
+#define	DIRHDSIZ	offsetof(struct dirent, d_name)
 
 /*
  * nfscl_getsameserver() can return one of three values:
  * NFSDSP_USETHISSESSION - Use this session for the DS.
  * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new
  *     session.
  * NFSDSP_NOTFOUND - No matching server was found.
  */
 enum nfsclds_state {
 	NFSDSP_USETHISSESSION = 0,	/* Use the matching session as is */
 	NFSDSP_SEQTHISSESSION = 1,	/* New session, sequence from this dsp */
 	NFSDSP_NOTFOUND = 2,		/* No matching server */
 };
 
 static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
 static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
     nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *);
 static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
     struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
     void *);
 static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
     nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
     struct nfsvattr *, struct nfsfh **, int *, int *, void *);
 static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
     NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
     int *, void *, int *);
 static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
     struct nfscllockowner *, u_int64_t, u_int64_t,
     u_int32_t, struct ucred *, NFSPROC_T *, int);
 static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
     struct acl *, nfsv4stateid_t *, void *);
 static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
     uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
     struct ucred *, NFSPROC_T *);
 static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_storage *,
     struct nfsclds **, NFSPROC_T *);
 static void nfscl_initsessionslots(struct nfsclsession *);
 static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
     struct nfsclflayout *, uint64_t, uint64_t, struct ucred *, NFSPROC_T *);
 static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
     struct nfsclds *, uint64_t, int, struct nfsfh *, struct ucred *,
     NFSPROC_T *);
 static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
     nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
     struct nfsfh *, int, struct ucred *, NFSPROC_T *);
 static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
     struct nfsclds *, struct nfsclds **);
 #ifdef notyet
 static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
     struct nfsfh *, struct ucred *, NFSPROC_T *, void *);
 #endif
 
 /*
  * nfs null call from vfs.
  * Returns the error from nfscl_request() or, when that is zero, the reply
  * status.  The reply mbuf chain is released before returning.
  */
 APPLESTATIC int
 nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
 {
 	struct nfsrv_descript nfsd;
 	struct nfsrv_descript *nd = &nfsd;
 	int error;
 
 	NFSCL_REQSTART(nd, NFSPROC_NULL, vp);
 	error = nfscl_request(nd, vp, p, cred, NULL);
 	/* A transport error takes precedence over the reply status. */
 	if (error == 0 && nd->nd_repstat != 0)
 		error = nd->nd_repstat;
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * nfs access rpc op.
  * For nfs version 3 and 4, use the access rpc to check accessibility. If file
  * modes are changed on the server, accesses might still fail later.
  *
  * "acmode" is a mask of VREAD/VWRITE/VEXEC.  It is translated to the
  * NFSACCESS_xxx bits appropriate for the vnode type and handed to
  * nfsrpc_accessrpc().  Returns 0 if every requested bit was granted, EACCES
  * if the RPC worked but some bit was not granted, otherwise the RPC error.
  */
 APPLESTATIC int
 nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
 {
 	u_int32_t mode, rmode;
 	int error, isdir;
 
 	isdir = (vnode_vtype(vp) == VDIR);
 
 	mode = 0;
 	if (acmode & VREAD)
 		mode |= NFSACCESS_READ;
 	if (acmode & VWRITE) {
 		mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
 		/* Writing a directory also covers deleting entries. */
 		if (isdir)
 			mode |= NFSACCESS_DELETE;
 	}
 	if (acmode & VEXEC) {
 		/* "Execute" on a directory means lookup. */
 		if (isdir)
 			mode |= NFSACCESS_LOOKUP;
 		else
 			mode |= NFSACCESS_EXECUTE;
 	}
 
 	/*
 	 * nfsrpc_accessrpc() does the actual RPC.
 	 */
 	error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode,
 	    NULL);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * The NFS V3 spec does not clarify whether or not
 	 * the returned access bits can be a superset of
 	 * the ones requested, so only the requested bits are compared.
 	 */
 	if ((rmode & mode) != mode)
 		return (EACCES);
 	return (0);
 }
 
 /*
  * The actual rpc, separated out for Darwin.
  *
  * Sends "mode" (NFSACCESS_xxx bits) to the server and, when the reply status
  * is zero, stores the granted bits in *rmodep.  For NFSv4 a Getattr op is
  * appended to the request and the "supported" mask from the reply is used to
  * clear granted bits the server did not report as supported; for the other
  * versions every requested bit is treated as supported.
  *
  * *attrflagp is cleared on entry; it and *nap are filled in by
  * nfscl_postop_attr() when attributes are parsed from the reply.
  *
  * Returns 0 on success, otherwise the error from nfscl_request(), from
  * parsing the reply, or the reply status.  *rmodep is only written when the
  * reply status is zero.
  */
 APPLESTATIC int
 nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep,
     void *stuff)
 {
 	u_int32_t *tl;
 	u_int32_t supported, rmode;
 	int error;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	nfsattrbit_t attrbits;
 
 	*attrflagp = 0;
 	/* Default for versions whose reply has no "supported" mask. */
 	supported = mode;
 	NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(mode);
 	if (nd->nd_flag & ND_NFSV4) {
 		/*
 		 * And do a Getattr op.
 		 */
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
 		NFSGETATTR_ATTRBIT(&attrbits);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 	}
 	error = nfscl_request(nd, vp, p, cred, stuff);
 	if (error)
 		return (error);
 	/* For NFSv3 the post-op attributes are parsed whatever the status. */
 	if (nd->nd_flag & ND_NFSV3) {
 		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
 		if (error)
 			goto nfsmout;
 	}
 	if (!nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV4) {
 			/* NFSv4 reply: supported mask, then access mask. */
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			supported = fxdr_unsigned(u_int32_t, *tl++);
 		} else {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		}
 		rmode = fxdr_unsigned(u_int32_t, *tl);
 		/* For NFSv4 the Getattr result follows the access result. */
 		if (nd->nd_flag & ND_NFSV4)
 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
 
 		/*
 		 * It's not obvious what should be done about
 		 * unsupported access modes. For now, be paranoid
 		 * and clear the unsupported ones.
 		 */
 		rmode &= supported;
 		*rmodep = rmode;
 	} else
 		error = nd->nd_repstat;
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * nfs open rpc
  *
  * Acquires an NFSv4 Open for a regular file; for any other vnode type this
  * returns 0 without doing anything.  The FREAD/FWRITE bits of "amode" are
  * mapped to NFSV4OPEN_ACCESSREAD/NFSV4OPEN_ACCESSWRITE.
  *
  * Each pass calls nfscl_open() and, when that reports NFSCLOPEN_DOOPEN,
  * issues the Open RPC through nfsrpc_openrpc().  The pass is repeated for as
  * long as the error is NFSERR_GRACE, NFSERR_STALECLIENTID,
  * NFSERR_STALEDONTRECOVER, NFSERR_DELAY or NFSERR_BADSESSION, and up to four
  * times for NFSERR_EXPIRED/NFSERR_BADSTATEID while nfscl_hasexpired()
  * returns 0.  Once four such retries have been used, any remaining error is
  * reported as EIO.
  */
 APPLESTATIC int
 nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
 {
 	struct nfsclopen *op;
 	struct nfscldeleg *dp;
 	struct nfsfh *nfhp;
 	struct nfsnode *np = VTONFS(vp);
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	u_int32_t mode, clidrev;
 	int ret, newone, error, expireret = 0, retrycnt;
 
 	/*
 	 * For NFSv4, Open Ops are only done on Regular Files.
 	 */
 	if (vnode_vtype(vp) != VREG)
 		return (0);
 	mode = 0;
 	if (amode & FREAD)
 		mode |= NFSV4OPEN_ACCESSREAD;
 	if (amode & FWRITE)
 		mode |= NFSV4OPEN_ACCESSWRITE;
 	nfhp = np->n_fhp;
 
 	retrycnt = 0;
 #ifdef notdef
 { char name[100]; int namel;
 namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
 bcopy(NFS4NODENAME(np->n_v4), name, namel);
 name[namel] = '\0';
 printf("rpcopen p=0x%x name=%s",p->p_pid,name);
 if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
 else printf(" fhl=0\n");
 }
 #endif
 	do {
 	    dp = NULL;
 	    error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
 		cred, p, NULL, &op, &newone, &ret, 1);
 	    if (error) {
 		return (error);
 	    }
 	    /*
 	     * Remember the clientid revision for the expiry check below;
 	     * 0 is used when the mount has no nm_clp.
 	     */
 	    if (nmp->nm_clp != NULL)
 		clidrev = nmp->nm_clp->nfsc_clientidrev;
 	    else
 		clidrev = 0;
 	    if (ret == NFSCLOPEN_DOOPEN) {
 		if (np->n_v4 != NULL) {
 			error = nfsrpc_openrpc(nmp, vp, np->n_v4->n4_data,
 			   np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
 			   np->n_fhp->nfh_len, mode, op,
 			   NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &dp,
 			   0, 0x0, cred, p, 0, 0);
 			if (dp != NULL) {
 #ifdef APPLE
 				OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag);
 #else
 				NFSLOCKNODE(np);
 				np->n_flag &= ~NDELEGMOD;
 				/*
 				 * Invalidate the attribute cache, so that
 				 * attributes that pre-date the issue of a
 				 * delegation are not cached, since the
 				 * cached attributes will remain valid while
 				 * the delegation is held.
 				 */
 				NFSINVALATTRCACHE(np);
 				NFSUNLOCKNODE(np);
 #endif
 				(void) nfscl_deleg(nmp->nm_mountp,
 				    op->nfso_own->nfsow_clp,
 				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
 			}
 		} else {
 			/*
 			 * Without n_v4 there is no n4_data/name to pass
 			 * to nfsrpc_openrpc().
 			 */
 			error = EIO;
 		}
 		newnfs_copyincred(cred, &op->nfso_cred);
 	    } else if (ret == NFSCLOPEN_SETCRED)
 		/*
 		 * This is a new local open on a delegation. It needs
 		 * to have credentials so that an open can be done
 		 * against the server during recovery.
 		 */
 		newnfs_copyincred(cred, &op->nfso_cred);
 
 	    /*
 	     * nfso_opencnt is the count of how many VOP_OPEN()s have
 	     * been done on this Open successfully and a VOP_CLOSE()
 	     * is expected for each of these.
 	     * If error is non-zero, don't increment it, since the Open
 	     * hasn't succeeded yet.
 	     */
 	    if (!error)
 		op->nfso_opencnt++;
 	    nfscl_openrelease(nmp, op, error, newone);
 	    /*
 	     * The first group of errors is retried without limit after
 	     * nfs_catnap(); expired/bad stateid errors are retried a
 	     * bounded number of times while nfscl_hasexpired() returns 0.
 	     */
 	    if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
 		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
 		error == NFSERR_BADSESSION) {
 		(void) nfs_catnap(PZERO, error, "nfs_open");
 	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
 		&& clidrev != 0) {
 		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
 		retrycnt++;
 	    }
 	} while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
 	    error == NFSERR_BADSESSION ||
 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
 	/* The bounded retries were used up; report a generic I/O error. */
 	if (error && retrycnt >= 4)
 		error = EIO;
 	return (error);
 }
 
 /*
  * the actual open rpc
  *
  * Builds and sends an NFSv4 Open (NFSV4OPEN_NOCREATE) followed by a Getattr
  * for the Change and TimeModify attributes, then parses the reply into "op".
  * The request is started against the file handle nfhp/fhlen; newfhp/newfhlen
  * is the file handle copied into a new delegation and used for OpenConfirm.
  *
  * The claim type is chosen from the arguments:
  *  - reclaim != 0: NFSV4OPEN_CLAIMPREVIOUS with "delegtype";
  *  - *dpp != NULL on entry: NFSV4OPEN_CLAIMDELEGATECUR with that
  *    delegation's stateid, followed by "name";
  *  - otherwise: NFSV4OPEN_CLAIMNULL followed by "name".
  *
  * *dpp is set to NULL on entry.  If the reply carries a read or write
  * delegation, a new struct nfscldeleg is allocated and handed back through
  * *dpp when the function returns 0; on error it is freed here.
  *
  * "syscred" selects ND_USEGSSNAME for the request.  "recursed" is non-zero
  * when this function has called itself to retry for a delegation and
  * prevents a further retry.
  *
  * Returns 0, an error from the request/parsing code, or the reply status.
  */
 APPLESTATIC int
 nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
     u_int8_t *name, int namelen, struct nfscldeleg **dpp,
     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
     int syscred, int recursed)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfscldeleg *dp, *ndp = NULL;
 	struct nfsvattr nfsva;
 	u_int32_t rflags, deleg;
 	nfsattrbit_t attrbits;
 	int error, ret, acesize, limitby;
 	struct nfsclsession *tsep;
 
 	/* Take the caller's delegation (if any) and clear the out argument. */
 	dp = *dpp;
 	*dpp = NULL;
 	nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL);
 	/* Open owner seqid, share access, share deny and the clientid. */
 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
 	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
 	tsep = nfsmnt_mdssession(nmp);
 	*tl++ = tsep->nfsess_clientid.lval[0];
 	*tl = tsep->nfsess_clientid.lval[1];
 	(void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
 	if (reclaim) {
 		*tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(delegtype);
 	} else {
 		if (dp != NULL) {
 			*tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
 			/* The stateid seqid is sent as 0 for NFSHASNFSV4N. */
 			if (NFSHASNFSV4N(nmp))
 				*tl++ = 0;
 			else
 				*tl++ = dp->nfsdl_stateid.seqid;
 			*tl++ = dp->nfsdl_stateid.other[0];
 			*tl++ = dp->nfsdl_stateid.other[1];
 			*tl = dp->nfsdl_stateid.other[2];
 		} else {
 			*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
 		}
 		(void) nfsm_strtom(nd, name, namelen);
 	}
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
 	(void) nfsrv_putattrbit(nd, &attrbits);
 	if (syscred)
 		nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error)
 		return (error);
 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
 	if (!nd->nd_repstat) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
 		    6 * NFSX_UNSIGNED);
 		op->nfso_stateid.seqid = *tl++;
 		op->nfso_stateid.other[0] = *tl++;
 		op->nfso_stateid.other[1] = *tl++;
 		op->nfso_stateid.other[2] = *tl;
 		/*
 		 * tl still points at the last stateid word; the result
 		 * flags are the last of the six words that follow it.
 		 */
 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 		if (error)
 			goto nfsmout;
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		deleg = fxdr_unsigned(u_int32_t, *tl);
 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
 			/* Note the first delegation seen for this client. */
 			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
 			      NFSCLFLAGS_FIRSTDELEG))
 				op->nfso_own->nfsow_clp->nfsc_flags |=
 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
 			/* The file handle is stored after the structure. */
 			MALLOC(ndp, struct nfscldeleg *,
 			    sizeof (struct nfscldeleg) + newfhlen,
 			    M_NFSCLDELEG, M_WAITOK);
 			LIST_INIT(&ndp->nfsdl_owner);
 			LIST_INIT(&ndp->nfsdl_lock);
 			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
 			ndp->nfsdl_fhlen = newfhlen;
 			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
 			newnfs_copyincred(cred, &ndp->nfsdl_cred);
 			nfscl_lockinit(&ndp->nfsdl_rwlock);
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
 			    NFSX_UNSIGNED);
 			ndp->nfsdl_stateid.seqid = *tl++;
 			ndp->nfsdl_stateid.other[0] = *tl++;
 			ndp->nfsdl_stateid.other[1] = *tl++;
 			ndp->nfsdl_stateid.other[2] = *tl++;
 			/* Non-zero means NFSCLDL_RECALL is set below. */
 			ret = fxdr_unsigned(int, *tl);
 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
 				ndp->nfsdl_flags = NFSCLDL_WRITE;
 				/*
 				 * Indicates how much the file can grow.
 				 */
 				NFSM_DISSECT(tl, u_int32_t *,
 				    3 * NFSX_UNSIGNED);
 				limitby = fxdr_unsigned(int, *tl++);
 				switch (limitby) {
 				case NFSV4OPEN_LIMITSIZE:
 					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
 					break;
 				case NFSV4OPEN_LIMITBLOCKS:
 					/* Product of the two words. */
 					ndp->nfsdl_sizelimit =
 					    fxdr_unsigned(u_int64_t, *tl++);
 					ndp->nfsdl_sizelimit *=
 					    fxdr_unsigned(u_int64_t, *tl);
 					break;
 				default:
 					error = NFSERR_BADXDR;
 					goto nfsmout;
 				}
 			} else {
 				ndp->nfsdl_flags = NFSCLDL_READ;
 			}
 			if (ret)
 				ndp->nfsdl_flags |= NFSCLDL_RECALL;
 			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
 			    &acesize, p);
 			if (error)
 				goto nfsmout;
 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
 			error = NFSERR_BADXDR;
 			goto nfsmout;
 		}
 		/* Two words precede the attributes; their values are unused. */
 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
 		    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
 		    NULL, NULL, NULL, p, cred);
 		if (error)
 			goto nfsmout;
 		if (ndp != NULL) {
 			ndp->nfsdl_change = nfsva.na_filerev;
 			ndp->nfsdl_modtime = nfsva.na_mtime;
 			ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
 		}
 		/* Confirm the Open if the server asked for it. */
 		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
 		    do {
 			ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
 			    cred, p);
 			if (ret == NFSERR_DELAY)
 			    (void) nfs_catnap(PZERO, ret, "nfs_open");
 		    } while (ret == NFSERR_DELAY);
 		    error = ret;
 		}
 		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
 		    nfscl_assumeposixlocks)
 		    op->nfso_posixlock = 1;
 		else
 		    op->nfso_posixlock = 0;
 
 		/*
 		 * If the server is handing out delegations, but we didn't
 		 * get one because an OpenConfirm was required, try the
 		 * Open again, to get a delegation. This is a harmless no-op,
 		 * from a server's point of view.
 		 */
 		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
 		    (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
 		    && !error && dp == NULL && ndp == NULL && !recursed) {
 		    do {
 			ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
 			    newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
 			    cred, p, syscred, 1);
 			if (ret == NFSERR_DELAY)
 			    (void) nfs_catnap(PZERO, ret, "nfs_open2");
 		    } while (ret == NFSERR_DELAY);
 		    if (ret) {
 			if (ndp != NULL) {
 				FREE((caddr_t)ndp, M_NFSCLDELEG);
 				ndp = NULL;
 			}
 			/*
 			 * Only these errors from the retry are passed on;
 			 * any other failure of the retry is ignored.
 			 */
 			if (ret == NFSERR_STALECLIENTID ||
 			    ret == NFSERR_STALEDONTRECOVER ||
 			    ret == NFSERR_BADSESSION)
 				error = ret;
 		    }
 		}
 	}
 	if (nd->nd_repstat != 0 && error == 0)
 		error = nd->nd_repstat;
 	if (error == NFSERR_STALECLIENTID)
 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
 nfsmout:
 	/* Hand the new delegation to the caller only on success. */
 	if (!error)
 		*dpp = ndp;
 	else if (ndp != NULL)
 		FREE((caddr_t)ndp, M_NFSCLDELEG);
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * open downgrade rpc
  *
  * Sends the Open's stateid (with the seqid word zeroed when NFSHASNFSV4N()
  * is true for the mount), the open owner's seqid and the access/deny bits
  * taken from "mode".  When the reply status is zero the stateid in "op" is
  * replaced by the one in the reply.
  *
  * Returns the error from nfscl_request(), a parse error, or the reply
  * status.  NFSERR_STALESTATEID also starts recovery for the client.
  */
 APPLESTATIC int
 nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
     struct ucred *cred, NFSPROC_T *p)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	int error;
 
 	NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
 	if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp))))
 		*tl++ = 0;
 	else
 		*tl++ = op->nfso_stateid.seqid;
 	*tl++ = op->nfso_stateid.other[0];
 	*tl++ = op->nfso_stateid.other[1];
 	*tl++ = op->nfso_stateid.other[2];
 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
 	/* The deny bits live above NFSLCK_SHIFT in "mode". */
 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
 	*tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
 	error = nfscl_request(nd, vp, p, cred, NULL);
 	if (error)
 		return (error);
 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
 	if (!nd->nd_repstat) {
 		/* Pick up the updated stateid from the reply. */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
 		op->nfso_stateid.seqid = *tl++;
 		op->nfso_stateid.other[0] = *tl++;
 		op->nfso_stateid.other[1] = *tl++;
 		op->nfso_stateid.other[2] = *tl;
 	}
 	if (nd->nd_repstat && error == 0)
 		error = nd->nd_repstat;
 	if (error == NFSERR_STALESTATEID)
 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * V4 Close operation.
  * Does nothing (and returns 0) unless vp is a regular file.  "doclose"
  * selects nfscl_doclose() over nfscl_getclose(); when the chosen call
  * succeeds, the client structure it handed back is released.
  */
 APPLESTATIC int
 nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
 {
 	struct nfsclclient *clp;
 	int error;
 
 	if (vnode_vtype(vp) != VREG)
 		return (0);
 
 	if (doclose != 0)
 		error = nfscl_doclose(vp, &clp, p);
 	else
 		error = nfscl_getclose(vp, &clp);
 
 	/* clp is only released when the call above succeeded. */
 	if (error == 0)
 		nfscl_clientrelease(clp);
 	return (error);
 }
 
 /*
  * Close the open.
  *
  * For every lock owner on the Open, the byte range locks are released on
  * the server (one unlock of the whole range per lock owner when
  * nfso_posixlock is set, otherwise one unlock per lock record), the local
  * lock records are freed and a ReleaseLockOwner is done.  Then, with the
  * open owner's nfsow_rwlock held exclusively, nfscl_tryclose() is called
  * (and retried while it returns NFSERR_GRACE), after which the lock owners
  * and the Open itself are freed.  The RPCs use a copy of the credentials
  * saved in op->nfso_cred.
  */
 APPLESTATIC void
 nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfscllockowner *lp, *nlp;
 	struct nfscllock *lop, *nlop;
 	struct ucred *tcred;
 	u_int64_t off = 0, len = 0;
 	u_int32_t type = NFSV4LOCKT_READ;
 	int error, do_unlock, trycnt;
 
 	tcred = newnfs_getcred();
 	newnfs_copycred(&op->nfso_cred, tcred);
 	/*
 	 * (Theoretically this could be done in the same
 	 *  compound as the close, but having multiple
 	 *  sequenced Ops in the same compound might be
 	 *  too scary for some servers.)
 	 */
 	if (op->nfso_posixlock) {
 		/* One unlock of the whole range covers all the locks. */
 		off = 0;
 		len = NFS64BITSSET;
 		type = NFSV4LOCKT_READ;
 	}
 
 	/*
 	 * Since this function is only called from VOP_INACTIVE(), no
 	 * other thread will be manipulating this Open. As such, the
 	 * lock lists are not being changed by other threads, so it should
 	 * be safe to do this without locking.
 	 */
 	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
 		do_unlock = 1;
 		LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
 			if (op->nfso_posixlock == 0) {
 				/* Unlock exactly this lock record's range. */
 				off = lop->nfslo_first;
 				len = lop->nfslo_end - lop->nfslo_first;
 				if (lop->nfslo_type == F_WRLCK)
 					type = NFSV4LOCKT_WRITE;
 				else
 					type = NFSV4LOCKT_READ;
 			}
 			if (do_unlock) {
 				/*
 				 * Retry a bounded number of times while the
 				 * reply status is NFSERR_GRACE/NFSERR_DELAY.
 				 */
 				trycnt = 0;
 				do {
 					error = nfsrpc_locku(nd, nmp, lp, off,
 					    len, type, tcred, p, 0);
 					if ((nd->nd_repstat == NFSERR_GRACE ||
 					    nd->nd_repstat == NFSERR_DELAY) &&
 					    error == 0)
 						(void) nfs_catnap(PZERO,
 						    (int)nd->nd_repstat,
 						    "nfs_close");
 				} while ((nd->nd_repstat == NFSERR_GRACE ||
 				    nd->nd_repstat == NFSERR_DELAY) &&
 				    error == 0 && trycnt++ < 5);
 				/* Only one unlock per lock owner is needed. */
 				if (op->nfso_posixlock)
 					do_unlock = 0;
 			}
 			nfscl_freelock(lop, 0);
 		}
 		/*
 		 * Do a ReleaseLockOwner.
 		 * The lock owner name nfsl_owner may be used by other opens for
 		 * other files but the lock_owner4 name that nfsrpc_rellockown()
 		 * puts on the wire has the file handle for this file appended
 		 * to it, so it can be done now.
 		 */
 		(void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
 		    lp->nfsl_open->nfso_fhlen, tcred, p);
 	}
 
 	/*
 	 * There could be other Opens for different files on the same
 	 * OpenOwner, so locking is required.
 	 */
 	NFSLOCKCLSTATE();
 	nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
 	NFSUNLOCKCLSTATE();
 	do {
 		error = nfscl_tryclose(op, tcred, nmp, p);
 		if (error == NFSERR_GRACE)
 			(void) nfs_catnap(PZERO, error, "nfs_close");
 	} while (error == NFSERR_GRACE);
 	/* The state lock is held while the local state is torn down. */
 	NFSLOCKCLSTATE();
 	nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
 
 	LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
 		nfscl_freelockowner(lp, 0);
 	nfscl_freeopen(op, 0);
 	NFSUNLOCKCLSTATE();
 	NFSFREECRED(tcred);
 }
 
 /*
  * The actual Close RPC.
  *
  * Builds a Close for the file handle stored in "op", carrying the open
  * owner's seqid and the Open's stateid (with the stateid seqid word zeroed
  * when NFSHASNFSV4N() is true for the mount).  "nd" is supplied by the
  * caller; "syscred" selects ND_USEGSSNAME for the request.
  *
  * Returns the error from newnfs_request(), a parse error, or the reply
  * status.  NFSERR_STALESTATEID also starts recovery for the client.  The
  * reply mbuf chain is freed here once the request itself has succeeded.
  */
 APPLESTATIC int
 nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
     int syscred)
 {
 	u_int32_t *tl;
 	int error;
 
 	nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
 	    op->nfso_fhlen, NULL, NULL);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
 	if (NFSHASNFSV4N(nmp))
 		*tl++ = 0;
 	else
 		*tl++ = op->nfso_stateid.seqid;
 	*tl++ = op->nfso_stateid.other[0];
 	*tl++ = op->nfso_stateid.other[1];
 	*tl = op->nfso_stateid.other[2];
 	if (syscred)
 		nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error)
 		return (error);
 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
 	/* The stateid in the reply is stepped over; its value is not used. */
 	if (nd->nd_repstat == 0)
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
 	error = nd->nd_repstat;
 	if (error == NFSERR_STALESTATEID)
 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * V4 Open Confirm RPC.
  *
  * Returns 0 at once for NFSv4.1 mounts, which do not confirm Opens.
  * Otherwise sends the Open's stateid and the open owner's seqid against the
  * file handle nfhp/fhlen and, when the reply status is zero, replaces the
  * stateid in "op" with the one from the reply.
  *
  * Returns the error from nfscl_request(), a parse error, or the reply
  * status.  NFSERR_STALESTATEID also starts recovery for the client.
  */
 APPLESTATIC int
 nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsmount *nmp;
 	int error;
 
 	nmp = VFSTONFS(vnode_mount(vp));
 	if (NFSHASNFSV4N(nmp))
 		return (0);		/* No confirmation for NFSv4.1. */
 	nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL);
 	/* Stateid first, then the open owner's seqid. */
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
 	*tl++ = op->nfso_stateid.seqid;
 	*tl++ = op->nfso_stateid.other[0];
 	*tl++ = op->nfso_stateid.other[1];
 	*tl++ = op->nfso_stateid.other[2];
 	*tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
 	error = nfscl_request(nd, vp, p, cred, NULL);
 	if (error)
 		return (error);
 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
 	if (!nd->nd_repstat) {
 		/* Pick up the confirmed stateid from the reply. */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
 		op->nfso_stateid.seqid = *tl++;
 		op->nfso_stateid.other[0] = *tl++;
 		op->nfso_stateid.other[1] = *tl++;
 		op->nfso_stateid.other[2] = *tl;
 	}
 	error = nd->nd_repstat;
 	if (error == NFSERR_STALESTATEID)
 		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
  * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
  *
  * For NFSv4.1 mounts (NFSHASNFSV4N) this instead does an ExchangeID followed
  * by a CreateSession, puts the new session at the head of nmp->nm_sess and,
  * when "reclaim" is zero, finishes with a ReclaimComplete
  * (NFSERR_COMPLETEALREADY and NFSERR_NOTSUPP from that are ignored).
  *
  * For NFSv4.0 a zeroed struct nfsclds is allocated to hold the clientid,
  * then SetClientID, SetClientIDConfirm and a Getattr of the lease time are
  * done.  The lease time sets clp->nfsc_renew/nfsc_expire and
  * clp->nfsc_clientidrev is advanced, skipping the value 0.
  *
  * Returns 0 on success, otherwise a request/parse error or the reply status.
  */
 APPLESTATIC int
 nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
     struct ucred *cred, NFSPROC_T *p)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd;
 	struct nfsrv_descript *nd = &nfsd;
 	nfsattrbit_t attrbits;
 	u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
 	u_short port;
 	int error, isinet6 = 0, callblen;
 	nfsquad_t confirm;
 	u_int32_t lease;
 	static u_int32_t rev = 0;
 	/*
 	 * Start out NULL so that the error path below never hands an
 	 * indeterminate pointer to nfscl_freenfsclds() should
 	 * nfsrpc_exchangeid() fail without storing a session pointer.
 	 */
 	struct nfsclds *dsp = NULL;
 	struct in6_addr a6;
 	struct nfsclsession *tsep;
 
 	if (nfsboottime.tv_sec == 0)
 		NFSSETBOOTTIME(nfsboottime);
 	clp->nfsc_rev = rev++;
 	if (NFSHASNFSV4N(nmp)) {
 		/*
 		 * Either there was no previous session or the
 		 * previous session has failed, so...
 		 * do an ExchangeID followed by the CreateSession.
 		 */
 		error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq,
 		    NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p);
 		NFSCL_DEBUG(1, "aft exch=%d\n", error);
 		if (error == 0)
 			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
 			    &nmp->nm_sockreq,
 			    dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
 		if (error == 0) {
 			NFSLOCKMNT(nmp);
 			/*
 			 * The old sessions cannot be safely free'd
 			 * here, since they may still be used by
 			 * in-progress RPCs.
 			 */
 			tsep = NULL;
 			if (TAILQ_FIRST(&nmp->nm_sess) != NULL)
 				tsep = NFSMNT_MDSSESSION(nmp);
 			TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
 			    nfsclds_list);
 			/*
 			 * Wake up RPCs waiting for a slot on the
 			 * old session. These will then fail with
 			 * NFSERR_BADSESSION and be retried with the
 			 * new session by nfsv4_setsequence().
 			 * Also wakeup() processes waiting for the
 			 * new session.
 			 */
 			if (tsep != NULL)
 				wakeup(&tsep->nfsess_slots);
 			wakeup(&nmp->nm_sess);
 			NFSUNLOCKMNT(nmp);
 		} else if (dsp != NULL)
 			/* Only free a session that was actually handed back. */
 			nfscl_freenfsclds(dsp);
 		NFSCL_DEBUG(1, "aft createsess=%d\n", error);
 		if (error == 0 && reclaim == 0) {
 			error = nfsrpc_reclaimcomplete(nmp, cred, p);
 			NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
 			if (error == NFSERR_COMPLETEALREADY ||
 			    error == NFSERR_NOTSUPP)
 				/* Ignore this error. */
 				error = 0;
 		}
 		return (error);
 	}
 
 	/*
 	 * Allocate a single session structure for NFSv4.0, because some of
 	 * the fields are used by NFSv4.0 although it doesn't do a session.
 	 */
 	dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
 	mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
 	mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
 	NFSLOCKMNT(nmp);
 	TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
 	tsep = NFSMNT_MDSSESSION(nmp);
 	NFSUNLOCKMNT(nmp);
 
 	/* The verifier is the boot time plus a per-call revision number. */
 	nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL);
 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(nfsboottime.tv_sec);
 	*tl = txdr_unsigned(clp->nfsc_rev);
 	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
 
 	/*
 	 * set up the callback address
 	 */
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(NFS_CALLBCKPROG);
 	/* A configured callback address takes precedence over our own IP. */
 	callblen = strlen(nfsv4_callbackaddr);
 	if (callblen == 0)
 		cp = nfscl_getmyip(nmp, &a6, &isinet6);
 	if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
 	    (callblen > 0 || cp != NULL)) {
 		/* The two port bytes are appended to the address as ".hi.lo". */
 		port = htons(nfsv4_cbport);
 		cp2 = (u_int8_t *)&port;
 #ifdef INET6
 		if ((callblen > 0 &&
 		     strchr(nfsv4_callbackaddr, ':')) || isinet6) {
 			char ip6buf[INET6_ADDRSTRLEN], *ip6add;
 
 			(void) nfsm_strtom(nd, "tcp6", 4);
 			if (callblen == 0) {
 				ip6_sprintf(ip6buf, (struct in6_addr *)cp);
 				ip6add = ip6buf;
 			} else {
 				ip6add = nfsv4_callbackaddr;
 			}
 			snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
 			    ip6add, cp2[0], cp2[1]);
 		} else
 #endif
 		{
 			(void) nfsm_strtom(nd, "tcp", 3);
 			if (callblen == 0)
 				snprintf(addr, INET6_ADDRSTRLEN + 9,
 				    "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
 				    cp[2], cp[3], cp2[0], cp2[1]);
 			else
 				snprintf(addr, INET6_ADDRSTRLEN + 9,
 				    "%s.%d.%d", nfsv4_callbackaddr,
 				    cp2[0], cp2[1]);
 		}
 		(void) nfsm_strtom(nd, addr, strlen(addr));
 	} else {
 		/* Callbacks are not available: send an all-zero address. */
 		(void) nfsm_strtom(nd, "tcp", 3);
 		(void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
 	}
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(clp->nfsc_cbident);
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error)
 		return (error);
 	if (nd->nd_repstat == 0) {
 	    /* The reply holds the clientid followed by the confirm verifier. */
 	    NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 	    tsep->nfsess_clientid.lval[0] = *tl++;
 	    tsep->nfsess_clientid.lval[1] = *tl++;
 	    confirm.lval[0] = *tl++;
 	    confirm.lval[1] = *tl;
 	    mbuf_freem(nd->nd_mrep);
 	    nd->nd_mrep = NULL;
 
 	    /*
 	     * and confirm it.
 	     */
 	    nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
 		NULL);
 	    NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 	    *tl++ = tsep->nfsess_clientid.lval[0];
 	    *tl++ = tsep->nfsess_clientid.lval[1];
 	    *tl++ = confirm.lval[0];
 	    *tl = confirm.lval[1];
 	    nd->nd_flag |= ND_USEGSSNAME;
 	    error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
 		cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	    if (error)
 		return (error);
 	    mbuf_freem(nd->nd_mrep);
 	    nd->nd_mrep = NULL;
 	    if (nd->nd_repstat == 0) {
 		/* Fetch the lease time to set the renew/expire times. */
 		nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh,
 		    nmp->nm_fhsize, NULL, NULL);
 		NFSZERO_ATTRBIT(&attrbits);
 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 		nd->nd_flag |= ND_USEGSSNAME;
 		error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
 		    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 		if (error)
 		    return (error);
 		if (nd->nd_repstat == 0) {
 		    error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL,
 			NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred);
 		    if (error)
 			goto nfsmout;
 		    clp->nfsc_renew = NFSCL_RENEW(lease);
 		    clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
 		    /* Revision 0 is never used, so step past it on wrap. */
 		    clp->nfsc_clientidrev++;
 		    if (clp->nfsc_clientidrev == 0)
 			clp->nfsc_clientidrev++;
 		}
 	    }
 	}
 	error = nd->nd_repstat;
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * Getattr RPC for the file backing "vp".
  * An NFSv4 request carries the standard Getattr attribute bitmap.
  * Returns the error from nfscl_request(), else the server's reply status
  * when it is non-zero, else the result of nfsm_loadattr() into "nap".
  */
 APPLESTATIC int
 nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
     struct nfsvattr *nap, void *stuff)
 {
 	struct nfsrv_descript reqdesc, *nd = &reqdesc;
 	nfsattrbit_t attrbits;
 	int error;
 
 	NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
 	if ((nd->nd_flag & ND_NFSV4) != 0) {
 		NFSGETATTR_ATTRBIT(&attrbits);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 	}
 	error = nfscl_request(nd, vp, p, cred, stuff);
 	if (error != 0)
 		return (error);
 
 	/* Only parse the attributes when the server reported success. */
 	if (nd->nd_repstat != 0)
 		error = nd->nd_repstat;
 	else
 		error = nfsm_loadattr(nd, nap);
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * Getattr RPC that takes a mount point and file handle instead of a vnode.
  * For NFSv4 the lease time attribute is requested as well and is handed to
  * nfsv4_loadattr() via "leasep". When "syscred" is non-zero the request is
  * flagged with ND_USEGSSNAME.
  * Returns the error from newnfs_request(), else the server's reply status
  * when it is non-zero, else the result of loading the attributes.
  */
 APPLESTATIC int
 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
     uint32_t *leasep)
 {
 	struct nfsrv_descript reqdesc, *nd = &reqdesc;
 	nfsattrbit_t attrbits;
 	int error, vers;
 
 	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL);
 
 	/* Pick the RPC version that matches the request flags. */
 	if ((nd->nd_flag & ND_NFSV4) != 0) {
 		vers = NFS_VER4;
 		NFSGETATTR_ATTRBIT(&attrbits);
 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 	} else if ((nd->nd_flag & ND_NFSV3) != 0) {
 		vers = NFS_VER3;
 	} else {
 		vers = NFS_VER2;
 	}
 	if (syscred != 0)
 		nd->nd_flag |= ND_USEGSSNAME;
 
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, vers, NULL, 1, xidp, NULL);
 	if (error != 0)
 		return (error);
 
 	if (nd->nd_repstat != 0)
 		error = nd->nd_repstat;
 	else if ((nd->nd_flag & ND_NFSV4) == 0)
 		error = nfsm_loadattr(nd, nap);
 	else
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 		    NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
 		    NULL, NULL);
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * Do an nfs setattr operation.
  * When vap is non-NULL the attributes are set with nfsrpc_setattrrpc();
  * otherwise the ACL in aclp is set with nfsrpc_setaclrpc().
  * For an NFSv4 mount a stateid is acquired first (temporarily opening a
  * regular file when none is available) and the RPC is retried for a set
  * of recoverable errors.
  * Returns 0 or an error number; any remaining error is mapped to EIO once
  * four or more attempts have been made.
  */
 APPLESTATIC int
 nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp,
     void *stuff)
 {
 	int error, expireret = 0, openerr, retrycnt;
 	u_int32_t clidrev = 0, mode;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	struct nfsfh *nfhp;
 	nfsv4stateid_t stateid;
 	void *lckp;
 
 	/* A clidrev of 0 disables the nfscl_hasexpired() check below. */
 	if (nmp->nm_clp != NULL)
 		clidrev = nmp->nm_clp->nfsc_clientidrev;
 	/* Ask for write access only when the size attribute is being set. */
 	if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
 		mode = NFSV4OPEN_ACCESSWRITE;
 	else
 		mode = NFSV4OPEN_ACCESSREAD;
 	retrycnt = 0;
 	do {
 		lckp = NULL;
 		/* Non-zero means there is no temporary open to close below. */
 		openerr = 1;
 		if (NFSHASNFSV4(nmp)) {
 			nfhp = VTONFS(vp)->n_fhp;
 			error = nfscl_getstateid(vp, nfhp->nfh_fh,
 			    nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
 			if (error && vnode_vtype(vp) == VREG &&
 			    (mode == NFSV4OPEN_ACCESSWRITE ||
 			     nfstest_openallsetattr)) {
 				/*
 				 * No Open stateid, so try and open the file
 				 * now.
 				 */
 				if (mode == NFSV4OPEN_ACCESSWRITE)
 					openerr = nfsrpc_open(vp, FWRITE, cred,
 					    p);
 				else
 					openerr = nfsrpc_open(vp, FREAD, cred,
 					    p);
 				if (!openerr)
 					(void) nfscl_getstateid(vp,
 					    nfhp->nfh_fh, nfhp->nfh_len,
 					    mode, 0, cred, p, &stateid, &lckp);
 			}
 		}
 		/* vap selects between setting attributes and setting an ACL. */
 		if (vap != NULL)
 			error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
 			    rnap, attrflagp, stuff);
 		else
 			error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid,
 			    stuff);
 		/* Record the NFSERR_OPENMODE reply in the mount state. */
 		if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
 			NFSLOCKMNT(nmp);
 			nmp->nm_state |= NFSSTA_OPENMODE;
 			NFSUNLOCKMNT(nmp);
 		}
 		if (error == NFSERR_STALESTATEID)
 			nfscl_initiate_recovery(nmp->nm_clp);
 		if (lckp != NULL)
 			nfscl_lockderef(lckp);
 		/* Close the temporary open done above, if there was one. */
 		if (!openerr)
 			(void) nfsrpc_close(vp, 0, p);
 		/*
 		 * These errors call nfs_catnap() before the loop condition
 		 * is evaluated; expired/bad stateid errors instead ask
 		 * nfscl_hasexpired(), whose result gates the retry.
 		 */
 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
 			(void) nfs_catnap(PZERO, error, "nfs_setattr");
 		} else if ((error == NFSERR_EXPIRED ||
 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
 		}
 		retrycnt++;
 	/*
 	 * The first five errors are retried without a limit, OLDSTATEID up to
 	 * 20 attempts and the EXPIRED/BADSTATEID/OPENMODE cases up to 4.
 	 */
 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
 	    error == NFSERR_BADSESSION ||
 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
 	    (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
 	     retrycnt < 4));
 	/* Any error left after four or more attempts is reported as EIO. */
 	if (error && retrycnt >= 4)
 		error = EIO;
 	return (error);
 }
 
 /*
  * The actual setattr RPC.
  * Builds a Setattr request for vp from vap (preceded by the stateid and
  * followed by a Getattr operation for NFSv4), sends it and parses the
  * reply, passing rnap/attrflagp to the reply parsing routines.
  * Note that vap->va_type is overwritten with the vnode's type.
  * Returns 0, a request or parse error, or the server's reply status.
  */
 static int
 nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
     nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
     struct nfsvattr *rnap, int *attrflagp, void *stuff)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	int error;
 	nfsattrbit_t attrbits;
 
 	*attrflagp = 0;
 	NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp);
 	/* The stateid is only used for NFSv4 requests. */
 	if (nd->nd_flag & ND_NFSV4)
 		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
 	vap->va_type = vnode_vtype(vp);
 	nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
 	if (nd->nd_flag & ND_NFSV3) {
 		/* NOTE(review): presumably the NFSv3 guard "check" flag - confirm. */
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = newnfs_false;
 	} else if (nd->nd_flag & ND_NFSV4) {
 		/* Append a Getattr operation to the compound. */
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
 		NFSGETATTR_ATTRBIT(&attrbits);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 	}
 	error = nfscl_request(nd, vp, p, cred, stuff);
 	if (error)
 		return (error);
 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
 		error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff);
 	/* For NFSv4, parse the attribute bitmap unless ND_NOMOREDATA is set. */
 	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 	if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
 		error = nfscl_postop_attr(nd, rnap, attrflagp, stuff);
 	mbuf_freem(nd->nd_mrep);
 	/* A parse error takes precedence over the server's reply status. */
 	if (nd->nd_repstat && !error)
 		error = nd->nd_repstat;
 	return (error);
 }
 
 /*
  * nfs lookup rpc
  * Looks up "name" (of length len) in the directory dvp and returns the
  * resulting file handle in *nfhpp. nap/attrflagp and dnap/dattrflagp are
  * passed to the reply parsing routines for the attributes of the result
  * and of the directory respectively; both flags are cleared on entry.
  * For an NFSv4 mount, "." is answered locally with a MALLOC'd copy of
  * dvp's file handle and ".." is sent as a Lookupp.
  * Returns ENOTDIR if dvp is not a directory and ENAMETOOLONG if len
  * exceeds NFS_MAXNAMLEN.
  */
 APPLESTATIC int
 nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
     NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
     struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsmount *nmp;
 	struct nfsnode *np;
 	struct nfsfh *nfhp;
 	nfsattrbit_t attrbits;
 	int error = 0, lookupp = 0;
 
 	*attrflagp = 0;
 	*dattrflagp = 0;
 	if (vnode_vtype(dvp) != VDIR)
 		return (ENOTDIR);
 	nmp = VFSTONFS(vnode_mount(dvp));
 	if (len > NFS_MAXNAMLEN)
 		return (ENAMETOOLONG);
 	if (NFSHASNFSV4(nmp) && len == 1 &&
 		name[0] == '.') {
 		/*
 		 * Just return the current dir's fh.
 		 */
 		np = VTONFS(dvp);
 		MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) +
 			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
 		nfhp->nfh_len = np->n_fhp->nfh_len;
 		NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
 		*nfhpp = nfhp;
 		return (0);
 	}
 	/* For NFSv4, ".." is sent as a Lookupp, which carries no name. */
 	if (NFSHASNFSV4(nmp) && len == 2 &&
 		name[0] == '.' && name[1] == '.') {
 		lookupp = 1;
 		NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp);
 	} else {
 		NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp);
 		(void) nfsm_strtom(nd, name, len);
 	}
 	if (nd->nd_flag & ND_NFSV4) {
 		/* Follow the lookup with GetFH and Getattr operations. */
 		NFSGETATTR_ATTRBIT(&attrbits);
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 	}
 	error = nfscl_request(nd, dvp, p, cred, stuff);
 	if (error)
 		return (error);
 	if (nd->nd_repstat) {
 		/*
 		 * When an NFSv4 Lookupp returns ENOENT, it means that
 		 * the lookup is at the root of an fs, so return this dir.
 		 */
 		if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
 		    np = VTONFS(dvp);
 		    MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) +
 			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
 		    nfhp->nfh_len = np->n_fhp->nfh_len;
 		    NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
 		    *nfhpp = nfhp;
 		    mbuf_freem(nd->nd_mrep);
 		    return (0);
 		}
 		/* The failed reply may still carry directory attributes. */
 		if (nd->nd_flag & ND_NFSV3)
 		    error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
 		else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
 		    ND_NFSV4) {
 			/* Load the directory attributes. */
 			error = nfsm_loadattr(nd, dnap);
 			if (error == 0)
 				*dattrflagp = 1;
 		}
 		goto nfsmout;
 	}
 	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
 		/* Load the directory attributes. */
 		error = nfsm_loadattr(nd, dnap);
 		if (error != 0)
 			goto nfsmout;
 		*dattrflagp = 1;
 		/* Skip over the Lookup and GetFH operation status values. */
 		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 	}
 	error = nfsm_getfh(nd, nfhpp);
 	if (error)
 		goto nfsmout;
 
 	error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
 	/* For NFSv3 the directory attributes follow those of the result. */
 	if ((nd->nd_flag & ND_NFSV3) && !error)
 		error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	/* A parse error takes precedence over the server's reply status. */
 	if (!error && nd->nd_repstat)
 		error = nd->nd_repstat;
 	return (error);
 }
 
 /*
  * Do a readlink rpc.
  * The link data in the reply is transferred to uiop by nfsm_mbufuio().
  * For NFSv4 a Getattr operation is added to the compound; nap/attrflagp
  * are passed to nfscl_postop_attr() for the attributes in the reply.
  * Returns 0, a request or parse error, or the server's reply status.
  */
 APPLESTATIC int
 nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsnode *np = VTONFS(vp);
 	nfsattrbit_t attrbits;
 	int error, len, cangetattr = 1;
 
 	*attrflagp = 0;
 	NFSCL_REQSTART(nd, NFSPROC_READLINK, vp);
 	if (nd->nd_flag & ND_NFSV4) {
 		/*
 		 * And do a Getattr op.
 		 */
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
 		NFSGETATTR_ATTRBIT(&attrbits);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 	}
 	error = nfscl_request(nd, vp, p, cred, stuff);
 	if (error)
 		return (error);
 	/* For NFSv3 the attributes come before the link data. */
 	if (nd->nd_flag & ND_NFSV3)
 		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
 	if (!nd->nd_repstat && !error) {
 		/*
 		 * NOTE(review): nfsmout has no explicit goto in this function,
 		 * so NFSM_STRSIZ presumably jumps to it on failure - confirm
 		 * against the macro definition.
 		 */
 		NFSM_STRSIZ(len, NFS_MAXPATHLEN);
 		/*
 		 * This seems weird to me, but must have been added to
 		 * FreeBSD for some reason. The only thing I can think of
 		 * is that there was/is some server that replies with
 		 * more link data than it should?
 		 */
 		if (len == NFS_MAXPATHLEN) {
 			NFSLOCKNODE(np);
 			if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
 				len = np->n_size;
 				/*
 				 * With the length overridden, the NFSv4
 				 * attributes after the link data are not
 				 * parsed.
 				 */
 				cangetattr = 0;
 			}
 			NFSUNLOCKNODE(np);
 		}
 		error = nfsm_mbufuio(nd, uiop, len);
 		if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
 	}
 	/* A parse error takes precedence over the server's reply status. */
 	if (nd->nd_repstat && !error)
 		error = nd->nd_repstat;
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * Read operation.
  * Wrapper around nfsrpc_readrpc() that, for an NFSv4 mount, acquires a
  * stateid (using a credential obtained from NFSNEWCRED()) before each
  * attempt and retries the RPC for a set of recoverable errors.
  * Returns 0 or an error number; any remaining error is mapped to EIO once
  * four or more attempts have been made.
  */
 APPLESTATIC int
 nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
 {
 	int error, expireret = 0, retrycnt;
 	u_int32_t clidrev = 0;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	struct nfsnode *np = VTONFS(vp);
 	struct ucred *newcred;
 	struct nfsfh *nfhp = NULL;
 	nfsv4stateid_t stateid;
 	void *lckp;
 
 	/* A clidrev of 0 disables the nfscl_hasexpired() check below. */
 	if (nmp->nm_clp != NULL)
 		clidrev = nmp->nm_clp->nfsc_clientidrev;
 	/* The caller's credential is used as is unless the mount is NFSv4. */
 	newcred = cred;
 	if (NFSHASNFSV4(nmp)) {
 		nfhp = np->n_fhp;
 		newcred = NFSNEWCRED(cred);
 	}
 	retrycnt = 0;
 	do {
 		lckp = NULL;
 		if (NFSHASNFSV4(nmp))
 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
 			    NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
 			    &lckp);
 		error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
 		    attrflagp, stuff);
 		/* Record the NFSERR_OPENMODE reply in the mount state. */
 		if (error == NFSERR_OPENMODE) {
 			NFSLOCKMNT(nmp);
 			nmp->nm_state |= NFSSTA_OPENMODE;
 			NFSUNLOCKMNT(nmp);
 		}
 		if (error == NFSERR_STALESTATEID)
 			nfscl_initiate_recovery(nmp->nm_clp);
 		if (lckp != NULL)
 			nfscl_lockderef(lckp);
 		/*
 		 * These errors call nfs_catnap() before the loop condition
 		 * is evaluated; expired/bad stateid errors instead ask
 		 * nfscl_hasexpired(), whose result gates the retry.
 		 */
 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
 			(void) nfs_catnap(PZERO, error, "nfs_read");
 		} else if ((error == NFSERR_EXPIRED ||
 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
 		}
 		retrycnt++;
 	/*
 	 * The first five errors are retried without a limit, OLDSTATEID up to
 	 * 20 attempts and the EXPIRED/BADSTATEID/OPENMODE cases up to 4.
 	 */
 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
 	    error == NFSERR_BADSESSION ||
 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
 	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
 	    (error == NFSERR_OPENMODE && retrycnt < 4));
 	/* Any error left after four or more attempts is reported as EIO. */
 	if (error && retrycnt >= 4)
 		error = EIO;
 	/* Release the credential obtained from NFSNEWCRED() above. */
 	if (NFSHASNFSV4(nmp))
 		NFSFREECRED(newcred);
 	return (error);
 }
 
 /*
  * The actual read RPC.
  * Issues Read RPCs of at most nm_rsize bytes each until uiop's residual
  * count is satisfied, the server indicates end of file or returns no data
  * (NFSv3/4), or a reply is shorter than requested (NFSv2).
  * Returns EFBIG if the end offset exceeds nm_maxfilesize or is less than
  * the starting offset; otherwise 0, a request or parse error, or the
  * server's reply status.
  */
 static int
 nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
     nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
     int *attrflagp, void *stuff)
 {
 	u_int32_t *tl;
 	int error = 0, len, retlen, tsiz, eof = 0;
 	struct nfsrv_descript nfsd;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	struct nfsrv_descript *nd = &nfsd;
 	int rsize;
 	off_t tmp_off;
 
 	*attrflagp = 0;
 	tsiz = uio_uio_resid(uiop);
 	tmp_off = uiop->uio_offset + tsiz;
 	/* nm_maxfilesize and nm_rsize are read with the mount locked. */
 	NFSLOCKMNT(nmp);
 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
 		NFSUNLOCKMNT(nmp);
 		return (EFBIG);
 	}
 	rsize = nmp->nm_rsize;
 	NFSUNLOCKMNT(nmp);
 	/* So that nfsmout only frees a reply that is actually held. */
 	nd->nd_mrep = NULL;
 	while (tsiz > 0) {
 		*attrflagp = 0;
 		len = (tsiz > rsize) ? rsize : tsiz;
 		NFSCL_REQSTART(nd, NFSPROC_READ, vp);
 		if (nd->nd_flag & ND_NFSV4)
 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
 		/*
 		 * Three words in every case: NFSv2 sends a 32 bit offset, the
 		 * length and a zero word; NFSv3/4 send a 64 bit offset and
 		 * the length.
 		 */
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
 		if (nd->nd_flag & ND_NFSV2) {
 			*tl++ = txdr_unsigned(uiop->uio_offset);
 			*tl++ = txdr_unsigned(len);
 			*tl = 0;
 		} else {
 			txdr_hyper(uiop->uio_offset, tl);
 			*(tl + 2) = txdr_unsigned(len);
 		}
 		/*
 		 * Since I can't do a Getattr for NFSv4 for Write, there
 		 * doesn't seem any point in doing one here, either.
 		 * (See the comment in nfsrpc_writerpc() for more info.)
 		 */
 		error = nfscl_request(nd, vp, p, cred, stuff);
 		if (error)
 			return (error);
 		if (nd->nd_flag & ND_NFSV3) {
 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
 		} else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
 			error = nfsm_loadattr(nd, nap);
 			if (!error)
 				*attrflagp = 1;
 		}
 		if (nd->nd_repstat || error) {
 			/* A parse error takes precedence over the reply status. */
 			if (!error)
 				error = nd->nd_repstat;
 			goto nfsmout;
 		}
 		/* Pick up the eof indication, which NFSv2 does not supply. */
 		if (nd->nd_flag & ND_NFSV3) {
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			eof = fxdr_unsigned(int, *(tl + 1));
 		} else if (nd->nd_flag & ND_NFSV4) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			eof = fxdr_unsigned(int, *tl);
 		}
 		NFSM_STRSIZ(retlen, len);
 		error = nfsm_mbufuio(nd, uiop, retlen);
 		if (error)
 			goto nfsmout;
 		mbuf_freem(nd->nd_mrep);
 		nd->nd_mrep = NULL;
 		tsiz -= retlen;
 		/* Decide whether the loop should stop early. */
 		if (!(nd->nd_flag & ND_NFSV2)) {
 			if (eof || retlen == 0)
 				tsiz = 0;
 		} else if (retlen < len)
 			tsiz = 0;
 	}
 	return (0);
 nfsmout:
 	if (nd->nd_mrep != NULL)
 		mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * nfs write operation
  * Wrapper around nfsrpc_writerpc() that, for an NFSv4 mount, acquires a
  * stateid before each attempt and retries the RPC for a set of
  * recoverable errors. *must_commit is cleared on entry.
  * When called_from_strategy != 0, it should return EIO for an error that
  * indicates recovery is in progress, so that the buffer will be left
  * dirty and be written back to the server later. If it loops around,
  * the recovery thread could get stuck waiting for the buffer and recovery
  * will then deadlock.
  */
 APPLESTATIC int
 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
     void *stuff, int called_from_strategy)
 {
 	int error, expireret = 0, retrycnt, nostateid;
 	u_int32_t clidrev = 0;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	struct nfsnode *np = VTONFS(vp);
 	struct ucred *newcred;
 	struct nfsfh *nfhp = NULL;
 	nfsv4stateid_t stateid;
 	void *lckp;
 
 	*must_commit = 0;
 	/* A clidrev of 0 disables the nfscl_hasexpired() check below. */
 	if (nmp->nm_clp != NULL)
 		clidrev = nmp->nm_clp->nfsc_clientidrev;
 	/* The caller's credential is used as is unless the mount is NFSv4. */
 	newcred = cred;
 	if (NFSHASNFSV4(nmp)) {
 		newcred = NFSNEWCRED(cred);
 		nfhp = np->n_fhp;
 	}
 	retrycnt = 0;
 	do {
 		lckp = NULL;
 		nostateid = 0;
 		if (NFSHASNFSV4(nmp)) {
 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
 			    NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
 			    &lckp);
 			/* An all zeros "other" field is treated as no stateid. */
 			if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
 			    stateid.other[2] == 0) {
 				nostateid = 1;
 				NFSCL_DEBUG(1, "stateid0 in write\n");
 			}
 		}
 
 		/*
 		 * If there is no stateid for NFSv4, it means this is an
 		 * extraneous write after close. Basically a poorly
 		 * implemented buffer cache. Just don't do the write.
 		 */
 		if (nostateid)
 			error = 0;
 		else
 			error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
 			    newcred, &stateid, p, nap, attrflagp, stuff);
 		if (error == NFSERR_STALESTATEID)
 			nfscl_initiate_recovery(nmp->nm_clp);
 		if (lckp != NULL)
 			nfscl_lockderef(lckp);
 		/*
 		 * These errors call nfs_catnap() before the loop condition
 		 * is evaluated; expired/bad stateid errors instead ask
 		 * nfscl_hasexpired(), whose result gates the retry.
 		 */
 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
 		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
 			(void) nfs_catnap(PZERO, error, "nfs_write");
 		} else if ((error == NFSERR_EXPIRED ||
 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
 		}
 		retrycnt++;
 	/*
 	 * The stale stateid, bad session and "don't recover" errors are only
 	 * retried here when not called from strategy.
 	 */
 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
 	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
 	/*
 	 * Map to EIO after four or more attempts, or when one of the errors
 	 * excluded from the retry above was seen on the strategy path.
 	 */
 	if (error != 0 && (retrycnt >= 4 ||
 	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
 		error = EIO;
 	/* Release the credential obtained from NFSNEWCRED() above. */
 	if (NFSHASNFSV4(nmp))
 		NFSFREECRED(newcred);
 	return (error);
 }
 
 /*
  * The actual write RPC.
  * Issues Write RPCs of at most nm_wsize bytes each until uiop's residual
  * count is consumed. When the function leaves via nfsmout, *iomode is set
  * to the lowest commitment level reported by the replies parsed (starting
  * from NFSWRITE_FILESYNC); it is left untouched when nfscl_request()
  * itself fails. *must_commit is set to 1 when the write verifier in a
  * reply differs from the one already saved in the mount.
  * Returns EFBIG if the end offset exceeds nm_maxfilesize or is less than
  * the starting offset; otherwise 0, a request or parse error, or the
  * server's reply status.
  */
 static int
 nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
     int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
 {
 	u_int32_t *tl;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	struct nfsnode *np = VTONFS(vp);
 	int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC;
 	int wccflag = 0, wsize;
 	int32_t backup;
 	struct nfsrv_descript nfsd;
 	struct nfsrv_descript *nd = &nfsd;
 	nfsattrbit_t attrbits;
 	off_t tmp_off;
 
 	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
 	*attrflagp = 0;
 	tsiz = uio_uio_resid(uiop);
 	tmp_off = uiop->uio_offset + tsiz;
 	/* nm_maxfilesize and nm_wsize are read with the mount locked. */
 	NFSLOCKMNT(nmp);
 	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
 		NFSUNLOCKMNT(nmp);
 		return (EFBIG);
 	}
 	wsize = nmp->nm_wsize;
 	NFSUNLOCKMNT(nmp);
 	nd->nd_mrep = NULL;	/* NFSv2 sometimes does a write with */
 	nd->nd_repstat = 0;	/* uio_resid == 0, so the while is not done */
 	while (tsiz > 0) {
 		*attrflagp = 0;
 		len = (tsiz > wsize) ? wsize : tsiz;
 		NFSCL_REQSTART(nd, NFSPROC_WRITE, vp);
 		if (nd->nd_flag & ND_NFSV4) {
 			/* NFSv4: stateid, 64 bit offset, stable mode, length. */
 			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
 			txdr_hyper(uiop->uio_offset, tl);
 			tl += 2;
 			*tl++ = txdr_unsigned(*iomode);
 			*tl = txdr_unsigned(len);
 		} else if (nd->nd_flag & ND_NFSV3) {
 			/* NFSv3: 64 bit offset, count, stable mode, data length. */
 			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
 			txdr_hyper(uiop->uio_offset, tl);
 			tl += 2;
 			*tl++ = txdr_unsigned(len);
 			*tl++ = txdr_unsigned(*iomode);
 			*tl = txdr_unsigned(len);
 		} else {
 			u_int32_t x;
 
 			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 			/*
 			 * Not sure why someone changed this, since the
 			 * RFC clearly states that "beginoffset" and
 			 * "totalcount" are ignored, but it wouldn't
 			 * surprise me if there's a busted server out there.
 			 */
 			/* Set both "begin" and "current" to non-garbage. */
 			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
 			*tl++ = x;      /* "begin offset" */
 			*tl++ = x;      /* "current offset" */
 			x = txdr_unsigned(len);
 			*tl++ = x;      /* total to this offset */
 			*tl = x;        /* size of this write */
 
 		}
 		nfsm_uiombuf(nd, uiop, len);
 		/*
 		 * Although it is tempting to do a normal Getattr Op in the
 		 * NFSv4 compound, the result can be a nearly hung client
 		 * system if the Getattr asks for Owner and/or OwnerGroup.
 		 * It occurs when the client can't map either the Owner or
 		 * Owner_group name in the Getattr reply to a uid/gid. When
 		 * there is a cache miss, the kernel does an upcall to the
 		 * nfsuserd. Then, it can try and read the local /etc/passwd
 		 * or /etc/group file. It can then block in getnewbuf(),
 		 * waiting for dirty writes to be pushed to the NFS server.
 		 * The only reason this doesn't result in a complete
 		 * deadlock, is that the upcall times out and allows
 		 * the write to complete. However, progress is so slow
 		 * that it might just as well be deadlocked.
 		 * As such, we get the rest of the attributes, but not
 		 * Owner or Owner_group.
 		 * nb: nfscl_loadattrcache() needs to be told that these
 		 *     partial attributes from a write rpc are being
 		 *     passed in, via a argument flag.
 		 */
 		if (nd->nd_flag & ND_NFSV4) {
 			NFSWRITEGETATTR_ATTRBIT(&attrbits);
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
 			(void) nfsrv_putattrbit(nd, &attrbits);
 		}
 		error = nfscl_request(nd, vp, p, cred, stuff);
 		if (error)
 			return (error);
 		if (nd->nd_repstat) {
 			/*
 			 * In case the rpc gets retried, roll
 			 * the uio fields changed by nfsm_uiombuf()
 			 * back.
 			 */
 			uiop->uio_offset -= len;
 			uio_uio_resid_add(uiop, len);
 			uio_iov_base_add(uiop, -len);
 			uio_iov_len_add(uiop, len);
 		}
 		if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
 			error = nfscl_wcc_data(nd, vp, nap, attrflagp,
 			    &wccflag, stuff);
 			if (error)
 				goto nfsmout;
 		}
 		if (!nd->nd_repstat) {
 			if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
 				/* Count written, commitment level and verifier. */
 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
 					+ NFSX_VERF);
 				rlen = fxdr_unsigned(int, *tl++);
 				if (rlen == 0) {
 					error = NFSERR_IO;
 					goto nfsmout;
 				} else if (rlen < len) {
 					/*
 					 * Short write: back the uio up by the
 					 * part that was not written.
 					 */
 					backup = len - rlen;
 					uio_iov_base_add(uiop, -(backup));
 					uio_iov_len_add(uiop, backup);
 					uiop->uio_offset -= backup;
 					uio_uio_resid_add(uiop, backup);
 					len = rlen;
 				}
 				commit = fxdr_unsigned(int, *tl++);
 
 				/*
 				 * Return the lowest commitment level
 				 * obtained by any of the RPCs.
 				 */
 				if (committed == NFSWRITE_FILESYNC)
 					committed = commit;
 				else if (committed == NFSWRITE_DATASYNC &&
 					commit == NFSWRITE_UNSTABLE)
 					committed = commit;
 				/*
 				 * Save the first verifier seen; a later one
 				 * that differs sets *must_commit and
 				 * replaces the saved copy.
 				 */
 				NFSLOCKMNT(nmp);
 				if (!NFSHASWRITEVERF(nmp)) {
 					NFSBCOPY((caddr_t)tl,
 					    (caddr_t)&nmp->nm_verf[0],
 					    NFSX_VERF);
 					NFSSETWRITEVERF(nmp);
 	    			} else if (NFSBCMP(tl, nmp->nm_verf,
 				    NFSX_VERF)) {
 					*must_commit = 1;
 					NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
 				}
 				NFSUNLOCKMNT(nmp);
 			}
 			/*
 			 * NOTE(review): presumably skips the Getattr operation
 			 * number and status words - confirm.
 			 */
 			if (nd->nd_flag & ND_NFSV4)
 				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
 				error = nfsm_loadattr(nd, nap);
 				if (!error)
 					*attrflagp = NFS_LATTR_NOSHRINK;
 			}
 		} else {
 			error = nd->nd_repstat;
 		}
 		if (error)
 			goto nfsmout;
 		NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
 		mbuf_freem(nd->nd_mrep);
 		nd->nd_mrep = NULL;
 		tsiz -= len;
 	}
 nfsmout:
 	if (nd->nd_mrep != NULL)
 		mbuf_freem(nd->nd_mrep);
 	/* Report the commitment level back through iomode. */
 	*iomode = committed;
 	/* A parse error takes precedence over the server's reply status. */
 	if (nd->nd_repstat && !error)
 		error = nd->nd_repstat;
 	return (error);
 }
 
 /*
  * nfs mknod rpc
  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
  * mode set to specify the file type and the size field for rdev.
  * *nfhpp, *attrflagp and *dattrflagp are cleared on entry; on success the
  * new file handle and attributes are obtained via nfscl_mtofh().
  * Returns ENAMETOOLONG if namelen exceeds NFS_MAXNAMLEN; otherwise 0, a
  * request or parse error, or the server's reply status.
  */
 APPLESTATIC int
 nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
     u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p,
     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
     int *attrflagp, int *dattrflagp, void *dstuff)
 {
 	u_int32_t *tl;
 	int error = 0;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	nfsattrbit_t attrbits;
 
 	*nfhpp = NULL;
 	*attrflagp = 0;
 	*dattrflagp = 0;
 	if (namelen > NFS_MAXNAMLEN)
 		return (ENAMETOOLONG);
 	NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp);
 	/*
 	 * For NFSv4 the type, plus the device numbers for VBLK/VCHR, go in
 	 * front of the name.
 	 */
 	if (nd->nd_flag & ND_NFSV4) {
 		if (vtyp == VBLK || vtyp == VCHR) {
 			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 			*tl++ = vtonfsv34_type(vtyp);
 			*tl++ = txdr_unsigned(NFSMAJOR(rdev));
 			*tl = txdr_unsigned(NFSMINOR(rdev));
 		} else {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl = vtonfsv34_type(vtyp);
 		}
 	}
 	(void) nfsm_strtom(nd, name, namelen);
 	/* For NFSv3 the type follows the name. */
 	if (nd->nd_flag & ND_NFSV3) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = vtonfsv34_type(vtyp);
 	}
 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
 	/* For NFSv3 the device numbers follow the attributes. */
 	if ((nd->nd_flag & ND_NFSV3) &&
 	    (vtyp == VCHR || vtyp == VBLK)) {
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(NFSMAJOR(rdev));
 		*tl = txdr_unsigned(NFSMINOR(rdev));
 	}
 	if (nd->nd_flag & ND_NFSV4) {
 		/* Follow the create with GetFH and Getattr operations. */
 		NFSGETATTR_ATTRBIT(&attrbits);
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 	}
 	/* The NFSv2 kludge: rdev is passed along with the attributes. */
 	if (nd->nd_flag & ND_NFSV2)
 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
 	error = nfscl_request(nd, dvp, p, cred, dstuff);
 	if (error)
 		return (error);
 	/* NFSv4 has the directory data first, NFSv3 has it last (below). */
 	if (nd->nd_flag & ND_NFSV4)
 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
 	if (!nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV4) {
 			/*
 			 * NOTE(review): skips five words ahead of the
 			 * attribute bitmap; what they hold is not visible
 			 * here - confirm against the reply layout.
 			 */
 			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 			if (error)
 				goto nfsmout;
 		}
 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
 		if (error)
 			goto nfsmout;
 	}
 	if (nd->nd_flag & ND_NFSV3)
 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
 	/* A parse error takes precedence over the server's reply status. */
 	if (!error && nd->nd_repstat)
 		error = nd->nd_repstat;
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * nfs file create call
  * Mostly just call the appropriate routine. (I separated out v4, so that
  * error recovery wouldn't be as difficult.)
  * For an NFSv4 mount an open owner is obtained with nfscl_open() before
  * each nfsrpc_createv4() attempt and released afterwards, and the create
  * is retried for a set of recoverable errors. Other mounts go straight to
  * nfsrpc_createv23().
  */
 APPLESTATIC int
 nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
     int *attrflagp, int *dattrflagp, void *dstuff)
 {
 	int error = 0, newone, expireret = 0, retrycnt, unlocked;
 	struct nfsclowner *owp;
 	struct nfscldeleg *dp;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(dvp));
 	u_int32_t clidrev;
 
 	if (NFSHASNFSV4(nmp)) {
 	    retrycnt = 0;
 	    do {
 		dp = NULL;
 		error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
 		    NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
 		    NULL, 1);
 		if (error)
 			return (error);
 		/* A clidrev of 0 disables the nfscl_hasexpired() check below. */
 		if (nmp->nm_clp != NULL)
 			clidrev = nmp->nm_clp->nfsc_clientidrev;
 		else
 			clidrev = 0;
 		error = nfsrpc_createv4(dvp, name, namelen, vap, cverf, fmode,
 		  owp, &dp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
 		  dstuff, &unlocked);
 		/*
 		 * There is no need to invalidate cached attributes here,
 		 * since new post-delegation issue attributes are always
 		 * returned by nfsrpc_createv4() and these will update the
 		 * attribute cache.
 		 */
 		if (dp != NULL)
 			(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
 			    (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
 		nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
 		/*
 		 * Note that retrycnt is only advanced for the expired/bad
 		 * stateid case; the errors handled by nfs_catnap() are
 		 * retried without a limit.
 		 */
 		if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
 		    error == NFSERR_BADSESSION) {
 			(void) nfs_catnap(PZERO, error, "nfs_open");
 		} else if ((error == NFSERR_EXPIRED ||
 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
 			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
 			retrycnt++;
 		}
 	    } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
 		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
 		error == NFSERR_BADSESSION ||
 		((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
 		 expireret == 0 && clidrev != 0 && retrycnt < 4));
 	    /* Any error left once retrycnt has reached four becomes EIO. */
 	    if (error && retrycnt >= 4)
 		    error = EIO;
 	} else {
 		error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
 		    fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
 		    dstuff);
 	}
 	return (error);
 }
 
 /*
  * Create RPC used for NFSv2 and NFSv3 mounts.
  * An NFSv3 request carries a create mode word: an O_EXCL create sends
  * NFSCREATE_EXCLUSIVE followed by the verifier in cverf, anything else
  * sends NFSCREATE_UNCHECKED followed by the attributes from vap. Other
  * requests send only the attributes, filled in with NFSSATTR_SIZE0.
  * *nfhpp, *attrflagp and *dattrflagp are cleared on entry.
  * Returns ENAMETOOLONG if namelen exceeds NFS_MAXNAMLEN; otherwise 0, a
  * request or parse error, or the server's reply status.
  */
 static int
 nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
     int *attrflagp, int *dattrflagp, void *dstuff)
 {
 	struct nfsrv_descript reqdesc, *nd = &reqdesc;
 	u_int32_t *tl;
 	int error = 0;
 
 	*nfhpp = NULL;
 	*attrflagp = 0;
 	*dattrflagp = 0;
 	if (namelen > NFS_MAXNAMLEN)
 		return (ENAMETOOLONG);
 
 	/* Build the request: name, then the version specific arguments. */
 	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
 	(void) nfsm_strtom(nd, name, namelen);
 	if ((nd->nd_flag & ND_NFSV3) == 0) {
 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
 	} else {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		if ((fmode & O_EXCL) == 0) {
 			*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
 			nfscl_fillsattr(nd, vap, dvp, 0, 0);
 		} else {
 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
 			tl[0] = cverf.lval[0];
 			tl[1] = cverf.lval[1];
 		}
 	}
 
 	error = nfscl_request(nd, dvp, p, cred, dstuff);
 	if (error != 0)
 		return (error);
 
 	/* The new file handle and attributes are only there on success. */
 	if (nd->nd_repstat == 0) {
 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
 		if (error != 0)
 			goto nfsmout;
 	}
 	if ((nd->nd_flag & ND_NFSV3) != 0)
 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
 	/* A parse error takes precedence over the server's reply status. */
 	if (error == 0 && nd->nd_repstat != 0)
 		error = nd->nd_repstat;
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 static int
 nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
     int *dattrflagp, void *dstuff, int *unlockedp)
 {
 	u_int32_t *tl;
 	int error = 0, deleg, newone, ret, acesize, limitby;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsclopen *op;
 	struct nfscldeleg *dp = NULL;
 	struct nfsnode *np;
 	struct nfsfh *nfhp;
 	nfsattrbit_t attrbits;
 	nfsv4stateid_t stateid;
 	u_int32_t rflags;
 	struct nfsmount *nmp;
 	struct nfsclsession *tsep;
 
 	nmp = VFSTONFS(dvp->v_mount);
 	np = VTONFS(dvp);
 	*unlockedp = 0;
 	*nfhpp = NULL;
 	*dpp = NULL;
 	*attrflagp = 0;
 	*dattrflagp = 0;
 	if (namelen > NFS_MAXNAMLEN)
 		return (ENAMETOOLONG);
 	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
 	/*
 	 * For V4, this is actually an Open op.
 	 */
 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(owp->nfsow_seqid);
 	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
 	    NFSV4OPEN_ACCESSREAD);
 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
 	tsep = nfsmnt_mdssession(nmp);
 	*tl++ = tsep->nfsess_clientid.lval[0];
 	*tl = tsep->nfsess_clientid.lval[1];
 	(void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
 	if (fmode & O_EXCL) {
 		if (NFSHASNFSV4N(nmp)) {
 			if (NFSHASSESSPERSIST(nmp)) {
 				/* Use GUARDED for persistent sessions. */
 				*tl = txdr_unsigned(NFSCREATE_GUARDED);
 				nfscl_fillsattr(nd, vap, dvp, 0, 0);
 			} else {
 				/* Otherwise, use EXCLUSIVE4_1. */
 				*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
 				NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
 				*tl++ = cverf.lval[0];
 				*tl = cverf.lval[1];
 				nfscl_fillsattr(nd, vap, dvp, 0, 0);
 			}
 		} else {
 			/* NFSv4.0 */
 			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
 			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
 			*tl++ = cverf.lval[0];
 			*tl = cverf.lval[1];
 		}
 	} else {
 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
 	}
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
 	(void) nfsm_strtom(nd, name, namelen);
 	/* Get the new file's handle and attributes. */
 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
 	NFSGETATTR_ATTRBIT(&attrbits);
 	(void) nfsrv_putattrbit(nd, &attrbits);
 	/* Get the directory's post-op attributes. */
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(NFSV4OP_PUTFH);
 	(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
 	(void) nfsrv_putattrbit(nd, &attrbits);
 	error = nfscl_request(nd, dvp, p, cred, dstuff);
 	if (error)
 		return (error);
 	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
 	if (nd->nd_repstat == 0) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
 		    6 * NFSX_UNSIGNED);
 		stateid.seqid = *tl++;
 		stateid.other[0] = *tl++;
 		stateid.other[1] = *tl++;
 		stateid.other[2] = *tl;
 		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
 		(void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		deleg = fxdr_unsigned(int, *tl);
 		if (deleg == NFSV4OPEN_DELEGATEREAD ||
 		    deleg == NFSV4OPEN_DELEGATEWRITE) {
 			if (!(owp->nfsow_clp->nfsc_flags &
 			      NFSCLFLAGS_FIRSTDELEG))
 				owp->nfsow_clp->nfsc_flags |=
 				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
 			MALLOC(dp, struct nfscldeleg *,
 			    sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
 			    M_NFSCLDELEG, M_WAITOK);
 			LIST_INIT(&dp->nfsdl_owner);
 			LIST_INIT(&dp->nfsdl_lock);
 			dp->nfsdl_clp = owp->nfsow_clp;
 			newnfs_copyincred(cred, &dp->nfsdl_cred);
 			nfscl_lockinit(&dp->nfsdl_rwlock);
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
 			    NFSX_UNSIGNED);
 			dp->nfsdl_stateid.seqid = *tl++;
 			dp->nfsdl_stateid.other[0] = *tl++;
 			dp->nfsdl_stateid.other[1] = *tl++;
 			dp->nfsdl_stateid.other[2] = *tl++;
 			ret = fxdr_unsigned(int, *tl);
 			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
 				dp->nfsdl_flags = NFSCLDL_WRITE;
 				/*
 				 * Indicates how much the file can grow.
 				 */
 				NFSM_DISSECT(tl, u_int32_t *,
 				    3 * NFSX_UNSIGNED);
 				limitby = fxdr_unsigned(int, *tl++);
 				switch (limitby) {
 				case NFSV4OPEN_LIMITSIZE:
 					dp->nfsdl_sizelimit = fxdr_hyper(tl);
 					break;
 				case NFSV4OPEN_LIMITBLOCKS:
 					dp->nfsdl_sizelimit =
 					    fxdr_unsigned(u_int64_t, *tl++);
 					dp->nfsdl_sizelimit *=
 					    fxdr_unsigned(u_int64_t, *tl);
 					break;
 				default:
 					error = NFSERR_BADXDR;
 					goto nfsmout;
 				}
 			} else {
 				dp->nfsdl_flags = NFSCLDL_READ;
 			}
 			if (ret)
 				dp->nfsdl_flags |= NFSCLDL_RECALL;
 			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
 			    &acesize, p);
 			if (error)
 				goto nfsmout;
 		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
 			error = NFSERR_BADXDR;
 			goto nfsmout;
 		}
 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
 		if (error)
 			goto nfsmout;
 		/* Get rid of the PutFH and Getattr status values. */
 		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 		/* Load the directory attributes. */
 		error = nfsm_loadattr(nd, dnap);
 		if (error)
 			goto nfsmout;
 		*dattrflagp = 1;
 		if (dp != NULL && *attrflagp) {
 			dp->nfsdl_change = nnap->na_filerev;
 			dp->nfsdl_modtime = nnap->na_mtime;
 			dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
 		}
 		/*
 		 * We can now complete the Open state.
 		 */
 		nfhp = *nfhpp;
 		if (dp != NULL) {
 			dp->nfsdl_fhlen = nfhp->nfh_len;
 			NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
 		}
 		/*
 		 * Get an Open structure that will be
 		 * attached to the OpenOwner, acquired already.
 		 */
 		error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
 		    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
 		    cred, p, NULL, &op, &newone, NULL, 0);
 		if (error)
 			goto nfsmout;
 		op->nfso_stateid = stateid;
 		newnfs_copyincred(cred, &op->nfso_cred);
 		if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
 		    do {
 			ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
 			    nfhp->nfh_len, op, cred, p);
 			if (ret == NFSERR_DELAY)
 			    (void) nfs_catnap(PZERO, ret, "nfs_create");
 		    } while (ret == NFSERR_DELAY);
 		    error = ret;
 		}
 
 		/*
 		 * If the server is handing out delegations, but we didn't
 		 * get one because an OpenConfirm was required, try the
 		 * Open again, to get a delegation. This is a harmless no-op,
 		 * from a server's point of view.
 		 */
 		if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
 		    (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
 		    !error && dp == NULL) {
 		    do {
 			ret = nfsrpc_openrpc(VFSTONFS(vnode_mount(dvp)), dvp,
 			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
 			    nfhp->nfh_fh, nfhp->nfh_len,
 			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
 			    name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
 			if (ret == NFSERR_DELAY)
 			    (void) nfs_catnap(PZERO, ret, "nfs_crt2");
 		    } while (ret == NFSERR_DELAY);
 		    if (ret) {
 			if (dp != NULL) {
 				FREE((caddr_t)dp, M_NFSCLDELEG);
 				dp = NULL;
 			}
 			if (ret == NFSERR_STALECLIENTID ||
 			    ret == NFSERR_STALEDONTRECOVER ||
 			    ret == NFSERR_BADSESSION)
 				error = ret;
 		    }
 		}
 		nfscl_openrelease(nmp, op, error, newone);
 		*unlockedp = 1;
 	}
 	if (nd->nd_repstat != 0 && error == 0)
 		error = nd->nd_repstat;
 	if (error == NFSERR_STALECLIENTID)
 		nfscl_initiate_recovery(owp->nfsow_clp);
 nfsmout:
 	if (!error)
 		*dpp = dp;
 	else if (dp != NULL)
 		FREE((caddr_t)dp, M_NFSCLDELEG);
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * Nfs remove rpc
  */
 APPLESTATIC int
 nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp,
     void *dstuff)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsnode *np;
 	struct nfsmount *nmp;
 	nfsv4stateid_t dstateid;
 	int error, ret = 0, i;
 
 	*dattrflagp = 0;
 	if (namelen > NFS_MAXNAMLEN)
 		return (ENAMETOOLONG);
 	nmp = VFSTONFS(vnode_mount(dvp));
 tryagain:
 	if (NFSHASNFSV4(nmp) && ret == 0) {
 		ret = nfscl_removedeleg(vp, p, &dstateid);
 		if (ret == 1) {
 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp);
 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
 			    NFSX_UNSIGNED);
 			if (NFSHASNFSV4N(nmp))
 				*tl++ = 0;
 			else
 				*tl++ = dstateid.seqid;
 			*tl++ = dstateid.other[0];
 			*tl++ = dstateid.other[1];
 			*tl++ = dstateid.other[2];
 			*tl = txdr_unsigned(NFSV4OP_PUTFH);
 			np = VTONFS(dvp);
 			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
 			    np->n_fhp->nfh_len, 0);
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(NFSV4OP_REMOVE);
 		}
 	} else {
 		ret = 0;
 	}
 	if (ret == 0)
 		NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp);
 	(void) nfsm_strtom(nd, name, namelen);
 	error = nfscl_request(nd, dvp, p, cred, dstuff);
 	if (error)
 		return (error);
 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
 		/* For NFSv4, parse out any Delereturn replies. */
 		if (ret > 0 && nd->nd_repstat != 0 &&
 		    (nd->nd_flag & ND_NOMOREDATA)) {
 			/*
 			 * If the Delegreturn failed, try again without
 			 * it. The server will Recall, as required.
 			 */
 			mbuf_freem(nd->nd_mrep);
 			goto tryagain;
 		}
 		for (i = 0; i < (ret * 2); i++) {
 			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
 			    ND_NFSV4) {
 			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			    if (*(tl + 1))
 				nd->nd_flag |= ND_NOMOREDATA;
 			}
 		}
 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
 	}
 	if (nd->nd_repstat && !error)
 		error = nd->nd_repstat;
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * Do an nfs rename rpc.
  */
 APPLESTATIC int
 nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
     vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
     NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
     int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsmount *nmp;
 	struct nfsnode *np;
 	nfsattrbit_t attrbits;
 	nfsv4stateid_t fdstateid, tdstateid;
 	int error = 0, ret = 0, gottd = 0, gotfd = 0, i;
 	
 	*fattrflagp = 0;
 	*tattrflagp = 0;
 	nmp = VFSTONFS(vnode_mount(fdvp));
 	if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
 		return (ENAMETOOLONG);
 tryagain:
 	if (NFSHASNFSV4(nmp) && ret == 0) {
 		ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
 		    &tdstateid, &gottd, p);
 		if (gotfd && gottd) {
 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp);
 		} else if (gotfd) {
 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp);
 		} else if (gottd) {
 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp);
 		}
 		if (gotfd) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
 			if (NFSHASNFSV4N(nmp))
 				*tl++ = 0;
 			else
 				*tl++ = fdstateid.seqid;
 			*tl++ = fdstateid.other[0];
 			*tl++ = fdstateid.other[1];
 			*tl = fdstateid.other[2];
 			if (gottd) {
 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 				*tl = txdr_unsigned(NFSV4OP_PUTFH);
 				np = VTONFS(tvp);
 				(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
 				    np->n_fhp->nfh_len, 0);
 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 				*tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
 			}
 		}
 		if (gottd) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
 			if (NFSHASNFSV4N(nmp))
 				*tl++ = 0;
 			else
 				*tl++ = tdstateid.seqid;
 			*tl++ = tdstateid.other[0];
 			*tl++ = tdstateid.other[1];
 			*tl = tdstateid.other[2];
 		}
 		if (ret > 0) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(NFSV4OP_PUTFH);
 			np = VTONFS(fdvp);
 			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
 			    np->n_fhp->nfh_len, 0);
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(NFSV4OP_SAVEFH);
 		}
 	} else {
 		ret = 0;
 	}
 	if (ret == 0)
 		NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp);
 	if (nd->nd_flag & ND_NFSV4) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
 		NFSWCCATTR_ATTRBIT(&attrbits);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
 		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
 		    VTONFS(tdvp)->n_fhp->nfh_len, 0);
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 		nd->nd_flag |= ND_V4WCCATTR;
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_RENAME);
 	}
 	(void) nfsm_strtom(nd, fnameptr, fnamelen);
 	if (!(nd->nd_flag & ND_NFSV4))
 		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
 			VTONFS(tdvp)->n_fhp->nfh_len, 0);
 	(void) nfsm_strtom(nd, tnameptr, tnamelen);
 	error = nfscl_request(nd, fdvp, p, cred, fstuff);
 	if (error)
 		return (error);
 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
 		/* For NFSv4, parse out any Delereturn replies. */
 		if (ret > 0 && nd->nd_repstat != 0 &&
 		    (nd->nd_flag & ND_NOMOREDATA)) {
 			/*
 			 * If the Delegreturn failed, try again without
 			 * it. The server will Recall, as required.
 			 */
 			mbuf_freem(nd->nd_mrep);
 			goto tryagain;
 		}
 		for (i = 0; i < (ret * 2); i++) {
 			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
 			    ND_NFSV4) {
 			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			    if (*(tl + 1)) {
 				if (i == 0 && ret > 1) {
 				    /*
 				     * If the Delegreturn failed, try again
 				     * without it. The server will Recall, as
 				     * required.
 				     * If ret > 1, the first iteration of this
 				     * loop is the second DelegReturn result.
 				     */
 				    mbuf_freem(nd->nd_mrep);
 				    goto tryagain;
 				} else {
 				    nd->nd_flag |= ND_NOMOREDATA;
 				}
 			    }
 			}
 		}
 		/* Now, the first wcc attribute reply. */
 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			if (*(tl + 1))
 				nd->nd_flag |= ND_NOMOREDATA;
 		}
 		error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL,
 		    fstuff);
 		/* and the second wcc attribute reply. */
 		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
 		    !error) {
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			if (*(tl + 1))
 				nd->nd_flag |= ND_NOMOREDATA;
 		}
 		if (!error)
 			error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
 			    NULL, tstuff);
 	}
 	if (nd->nd_repstat && !error)
 		error = nd->nd_repstat;
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * nfs hard link create rpc
  */
 APPLESTATIC int
 nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
     struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	nfsattrbit_t attrbits;
 	int error = 0;
 
 	*attrflagp = 0;
 	*dattrflagp = 0;
 	if (namelen > NFS_MAXNAMLEN)
 		return (ENAMETOOLONG);
 	NFSCL_REQSTART(nd, NFSPROC_LINK, vp);
 	if (nd->nd_flag & ND_NFSV4) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
 	}
 	(void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh,
 		VTONFS(dvp)->n_fhp->nfh_len, 0);
 	if (nd->nd_flag & ND_NFSV4) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
 		NFSWCCATTR_ATTRBIT(&attrbits);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 		nd->nd_flag |= ND_V4WCCATTR;
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_LINK);
 	}
 	(void) nfsm_strtom(nd, name, namelen);
 	error = nfscl_request(nd, vp, p, cred, dstuff);
 	if (error)
 		return (error);
 	if (nd->nd_flag & ND_NFSV3) {
 		error = nfscl_postop_attr(nd, nap, attrflagp, dstuff);
 		if (!error)
 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
 			    NULL, dstuff);
 	} else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
 		/*
 		 * First, parse out the PutFH and Getattr result.
 		 */
 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		if (!(*(tl + 1)))
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		if (*(tl + 1))
 			nd->nd_flag |= ND_NOMOREDATA;
 		/*
 		 * Get the pre-op attributes.
 		 */
 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
 	}
 	if (nd->nd_repstat && !error)
 		error = nd->nd_repstat;
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * nfs symbolic link create rpc
  */
 APPLESTATIC int
 nfsrpc_symlink(vnode_t dvp, char *name, int namelen, char *target,
     struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
     int *dattrflagp, void *dstuff)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsmount *nmp;
 	int slen, error = 0;
 
 	*nfhpp = NULL;
 	*attrflagp = 0;
 	*dattrflagp = 0;
 	nmp = VFSTONFS(vnode_mount(dvp));
 	slen = strlen(target);
 	if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
 		return (ENAMETOOLONG);
 	NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp);
 	if (nd->nd_flag & ND_NFSV4) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFLNK);
 		(void) nfsm_strtom(nd, target, slen);
 	}
 	(void) nfsm_strtom(nd, name, namelen);
 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
 	if (!(nd->nd_flag & ND_NFSV4))
 		(void) nfsm_strtom(nd, target, slen);
 	if (nd->nd_flag & ND_NFSV2)
 		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
 	error = nfscl_request(nd, dvp, p, cred, dstuff);
 	if (error)
 		return (error);
 	if (nd->nd_flag & ND_NFSV4)
 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
 	if ((nd->nd_flag & ND_NFSV3) && !error) {
 		if (!nd->nd_repstat)
 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
 		if (!error)
 			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
 			    NULL, dstuff);
 	}
 	if (nd->nd_repstat && !error)
 		error = nd->nd_repstat;
 	mbuf_freem(nd->nd_mrep);
 	/*
 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
 	 * Only do this if vfs.nfs.ignore_eexist is set.
 	 * Never do this for NFSv4.1 or later minor versions, since sessions
 	 * should guarantee "exactly once" RPC semantics.
 	 */
 	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
 	    nmp->nm_minorvers == 0))
 		error = 0;
 	return (error);
 }
 
 /*
  * nfs make dir rpc
  */
 APPLESTATIC int
 nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
     int *dattrflagp, void *dstuff)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	nfsattrbit_t attrbits;
 	int error = 0;
 	struct nfsfh *fhp;
 	struct nfsmount *nmp;
 
 	*nfhpp = NULL;
 	*attrflagp = 0;
 	*dattrflagp = 0;
 	nmp = VFSTONFS(vnode_mount(dvp));
 	fhp = VTONFS(dvp)->n_fhp;
 	if (namelen > NFS_MAXNAMLEN)
 		return (ENAMETOOLONG);
 	NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp);
 	if (nd->nd_flag & ND_NFSV4) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFDIR);
 	}
 	(void) nfsm_strtom(nd, name, namelen);
 	nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
 	if (nd->nd_flag & ND_NFSV4) {
 		NFSGETATTR_ATTRBIT(&attrbits);
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_PUTFH);
 		(void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0);
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 	}
 	error = nfscl_request(nd, dvp, p, cred, dstuff);
 	if (error)
 		return (error);
 	if (nd->nd_flag & ND_NFSV4)
 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
 	if (!nd->nd_repstat && !error) {
 		if (nd->nd_flag & ND_NFSV4) {
 			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 		}
 		if (!error)
 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
 		if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
 			/* Get rid of the PutFH and Getattr status values. */
 			NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 			/* Load the directory attributes. */
 			error = nfsm_loadattr(nd, dnap);
 			if (error == 0)
 				*dattrflagp = 1;
 		}
 	}
 	if ((nd->nd_flag & ND_NFSV3) && !error)
 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
 	if (nd->nd_repstat && !error)
 		error = nd->nd_repstat;
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	/*
 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
 	 * Only do this if vfs.nfs.ignore_eexist is set.
 	 * Never do this for NFSv4.1 or later minor versions, since sessions
 	 * should guarantee "exactly once" RPC semantics.
 	 */
 	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
 	    nmp->nm_minorvers == 0))
 		error = 0;
 	return (error);
 }
 
 /*
  * nfs remove directory call
  */
 APPLESTATIC int
 nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
     NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	int error = 0;
 
 	*dattrflagp = 0;
 	if (namelen > NFS_MAXNAMLEN)
 		return (ENAMETOOLONG);
 	NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp);
 	(void) nfsm_strtom(nd, name, namelen);
 	error = nfscl_request(nd, dvp, p, cred, dstuff);
 	if (error)
 		return (error);
 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
 	if (nd->nd_repstat && !error)
 		error = nd->nd_repstat;
 	mbuf_freem(nd->nd_mrep);
 	/*
 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
 	 */
 	if (error == ENOENT)
 		error = 0;
 	return (error);
 }
 
 /*
  * Readdir rpc.
  * Always returns with either uio_resid unchanged, if you are at the
  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
  * filled in.
  * I felt this would allow caching of directory blocks more easily
  * than returning a pertially filled block.
  * Directory offset cookies:
  * Oh my, what to do with them...
  * I can think of three ways to deal with them:
  * 1 - have the layer above these RPCs maintain a map between logical
  *     directory byte offsets and the NFS directory offset cookies
  * 2 - pass the opaque directory offset cookies up into userland
  *     and let the libc functions deal with them, via the system call
  * 3 - return them to userland in the "struct dirent", so future versions
  *     of libc can use them and do whatever is necessary to make things work
  *     above these rpc calls, in the meantime
  * For now, I do #3 by "hiding" the directory offset cookies after the
  * d_name field in struct dirent. This is space inside d_reclen that
  * will be ignored by anything that doesn't know about them.
  * The directory offset cookies are filled in as the last 8 bytes of
  * each directory entry, after d_name. Someday, the userland libc
  * functions may be able to use these. In the meantime, it satisfies
  * OpenBSD's requirements for cookies being returned.
  * If expects the directory offset cookie for the read to be in uio_offset
  * and returns the one for the next entry after this directory block in
  * there, as well.
  */
 APPLESTATIC int
 nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
     int *eofp, void *stuff)
 {
 	int len, left;
 	struct dirent *dp = NULL;
 	u_int32_t *tl;
 	nfsquad_t cookie, ncookie;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	struct nfsnode *dnp = VTONFS(vp);
 	struct nfsvattr nfsva;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
 	int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
 	long dotfileid, dotdotfileid = 0;
 	u_int32_t fakefileno = 0xffffffff, rderr;
 	char *cp;
 	nfsattrbit_t attrbits, dattrbits;
 	u_int32_t *tl2 = NULL;
 	size_t tresid;
 
 	KASSERT(uiop->uio_iovcnt == 1 &&
 	    (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
 	    ("nfs readdirrpc bad uio"));
 
 	/*
 	 * There is no point in reading a lot more than uio_resid, however
 	 * adding one additional DIRBLKSIZ makes sense. Since uio_resid
 	 * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
 	 * will never make readsize > nm_readdirsize.
 	 */
 	readsize = nmp->nm_readdirsize;
 	if (readsize > uio_uio_resid(uiop))
 		readsize = uio_uio_resid(uiop) + DIRBLKSIZ;
 
 	*attrflagp = 0;
 	if (eofp)
 		*eofp = 0;
 	tresid = uio_uio_resid(uiop);
 	cookie.lval[0] = cookiep->nfsuquad[0];
 	cookie.lval[1] = cookiep->nfsuquad[1];
 	nd->nd_mrep = NULL;
 
 	/*
 	 * For NFSv4, first create the "." and ".." entries.
 	 */
 	if (NFSHASNFSV4(nmp)) {
 		reqsize = 6 * NFSX_UNSIGNED;
 		NFSGETATTR_ATTRBIT(&dattrbits);
 		NFSZERO_ATTRBIT(&attrbits);
 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
 		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
 		    NFSATTRBIT_MOUNTEDONFILEID)) {
 			NFSSETBIT_ATTRBIT(&attrbits,
 			    NFSATTRBIT_MOUNTEDONFILEID);
 			gotmnton = 1;
 		} else {
 			/*
 			 * Must fake it. Use the fileno, except when the
 			 * fsid is != to that of the directory. For that
 			 * case, generate a fake fileno that is not the same.
 			 */
 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
 			gotmnton = 0;
 		}
 
 		/*
 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
 		 */
 		if (uiop->uio_offset == 0) {
 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
 			(void) nfsrv_putattrbit(nd, &attrbits);
 			error = nfscl_request(nd, vp, p, cred, stuff);
 			if (error)
 			    return (error);
 			dotfileid = 0;	/* Fake out the compiler. */
 			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
 			    error = nfsm_loadattr(nd, &nfsva);
 			    if (error != 0)
 				goto nfsmout;
 			    dotfileid = nfsva.na_fileid;
 			}
 			if (nd->nd_repstat == 0) {
 			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 			    len = fxdr_unsigned(int, *(tl + 4));
 			    if (len > 0 && len <= NFSX_V4FHMAX)
 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
 			    else
 				error = EPERM;
 			    if (!error) {
 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
 				nfsva.na_mntonfileno = 0xffffffff;
 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
 				    NULL, NULL, NULL, p, cred);
 				if (error) {
 				    dotdotfileid = dotfileid;
 				} else if (gotmnton) {
 				    if (nfsva.na_mntonfileno != 0xffffffff)
 					dotdotfileid = nfsva.na_mntonfileno;
 				    else
 					dotdotfileid = nfsva.na_fileid;
 				} else if (nfsva.na_filesid[0] ==
 				    dnp->n_vattr.na_filesid[0] &&
 				    nfsva.na_filesid[1] ==
 				    dnp->n_vattr.na_filesid[1]) {
 				    dotdotfileid = nfsva.na_fileid;
 				} else {
 				    do {
 					fakefileno--;
 				    } while (fakefileno ==
 					nfsva.na_fileid);
 				    dotdotfileid = fakefileno;
 				}
 			    }
 			} else if (nd->nd_repstat == NFSERR_NOENT) {
 			    /*
 			     * Lookupp returns NFSERR_NOENT when we are
 			     * at the root, so just use the current dir.
 			     */
 			    nd->nd_repstat = 0;
 			    dotdotfileid = dotfileid;
 			} else {
 			    error = nd->nd_repstat;
 			}
 			mbuf_freem(nd->nd_mrep);
 			if (error)
 			    return (error);
 			nd->nd_mrep = NULL;
-			dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
+			dp = (struct dirent *)uio_iov_base(uiop);
+			dp->d_off = 0;
 			dp->d_type = DT_DIR;
 			dp->d_fileno = dotfileid;
 			dp->d_namlen = 1;
+			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
 			dp->d_name[0] = '.';
-			dp->d_name[1] = '\0';
-			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
+			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
 			/*
 			 * Just make these offset cookie 0.
 			 */
-			tl = (u_int32_t *)&dp->d_name[4];
+			tl = (u_int32_t *)&dp->d_name[8];
 			*tl++ = 0;
 			*tl = 0;
 			blksiz += dp->d_reclen;
 			uio_uio_resid_add(uiop, -(dp->d_reclen));
 			uiop->uio_offset += dp->d_reclen;
 			uio_iov_base_add(uiop, dp->d_reclen);
 			uio_iov_len_add(uiop, -(dp->d_reclen));
-			dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
+			dp = (struct dirent *)uio_iov_base(uiop);
+			dp->d_off = 0;
 			dp->d_type = DT_DIR;
 			dp->d_fileno = dotdotfileid;
 			dp->d_namlen = 2;
+			*((uint64_t *)dp->d_name) = 0;
 			dp->d_name[0] = '.';
 			dp->d_name[1] = '.';
-			dp->d_name[2] = '\0';
-			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
+			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
 			/*
 			 * Just make these offset cookie 0.
 			 */
-			tl = (u_int32_t *)&dp->d_name[4];
+			tl = (u_int32_t *)&dp->d_name[8];
 			*tl++ = 0;
 			*tl = 0;
 			blksiz += dp->d_reclen;
 			uio_uio_resid_add(uiop, -(dp->d_reclen));
 			uiop->uio_offset += dp->d_reclen;
 			uio_iov_base_add(uiop, dp->d_reclen);
 			uio_iov_len_add(uiop, -(dp->d_reclen));
 		}
 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
 	} else {
 		reqsize = 5 * NFSX_UNSIGNED;
 	}
 
 
 	/*
 	 * Loop around doing readdir rpc's of size readsize.
 	 * The stopping criteria is EOF or buffer full.
 	 */
 	while (more_dirs && bigenough) {
 		*attrflagp = 0;
 		NFSCL_REQSTART(nd, NFSPROC_READDIR, vp);
 		if (nd->nd_flag & ND_NFSV2) {
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = cookie.lval[1];
 			*tl = txdr_unsigned(readsize);
 		} else {
 			NFSM_BUILD(tl, u_int32_t *, reqsize);
 			*tl++ = cookie.lval[0];
 			*tl++ = cookie.lval[1];
 			if (cookie.qval == 0) {
 				*tl++ = 0;
 				*tl++ = 0;
 			} else {
 				NFSLOCKNODE(dnp);
 				*tl++ = dnp->n_cookieverf.nfsuquad[0];
 				*tl++ = dnp->n_cookieverf.nfsuquad[1];
 				NFSUNLOCKNODE(dnp);
 			}
 			if (nd->nd_flag & ND_NFSV4) {
 				*tl++ = txdr_unsigned(readsize);
 				*tl = txdr_unsigned(readsize);
 				(void) nfsrv_putattrbit(nd, &attrbits);
 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 				*tl = txdr_unsigned(NFSV4OP_GETATTR);
 				(void) nfsrv_putattrbit(nd, &dattrbits);
 			} else {
 				*tl = txdr_unsigned(readsize);
 			}
 		}
 		error = nfscl_request(nd, vp, p, cred, stuff);
 		if (error)
 			return (error);
 		if (!(nd->nd_flag & ND_NFSV2)) {
 			if (nd->nd_flag & ND_NFSV3)
 				error = nfscl_postop_attr(nd, nap, attrflagp,
 				    stuff);
 			if (!nd->nd_repstat && !error) {
 				NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
 				NFSLOCKNODE(dnp);
 				dnp->n_cookieverf.nfsuquad[0] = *tl++;
 				dnp->n_cookieverf.nfsuquad[1] = *tl;
 				NFSUNLOCKNODE(dnp);
 			}
 		}
 		if (nd->nd_repstat || error) {
 			if (!error)
 				error = nd->nd_repstat;
 			goto nfsmout;
 		}
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		more_dirs = fxdr_unsigned(int, *tl);
 		if (!more_dirs)
 			tryformoredirs = 0;
 	
 		/* loop through the dir entries, doctoring them to 4bsd form */
 		while (more_dirs && bigenough) {
 			if (nd->nd_flag & ND_NFSV4) {
 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
 				ncookie.lval[0] = *tl++;
 				ncookie.lval[1] = *tl++;
 				len = fxdr_unsigned(int, *tl);
 			} else if (nd->nd_flag & ND_NFSV3) {
 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
 				nfsva.na_fileid = fxdr_hyper(tl);
 				tl += 2;
 				len = fxdr_unsigned(int, *tl);
 			} else {
 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
 				nfsva.na_fileid =
 				    fxdr_unsigned(long, *tl++);
 				len = fxdr_unsigned(int, *tl);
 			}
 			if (len <= 0 || len > NFS_MAXNAMLEN) {
 				error = EBADRPC;
 				goto nfsmout;
 			}
-			tlen = NFSM_RNDUP(len);
+			tlen = roundup2(len, 8);
 			if (tlen == len)
-				tlen += 4;  /* To ensure null termination */
+				tlen += 8;  /* To ensure null termination. */
 			left = DIRBLKSIZ - blksiz;
-			if ((int)(tlen + DIRHDSIZ + NFSX_HYPER) > left) {
+			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
 				dp->d_reclen += left;
 				uio_iov_base_add(uiop, left);
 				uio_iov_len_add(uiop, -(left));
 				uio_uio_resid_add(uiop, -(left));
 				uiop->uio_offset += left;
 				blksiz = 0;
 			}
-			if ((int)(tlen + DIRHDSIZ + NFSX_HYPER) > uio_uio_resid(uiop))
+			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
+			    uio_uio_resid(uiop))
 				bigenough = 0;
 			if (bigenough) {
-				dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
+				dp = (struct dirent *)uio_iov_base(uiop);
+				dp->d_off = 0;
 				dp->d_namlen = len;
-				dp->d_reclen = tlen + DIRHDSIZ + NFSX_HYPER;
+				dp->d_reclen = _GENERIC_DIRLEN(len) +
+				    NFSX_HYPER;
 				dp->d_type = DT_UNKNOWN;
 				blksiz += dp->d_reclen;
 				if (blksiz == DIRBLKSIZ)
 					blksiz = 0;
 				uio_uio_resid_add(uiop, -(DIRHDSIZ));
 				uiop->uio_offset += DIRHDSIZ;
 				uio_iov_base_add(uiop, DIRHDSIZ);
 				uio_iov_len_add(uiop, -(DIRHDSIZ));
 				error = nfsm_mbufuio(nd, uiop, len);
 				if (error)
 					goto nfsmout;
-				cp = CAST_DOWN(caddr_t, uio_iov_base(uiop));
+				cp = uio_iov_base(uiop);
 				tlen -= len;
 				*cp = '\0';	/* null terminate */
 				cp += tlen;	/* points to cookie storage */
 				tl2 = (u_int32_t *)cp;
 				uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
 				uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
 				uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
 				uiop->uio_offset += (tlen + NFSX_HYPER);
 			} else {
 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
 				if (error)
 					goto nfsmout;
 			}
 			if (nd->nd_flag & ND_NFSV4) {
 				rderr = 0;
 				nfsva.na_mntonfileno = 0xffffffff;
 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
 				    NULL, NULL, &rderr, p, cred);
 				if (error)
 					goto nfsmout;
 				NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			} else if (nd->nd_flag & ND_NFSV3) {
 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
 				ncookie.lval[0] = *tl++;
 				ncookie.lval[1] = *tl++;
 			} else {
 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
 				ncookie.lval[0] = 0;
 				ncookie.lval[1] = *tl++;
 			}
 			if (bigenough) {
 			    if (nd->nd_flag & ND_NFSV4) {
 				if (rderr) {
 				    dp->d_fileno = 0;
 				} else {
 				    if (gotmnton) {
 					if (nfsva.na_mntonfileno != 0xffffffff)
 					    dp->d_fileno = nfsva.na_mntonfileno;
 					else
 					    dp->d_fileno = nfsva.na_fileid;
 				    } else if (nfsva.na_filesid[0] ==
 					dnp->n_vattr.na_filesid[0] &&
 					nfsva.na_filesid[1] ==
 					dnp->n_vattr.na_filesid[1]) {
 					dp->d_fileno = nfsva.na_fileid;
 				    } else {
 					do {
 					    fakefileno--;
 					} while (fakefileno ==
 					    nfsva.na_fileid);
 					dp->d_fileno = fakefileno;
 				    }
 				    dp->d_type = vtonfs_dtype(nfsva.na_type);
 				}
 			    } else {
 				dp->d_fileno = nfsva.na_fileid;
 			    }
 			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
 				ncookie.lval[0];
 			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
 				ncookie.lval[1];
 			}
 			more_dirs = fxdr_unsigned(int, *tl);
 		}
 		/*
 		 * If at end of rpc data, get the eof boolean
 		 */
 		if (!more_dirs) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			eof = fxdr_unsigned(int, *tl);
 			if (tryformoredirs)
 				more_dirs = !eof;
 			if (nd->nd_flag & ND_NFSV4) {
 				error = nfscl_postop_attr(nd, nap, attrflagp,
 				    stuff);
 				if (error)
 					goto nfsmout;
 			}
 		}
 		mbuf_freem(nd->nd_mrep);
 		nd->nd_mrep = NULL;
 	}
 	/*
 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
 	 * by increasing d_reclen for the last record.
 	 */
 	if (blksiz > 0) {
 		left = DIRBLKSIZ - blksiz;
 		dp->d_reclen += left;
 		uio_iov_base_add(uiop, left);
 		uio_iov_len_add(uiop, -(left));
 		uio_uio_resid_add(uiop, -(left));
 		uiop->uio_offset += left;
 	}
 
 	/*
 	 * If returning no data, assume end of file.
 	 * If not bigenough, return not end of file, since you aren't
 	 *    returning all the data
 	 * Otherwise, return the eof flag from the server.
 	 */
 	if (eofp) {
 		if (tresid == ((size_t)(uio_uio_resid(uiop))))
 			*eofp = 1;
 		else if (!bigenough)
 			*eofp = 0;
 		else
 			*eofp = eof;
 	}
 
 	/*
 	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
 	 */
-	while (uio_uio_resid(uiop) > 0 && ((size_t)(uio_uio_resid(uiop))) != tresid) {
-		dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
+	while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
+		dp = (struct dirent *)uio_iov_base(uiop);
 		dp->d_type = DT_UNKNOWN;
 		dp->d_fileno = 0;
 		dp->d_namlen = 0;
 		dp->d_name[0] = '\0';
 		tl = (u_int32_t *)&dp->d_name[4];
 		*tl++ = cookie.lval[0];
 		*tl = cookie.lval[1];
 		dp->d_reclen = DIRBLKSIZ;
 		uio_iov_base_add(uiop, DIRBLKSIZ);
 		uio_iov_len_add(uiop, -(DIRBLKSIZ));
 		uio_uio_resid_add(uiop, -(DIRBLKSIZ));
 		uiop->uio_offset += DIRBLKSIZ;
 	}
 
 nfsmout:
 	if (nd->nd_mrep != NULL)
 		mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 #ifndef APPLE
 /*
  * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
  * (Also used for NFS V4 when mount flag set.)
  * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
  *
  * The uio must consist of a single iovec whose resid is a multiple of
  * DIRBLKSIZ (asserted below).  Each entry is written as a struct dirent
  * header and name, followed by pad bytes and an NFSX_HYPER sized cookie.
  * For every entry copied, *cookiep is updated to that entry's cookie.
  * When eofp is non-NULL, *eofp is set from the rules documented near the
  * end of the function.  nap/attrflagp are handed to nfscl_postop_attr().
  * When a file handle is returned for an entry (other than ".."), the
  * entry's attributes are loaded via nfscl_loadattrcache() and the name may
  * be entered in the name cache via cache_enter_time().
  * Returns 0 or an error number.
  */
 APPLESTATIC int
 nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
     int *eofp, void *stuff)
 {
 	int len, left;
 	struct dirent *dp = NULL;
 	u_int32_t *tl;
 	vnode_t newvp = NULLVP;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nameidata nami, *ndp = &nami;
 	struct componentname *cnp = &ndp->ni_cnd;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	struct nfsnode *dnp = VTONFS(vp), *np;
 	struct nfsvattr nfsva;
 	struct nfsfh *nfhp;
 	nfsquad_t cookie, ncookie;
 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
 	int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
 	int isdotdot = 0, unlocknewvp = 0;
 	long dotfileid, dotdotfileid = 0, fileno = 0;
 	char *cp;
 	nfsattrbit_t attrbits, dattrbits;
 	size_t tresid;
 	u_int32_t *tl2 = NULL, fakefileno = 0xffffffff, rderr;
 	struct timespec dctime;
 
 	KASSERT(uiop->uio_iovcnt == 1 &&
 	    (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
 	    ("nfs readdirplusrpc bad uio"));
 	timespecclear(&dctime);
 	*attrflagp = 0;
 	if (eofp != NULL)
 		*eofp = 0;
 	ndp->ni_dvp = vp;
 	nd->nd_mrep = NULL;
 	cookie.lval[0] = cookiep->nfsuquad[0];
 	cookie.lval[1] = cookiep->nfsuquad[1];
 	/* Remember the starting resid so "nothing copied" can be detected. */
 	tresid = uio_uio_resid(uiop);
 
 	/*
 	 * For NFSv4, first create the "." and ".." entries.
 	 */
 	if (NFSHASNFSV4(nmp)) {
 		NFSGETATTR_ATTRBIT(&dattrbits);
 		NFSZERO_ATTRBIT(&attrbits);
 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
 		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
 		    NFSATTRBIT_MOUNTEDONFILEID)) {
 			NFSSETBIT_ATTRBIT(&attrbits,
 			    NFSATTRBIT_MOUNTEDONFILEID);
 			gotmnton = 1;
 		} else {
 			/*
 			 * Must fake it. Use the fileno, except when the
 			 * fsid is != to that of the directory. For that
 			 * case, generate a fake fileno that is not the same.
 			 */
 			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
 			gotmnton = 0;
 		}
 
 		/*
 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
 		 */
 		if (uiop->uio_offset == 0) {
 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
 			(void) nfsrv_putattrbit(nd, &attrbits);
 			error = nfscl_request(nd, vp, p, cred, stuff);
 			if (error)
 			    return (error);
 			dotfileid = 0;	/* Fake out the compiler. */
 			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
 			    error = nfsm_loadattr(nd, &nfsva);
 			    if (error != 0)
 				goto nfsmout;
 			    dctime = nfsva.na_ctime;
 			    dotfileid = nfsva.na_fileid;
 			}
 			if (nd->nd_repstat == 0) {
 			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 			    /* The fifth word is used as the file handle length. */
 			    len = fxdr_unsigned(int, *(tl + 4));
 			    if (len > 0 && len <= NFSX_V4FHMAX)
 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
 			    else
 				error = EPERM;
 			    if (!error) {
 				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
 				nfsva.na_mntonfileno = 0xffffffff;
 				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
 				    NULL, NULL, NULL, p, cred);
 				if (error) {
 				    dotdotfileid = dotfileid;
 				} else if (gotmnton) {
 				    if (nfsva.na_mntonfileno != 0xffffffff)
 					dotdotfileid = nfsva.na_mntonfileno;
 				    else
 					dotdotfileid = nfsva.na_fileid;
 				} else if (nfsva.na_filesid[0] ==
 				    dnp->n_vattr.na_filesid[0] &&
 				    nfsva.na_filesid[1] ==
 				    dnp->n_vattr.na_filesid[1]) {
 				    dotdotfileid = nfsva.na_fileid;
 				} else {
 				    do {
 					fakefileno--;
 				    } while (fakefileno ==
 					nfsva.na_fileid);
 				    dotdotfileid = fakefileno;
 				}
 			    }
 			} else if (nd->nd_repstat == NFSERR_NOENT) {
 			    /*
 			     * Lookupp returns NFSERR_NOENT when we are
 			     * at the root, so just use the current dir.
 			     */
 			    nd->nd_repstat = 0;
 			    dotdotfileid = dotfileid;
 			} else {
 			    error = nd->nd_repstat;
 			}
 			mbuf_freem(nd->nd_mrep);
 			if (error)
 			    return (error);
 			nd->nd_mrep = NULL;
 			dp = (struct dirent *)uio_iov_base(uiop);
+			dp->d_off = 0;
 			dp->d_type = DT_DIR;
 			dp->d_fileno = dotfileid;
 			dp->d_namlen = 1;
+			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
 			dp->d_name[0] = '.';
-			dp->d_name[1] = '\0';
-			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
+			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
 			/*
 			 * Just make these offset cookie 0.
 			 */
-			tl = (u_int32_t *)&dp->d_name[4];
+			tl = (u_int32_t *)&dp->d_name[8];
 			*tl++ = 0;
 			*tl = 0;
 			blksiz += dp->d_reclen;
 			uio_uio_resid_add(uiop, -(dp->d_reclen));
 			uiop->uio_offset += dp->d_reclen;
 			uio_iov_base_add(uiop, dp->d_reclen);
 			uio_iov_len_add(uiop, -(dp->d_reclen));
 			dp = (struct dirent *)uio_iov_base(uiop);
+			dp->d_off = 0;
 			dp->d_type = DT_DIR;
 			dp->d_fileno = dotdotfileid;
 			dp->d_namlen = 2;
+			*((uint64_t *)dp->d_name) = 0;
 			dp->d_name[0] = '.';
 			dp->d_name[1] = '.';
-			dp->d_name[2] = '\0';
-			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
+			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
 			/*
 			 * Just make these offset cookie 0.
 			 */
-			tl = (u_int32_t *)&dp->d_name[4];
+			tl = (u_int32_t *)&dp->d_name[8];
 			*tl++ = 0;
 			*tl = 0;
 			blksiz += dp->d_reclen;
 			uio_uio_resid_add(uiop, -(dp->d_reclen));
 			uiop->uio_offset += dp->d_reclen;
 			uio_iov_base_add(uiop, dp->d_reclen);
 			uio_iov_len_add(uiop, -(dp->d_reclen));
 		}
 		NFSREADDIRPLUS_ATTRBIT(&attrbits);
 		if (gotmnton)
 			NFSSETBIT_ATTRBIT(&attrbits,
 			    NFSATTRBIT_MOUNTEDONFILEID);
 	}
 
 	/*
 	 * Loop around doing readdir rpc's of size nm_readdirsize.
 	 * The stopping criteria is EOF or buffer full.
 	 */
 	while (more_dirs && bigenough) {
 		*attrflagp = 0;
 		NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp);
  		NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
 		*tl++ = cookie.lval[0];
 		*tl++ = cookie.lval[1];
 		/* A zero cookie is sent with a zero cookie verifier. */
 		if (cookie.qval == 0) {
 			*tl++ = 0;
 			*tl++ = 0;
 		} else {
 			NFSLOCKNODE(dnp);
 			*tl++ = dnp->n_cookieverf.nfsuquad[0];
 			*tl++ = dnp->n_cookieverf.nfsuquad[1];
 			NFSUNLOCKNODE(dnp);
 		}
 		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
 		*tl = txdr_unsigned(nmp->nm_readdirsize);
 		if (nd->nd_flag & ND_NFSV4) {
 			(void) nfsrv_putattrbit(nd, &attrbits);
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(NFSV4OP_GETATTR);
 			(void) nfsrv_putattrbit(nd, &dattrbits);
 		}
 		error = nfscl_request(nd, vp, p, cred, stuff);
 		if (error)
 			return (error);
 		if (nd->nd_flag & ND_NFSV3)
 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
 		if (nd->nd_repstat || error) {
 			if (!error)
 				error = nd->nd_repstat;
 			goto nfsmout;
 		}
 		if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
 			dctime = nap->na_ctime;
 		NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 		/* Save the cookie verifier from the reply in the nfsnode. */
 		NFSLOCKNODE(dnp);
 		dnp->n_cookieverf.nfsuquad[0] = *tl++;
 		dnp->n_cookieverf.nfsuquad[1] = *tl++;
 		NFSUNLOCKNODE(dnp);
 		more_dirs = fxdr_unsigned(int, *tl);
 		if (!more_dirs)
 			tryformoredirs = 0;
 	
 		/* loop through the dir entries, doctoring them to 4bsd form */
 		while (more_dirs && bigenough) {
 			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 			if (nd->nd_flag & ND_NFSV4) {
 				ncookie.lval[0] = *tl++;
 				ncookie.lval[1] = *tl++;
 			} else {
 				/*
 				 * NOTE(review): only the second 32-bit word
 				 * is decoded here, whereas nfsrpc_readdir()
 				 * decodes the V3 fileid with fxdr_hyper() --
 				 * confirm this truncation is intended.
 				 */
 				fileno = fxdr_unsigned(long, *++tl);
 				tl++;
 			}
 			len = fxdr_unsigned(int, *tl);
 			if (len <= 0 || len > NFS_MAXNAMLEN) {
 				error = EBADRPC;
 				goto nfsmout;
 			}
-			tlen = NFSM_RNDUP(len);
+			tlen = roundup2(len, 8);
 			if (tlen == len)
-				tlen += 4;  /* To ensure null termination */
+				tlen += 8;  /* To ensure null termination. */
 			left = DIRBLKSIZ - blksiz;
-			if ((tlen + DIRHDSIZ + NFSX_HYPER) > left) {
+			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
 				dp->d_reclen += left;
 				uio_iov_base_add(uiop, left);
 				uio_iov_len_add(uiop, -(left));
 				uio_uio_resid_add(uiop, -(left));
 				uiop->uio_offset += left;
 				blksiz = 0;
 			}
-			if ((tlen + DIRHDSIZ + NFSX_HYPER) > uio_uio_resid(uiop))
+			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
+			    uio_uio_resid(uiop))
 				bigenough = 0;
 			if (bigenough) {
 				dp = (struct dirent *)uio_iov_base(uiop);
+				dp->d_off = 0;
 				dp->d_namlen = len;
-				dp->d_reclen = tlen + DIRHDSIZ + NFSX_HYPER;
+				dp->d_reclen = _GENERIC_DIRLEN(len) +
+				    NFSX_HYPER;
 				dp->d_type = DT_UNKNOWN;
 				blksiz += dp->d_reclen;
 				if (blksiz == DIRBLKSIZ)
 					blksiz = 0;
 				uio_uio_resid_add(uiop, -(DIRHDSIZ));
 				uiop->uio_offset += DIRHDSIZ;
 				uio_iov_base_add(uiop, DIRHDSIZ);
 				uio_iov_len_add(uiop, -(DIRHDSIZ));
 				cnp->cn_nameptr = uio_iov_base(uiop);
 				cnp->cn_namelen = len;
 				NFSCNHASHZERO(cnp);
 				error = nfsm_mbufuio(nd, uiop, len);
 				if (error)
 					goto nfsmout;
 				cp = uio_iov_base(uiop);
 				/* tlen is now the pad between name and cookie. */
 				tlen -= len;
 				*cp = '\0';
 				cp += tlen;	/* points to cookie storage */
 				tl2 = (u_int32_t *)cp;
 				if (len == 2 && cnp->cn_nameptr[0] == '.' &&
 				    cnp->cn_nameptr[1] == '.')
 					isdotdot = 1;
 				else
 					isdotdot = 0;
 				uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
 				uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
 				uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
 				uiop->uio_offset += (tlen + NFSX_HYPER);
 			} else {
 				/* No room: skip the name in the reply. */
 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
 				if (error)
 					goto nfsmout;
 			}
 			nfhp = NULL;
 			if (nd->nd_flag & ND_NFSV3) {
 				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
 				ncookie.lval[0] = *tl++;
 				ncookie.lval[1] = *tl++;
 				attrflag = fxdr_unsigned(int, *tl);
 				if (attrflag) {
 				  error = nfsm_loadattr(nd, &nfsva);
 				  if (error)
 					goto nfsmout;
 				}
 				NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
 				if (*tl) {
 					error = nfsm_getfh(nd, &nfhp);
 					if (error)
 					    goto nfsmout;
 				}
 				/* A file handle without attributes is discarded. */
 				if (!attrflag && nfhp != NULL) {
 					FREE((caddr_t)nfhp, M_NFSFH);
 					nfhp = NULL;
 				}
 			} else {
 				rderr = 0;
 				nfsva.na_mntonfileno = 0xffffffff;
 				error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
 				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
 				    NULL, NULL, &rderr, p, cred);
 				if (error)
 					goto nfsmout;
 			}
 
 			if (bigenough) {
 			    if (nd->nd_flag & ND_NFSV4) {
 				if (rderr) {
 				    dp->d_fileno = 0;
 				} else if (gotmnton) {
 				    if (nfsva.na_mntonfileno != 0xffffffff)
 					dp->d_fileno = nfsva.na_mntonfileno;
 				    else
 					dp->d_fileno = nfsva.na_fileid;
 				} else if (nfsva.na_filesid[0] ==
 				    dnp->n_vattr.na_filesid[0] &&
 				    nfsva.na_filesid[1] ==
 				    dnp->n_vattr.na_filesid[1]) {
 				    dp->d_fileno = nfsva.na_fileid;
 				} else {
 				    do {
 					fakefileno--;
 				    } while (fakefileno ==
 					nfsva.na_fileid);
 				    dp->d_fileno = fakefileno;
 				}
 			    } else {
 				dp->d_fileno = fileno;
 			    }
 			    /*
 			     * Store the entry's cookie after the name and
 			     * make it the current cookie for the caller and
 			     * for the next RPC.
 			     */
 			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
 				ncookie.lval[0];
 			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
 				ncookie.lval[1];
 
 			    if (nfhp != NULL) {
 				/* Entry whose handle matches the directory's own. */
 				if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
 				    dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
 				    VREF(vp);
 				    newvp = vp;
 				    unlocknewvp = 0;
 				    FREE((caddr_t)nfhp, M_NFSFH);
 				    np = dnp;
 				} else if (isdotdot != 0) {
 				    /*
 				     * Skip doing a nfscl_nget() call for "..".
 				     * There's a race between acquiring the nfs
 				     * node here and lookups that look for the
 				     * directory being read (in the parent).
 				     * It would try to get a lock on ".." here,
 				     * owning the lock on the directory being
 				     * read. Lookup will hold the lock on ".."
 				     * and try to acquire the lock on the
 				     * directory being read.
 				     * If the directory is unlocked/relocked,
 				     * then there is a LOR with the buflock
 				     * vp is relocked.
 				     */
 				    free(nfhp, M_NFSFH);
 				} else {
 				    error = nfscl_nget(vnode_mount(vp), vp,
 				      nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE);
 				    if (!error) {
 					newvp = NFSTOV(np);
 					unlocknewvp = 1;
 				    }
 				}
 				nfhp = NULL;
 				if (newvp != NULLVP) {
 				    error = nfscl_loadattrcache(&newvp,
 					&nfsva, NULL, NULL, 0, 0);
 				    if (error) {
 					if (unlocknewvp)
 					    vput(newvp);
 					else
 					    vrele(newvp);
 					goto nfsmout;
 				    }
 				    dp->d_type =
 					vtonfs_dtype(np->n_vattr.na_type);
 				    ndp->ni_vp = newvp;
 				    NFSCNHASH(cnp, HASHINIT);
 				    /*
 				     * Directories are only entered in the
 				     * name cache when dctime was obtained.
 				     */
 				    if (cnp->cn_namelen <= NCHNAMLEN &&
 					(newvp->v_type != VDIR ||
 					 dctime.tv_sec != 0)) {
 					cache_enter_time(ndp->ni_dvp,
 					    ndp->ni_vp, cnp,
 					    &nfsva.na_ctime,
 					    newvp->v_type != VDIR ? NULL :
 					    &dctime);
 				    }
 				    if (unlocknewvp)
 					vput(newvp);
 				    else
 					vrele(newvp);
 				    newvp = NULLVP;
 				}
 			    }
 			} else if (nfhp != NULL) {
 			    FREE((caddr_t)nfhp, M_NFSFH);
 			}
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			more_dirs = fxdr_unsigned(int, *tl);
 		}
 		/*
 		 * If at end of rpc data, get the eof boolean
 		 */
 		if (!more_dirs) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			eof = fxdr_unsigned(int, *tl);
 			if (tryformoredirs)
 				more_dirs = !eof;
 			if (nd->nd_flag & ND_NFSV4) {
 				error = nfscl_postop_attr(nd, nap, attrflagp,
 				    stuff);
 				if (error)
 					goto nfsmout;
 			}
 		}
 		mbuf_freem(nd->nd_mrep);
 		nd->nd_mrep = NULL;
 	}
 	/*
 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
 	 * by increasing d_reclen for the last record.
 	 */
 	if (blksiz > 0) {
 		left = DIRBLKSIZ - blksiz;
 		dp->d_reclen += left;
 		uio_iov_base_add(uiop, left);
 		uio_iov_len_add(uiop, -(left));
 		uio_uio_resid_add(uiop, -(left));
 		uiop->uio_offset += left;
 	}
 
 	/*
 	 * If returning no data, assume end of file.
 	 * If not bigenough, return not end of file, since you aren't
 	 *    returning all the data
 	 * Otherwise, return the eof flag from the server.
 	 */
 	if (eofp != NULL) {
 		if (tresid == uio_uio_resid(uiop))
 			*eofp = 1;
 		else if (!bigenough)
 			*eofp = 0;
 		else
 			*eofp = eof;
 	}
 
 	/*
 	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
 	 * NOTE(review): these records keep the cookie at d_name[4] and do
 	 * not set d_off, while the "." and ".." records above were moved to
 	 * d_name[8] with d_off zeroed -- confirm the difference is intended.
 	 */
 	while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
 		dp = (struct dirent *)uio_iov_base(uiop);
 		dp->d_type = DT_UNKNOWN;
 		dp->d_fileno = 0;
 		dp->d_namlen = 0;
 		dp->d_name[0] = '\0';
 		tl = (u_int32_t *)&dp->d_name[4];
 		*tl++ = cookie.lval[0];
 		*tl = cookie.lval[1];
 		dp->d_reclen = DIRBLKSIZ;
 		uio_iov_base_add(uiop, DIRBLKSIZ);
 		uio_iov_len_add(uiop, -(DIRBLKSIZ));
 		uio_uio_resid_add(uiop, -(DIRBLKSIZ));
 		uiop->uio_offset += DIRBLKSIZ;
 	}
 
 	/*
 	 * Common exit: free any reply mbufs still held.  Presumably the
 	 * NFSM_DISSECT() uses above also jump here on a short reply --
 	 * verify against the macro definition.
 	 */
 nfsmout:
 	if (nd->nd_mrep != NULL)
 		mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 #endif	/* !APPLE */
 
 /*
  * Nfs commit rpc
  * Builds a COMMIT request holding the offset (as a hyper) and the count;
  * for NFSv4 a GETATTR op is appended to the request.
  * On a successful reply the returned write verifier is compared with
  * nmp->nm_verf while holding the mount lock; when they differ, the new
  * verifier is saved and nd_repstat is set to NFSERR_STALEWRITEVERF.
  * Returns the error from the request/reply parsing or, when that is 0,
  * nd_repstat.
  */
 APPLESTATIC int
 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	nfsattrbit_t attrbits;
 	int error;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	
 	*attrflagp = 0;
 	NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp);
 	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 	txdr_hyper(offset, tl);
 	tl += 2;
 	*tl = txdr_unsigned(cnt);
 	if (nd->nd_flag & ND_NFSV4) {
 		/*
 		 * And do a Getattr op.
 		 */
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
 		NFSGETATTR_ATTRBIT(&attrbits);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 	}
 	error = nfscl_request(nd, vp, p, cred, stuff);
 	if (error)
 		return (error);
 	error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff);
 	if (!error && !nd->nd_repstat) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
 		/* nm_verf is compared and updated under the mount lock. */
 		NFSLOCKMNT(nmp);
 		if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
 			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
 			nd->nd_repstat = NFSERR_STALEWRITEVERF;
 		}
 		NFSUNLOCKMNT(nmp);
 		if (nd->nd_flag & ND_NFSV4)
 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
 	}
 	/*
 	 * Presumably also the target of NFSM_DISSECT() on a short reply --
 	 * verify against the macro definition.
 	 */
 nfsmout:
 	if (!error && nd->nd_repstat)
 		error = nd->nd_repstat;
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * NFS byte range lock rpc.
  * (Mostly just calls one of the three lower level RPC routines.)
  *
  * The flock is converted to an offset/length pair (l_len == 0 becomes
  * NFS64BITSSET) and then dispatched on op:
  *   F_GETLK  - nfscl_lockt() and, when that returns 0, nfsrpc_lockt().
  *   F_UNLCK  - loops on nfscl_relbytelock()/nfsrpc_locku() (only when
  *              fl->l_type is also F_UNLCK).
  *   F_SETLK  - nfscl_getbytelock() followed by nfsrpc_lock().
  * Any other combination yields EINVAL.
  * The whole operation is retried for the errors listed in the loop
  * condition at the bottom; once retrycnt reaches 4 a remaining error is
  * returned as EIO.
  */
 APPLESTATIC int
 nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
     int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
 {
 	struct nfscllockowner *lp;
 	struct nfsclclient *clp;
 	struct nfsfh *nfhp;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	u_int64_t off, len;
 	off_t start, end;
 	u_int32_t clidrev = 0;
 	int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
 	int callcnt, dorpc;
 
 	/*
 	 * Convert the flock structure into a start and end and do POSIX
 	 * bounds checking.
 	 */
 	switch (fl->l_whence) {
 	case SEEK_SET:
 	case SEEK_CUR:
 		/*
 		 * Caller is responsible for adding any necessary offset
 		 * when SEEK_CUR is used.
 		 */
 		start = fl->l_start;
 		off = fl->l_start;
 		break;
 	case SEEK_END:
 		start = size + fl->l_start;
 		off = size + fl->l_start;
 		break;
 	default:
 		return (EINVAL);
 	}
 	if (start < 0)
 		return (EINVAL);
 	/* end is only computed to reject a range that wraps. */
 	if (fl->l_len != 0) {
 		end = start + fl->l_len - 1;
 		if (end < start)
 			return (EINVAL);
 	}
 
 	len = fl->l_len;
 	if (len == 0)
 		len = NFS64BITSSET;
 	retrycnt = 0;
 	do {
 	    nd->nd_repstat = 0;
 	    if (op == F_GETLK) {
 		error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
 		if (error)
 			return (error);
 		error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
 		if (!error) {
 			clidrev = clp->nfsc_clientidrev;
 			error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
 			    p, id, flags);
 		} else if (error == -1) {
 			/* -1 from nfscl_lockt() skips the RPC without error. */
 			error = 0;
 		}
 		nfscl_clientrelease(clp);
 	    } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
 		/*
 		 * We must loop around for all lockowner cases.
 		 */
 		callcnt = 0;
 		error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
 		if (error)
 			return (error);
 		do {
 		    error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
 			clp, id, flags, &lp, &dorpc);
 		    /*
 		     * If it returns a NULL lp, we're done.
 		     */
 		    if (lp == NULL) {
 			if (callcnt == 0)
 			    nfscl_clientrelease(clp);
 			else
 			    nfscl_releasealllocks(clp, vp, p, id, flags);
 			return (error);
 		    }
 		    if (nmp->nm_clp != NULL)
 			clidrev = nmp->nm_clp->nfsc_clientidrev;
 		    else
 			clidrev = 0;
 		    /*
 		     * If the server doesn't support Posix lock semantics,
 		     * only allow locks on the entire file, since it won't
 		     * handle overlapping byte ranges.
 		     * There might still be a problem when a lock
 		     * upgrade/downgrade (read<->write) occurs, since the
 		     * server "might" expect an unlock first?
 		     */
 		    if (dorpc && (lp->nfsl_open->nfso_posixlock ||
 			(off == 0 && len == NFS64BITSSET))) {
 			/*
 			 * Since the lock records will go away, we must
 			 * wait for grace and delay here.
 			 */
 			do {
 			    error = nfsrpc_locku(nd, nmp, lp, off, len,
 				NFSV4LOCKT_READ, cred, p, 0);
 			    if ((nd->nd_repstat == NFSERR_GRACE ||
 				 nd->nd_repstat == NFSERR_DELAY) &&
 				error == 0)
 				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
 				    "nfs_advlock");
 			} while ((nd->nd_repstat == NFSERR_GRACE ||
 			    nd->nd_repstat == NFSERR_DELAY) && error == 0);
 		    }
 		    callcnt++;
 		} while (error == 0 && nd->nd_repstat == 0);
 		nfscl_releasealllocks(clp, vp, p, id, flags);
 	    } else if (op == F_SETLK) {
 		error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
 		    NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
 		if (error || donelocally) {
 			return (error);
 		}
 		if (nmp->nm_clp != NULL)
 			clidrev = nmp->nm_clp->nfsc_clientidrev;
 		else
 			clidrev = 0;
 		nfhp = VTONFS(vp)->n_fhp;
 		/* Without posixlock only a whole file range is accepted. */
 		if (!lp->nfsl_open->nfso_posixlock &&
 		    (off != 0 || len != NFS64BITSSET)) {
 			error = EINVAL;
 		} else {
 			error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
 			    nfhp->nfh_len, lp, newone, reclaim, off,
 			    len, fl->l_type, cred, p, 0);
 		}
 		if (!error)
 			error = nd->nd_repstat;
 		nfscl_lockrelease(lp, error, newone);
 	    } else {
 		error = EINVAL;
 	    }
 	    if (!error)
 	        error = nd->nd_repstat;
 	    /*
 	     * The first group of errors sleeps via nfs_catnap() before the
 	     * retry; expired/bad stateid errors call nfscl_hasexpired() and
 	     * count against the retry limit.
 	     */
 	    if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
 		error == NFSERR_STALEDONTRECOVER ||
 		error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
 		error == NFSERR_BADSESSION) {
 		(void) nfs_catnap(PZERO, error, "nfs_advlock");
 	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
 		&& clidrev != 0) {
 		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
 		retrycnt++;
 	    }
 	} while (error == NFSERR_GRACE ||
 	    error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
 	    error == NFSERR_BADSESSION ||
 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
 	if (error && retrycnt >= 4)
 		error = EIO;
 	return (error);
 }
 
 /*
  * The lower level routine for the LockT case.
  * Sends a LOCKT request for the range off/len with a lock owner made up
  * of the name from nfscl_filllockowner() followed by the file handle.
  * nd_repstat == 0: fl->l_type is set to F_UNLCK.
  * nd_repstat == NFSERR_DENIED: nd_repstat is cleared and fl is filled in
  * from the reply (l_pid is always set to 0).
  * nd_repstat == NFSERR_STALECLIENTID: recovery is initiated for clp.
  * Returns 0 or the error from the request/reply parsing; the caller
  * inspects nd->nd_repstat separately.
  */
 APPLESTATIC int
 nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
     struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
 {
 	u_int32_t *tl;
 	int error, type, size;
 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
 	struct nfsnode *np;
 	struct nfsmount *nmp;
 	struct nfsclsession *tsep;
 
 	nmp = VFSTONFS(vp->v_mount);
 	NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp);
 	/* Words built: lock type, offset (2), length (2), clientid (2). */
 	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
 	if (fl->l_type == F_RDLCK)
 		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
 	else
 		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
 	txdr_hyper(off, tl);
 	tl += 2;
 	txdr_hyper(len, tl);
 	tl += 2;
 	tsep = nfsmnt_mdssession(nmp);
 	*tl++ = tsep->nfsess_clientid.lval[0];
 	*tl = tsep->nfsess_clientid.lval[1];
 	nfscl_filllockowner(id, own, flags);
 	np = VTONFS(vp);
 	NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
 	    np->n_fhp->nfh_len);
 	(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
 	error = nfscl_request(nd, vp, p, cred, NULL);
 	if (error)
 		return (error);
 	if (nd->nd_repstat == 0) {
 		fl->l_type = F_UNLCK;
 	} else if (nd->nd_repstat == NFSERR_DENIED) {
 		nd->nd_repstat = 0;
 		fl->l_whence = SEEK_SET;
 		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
 		fl->l_start = fxdr_hyper(tl);
 		tl += 2;
 		len = fxdr_hyper(tl);
 		tl += 2;
 		/* An all-ones length maps back to l_len == 0. */
 		if (len == NFS64BITSSET)
 			fl->l_len = 0;
 		else
 			fl->l_len = len;
 		type = fxdr_unsigned(int, *tl++);
 		if (type == NFSV4LOCKT_WRITE)
 			fl->l_type = F_WRLCK;
 		else
 			fl->l_type = F_RDLCK;
 		/*
 		 * XXX For now, I have no idea what to do with the
 		 * conflicting lock_owner, so I'll just set the pid == 0
 		 * and skip over the lock_owner.
 		 */
 		fl->l_pid = (pid_t)0;
 		/* Step over two words to reach the owner's length. */
 		tl += 2;
 		size = fxdr_unsigned(int, *tl);
 		if (size < 0 || size > NFSV4_OPAQUELIMIT)
 			error = EBADRPC;
 		if (!error)
 			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
 	} else if (nd->nd_repstat == NFSERR_STALECLIENTID)
 		nfscl_initiate_recovery(clp);
 	/*
 	 * Presumably also the target of NFSM_DISSECT() on a short reply --
 	 * verify against the macro definition.
 	 */
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * Lower level function that performs the LockU RPC.
  * The request carries the lock type, the lock owner's seqid, the lock
  * stateid (with its seqid sent as 0 when NFSHASNFSV4N(nmp) is true) and
  * the offset/length of the range.
  * lp->nfsl_seqid is passed to NFSCL_INCRSEQID() after the request, even
  * when the request failed.
  * nd_repstat == 0: the stateid from the reply is stored in lp.
  * nd_repstat == NFSERR_STALESTATEID: recovery is initiated for the client.
  * Returns 0 or the error from the request/reply parsing; the caller
  * inspects nd->nd_repstat separately.
  */
 static int
 nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
     struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
     u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
 {
 	u_int32_t *tl;
 	int error;
 
 	nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
 	    lp->nfsl_open->nfso_fhlen, NULL, NULL);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(type);
 	*tl = txdr_unsigned(lp->nfsl_seqid);
 	/*
 	 * When nfstest_outofseq is non-zero, a seqid that is one too large
 	 * is sent at random, roughly once every nfstest_outofseq calls.
 	 */
 	if (nfstest_outofseq &&
 	    (arc4random() % nfstest_outofseq) == 0)
 		*tl = txdr_unsigned(lp->nfsl_seqid + 1);
 	tl++;
 	if (NFSHASNFSV4N(nmp))
 		*tl++ = 0;
 	else
 		*tl++ = lp->nfsl_stateid.seqid;
 	*tl++ = lp->nfsl_stateid.other[0];
 	*tl++ = lp->nfsl_stateid.other[1];
 	*tl++ = lp->nfsl_stateid.other[2];
 	txdr_hyper(off, tl);
 	tl += 2;
 	txdr_hyper(len, tl);
 	if (syscred)
 		nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
 	if (error)
 		return (error);
 	if (nd->nd_repstat == 0) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
 		lp->nfsl_stateid.seqid = *tl++;
 		lp->nfsl_stateid.other[0] = *tl++;
 		lp->nfsl_stateid.other[1] = *tl++;
 		lp->nfsl_stateid.other[2] = *tl;
 	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
 		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
 	/*
 	 * Presumably also the target of NFSM_DISSECT() on a short reply --
 	 * verify against the macro definition.
 	 */
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * The actual Lock RPC.
  * When newone is non-zero the request uses the "new lock owner" form:
  * open owner seqid, open stateid, lock seqid, clientid and an owner
  * string made of lp->nfsl_owner followed by the file handle.  Otherwise
  * it uses the existing lock stateid and lock seqid.
  * nd_repstat == 0: the stateid from the reply is stored in lp.
  * nd_repstat == NFSERR_DENIED: the reply's conflicting owner is skipped.
  * nd_repstat == NFSERR_STALESTATEID: recovery is initiated for the client.
  * Returns 0 or the error from the request/reply parsing; the caller
  * inspects nd->nd_repstat separately.
  */
 APPLESTATIC int
 nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
     u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
     int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
     NFSPROC_T *p, int syscred)
 {
 	u_int32_t *tl;
 	int error, size;
 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
 	struct nfsclsession *tsep;
 
 	nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL);
 	/*
 	 * Words built: lock type, reclaim, offset (2), length (2) and the
 	 * new-lock-owner boolean, which is filled in below.
 	 */
 	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
 	if (type == F_RDLCK)
 		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
 	else
 		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
 	*tl++ = txdr_unsigned(reclaim);
 	txdr_hyper(off, tl);
 	tl += 2;
 	txdr_hyper(len, tl);
 	tl += 2;
 	if (newone) {
 	    *tl = newnfs_true;
 	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
 		2 * NFSX_UNSIGNED + NFSX_HYPER);
 	    *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
 	    if (NFSHASNFSV4N(nmp))
 		*tl++ = 0;
 	    else
 		*tl++ = lp->nfsl_open->nfso_stateid.seqid;
 	    *tl++ = lp->nfsl_open->nfso_stateid.other[0];
 	    *tl++ = lp->nfsl_open->nfso_stateid.other[1];
 	    *tl++ = lp->nfsl_open->nfso_stateid.other[2];
 	    *tl++ = txdr_unsigned(lp->nfsl_seqid);
 	    tsep = nfsmnt_mdssession(nmp);
 	    *tl++ = tsep->nfsess_clientid.lval[0];
 	    *tl = tsep->nfsess_clientid.lval[1];
 	    NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
 	    NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
 	    (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
 	} else {
 	    *tl = newnfs_false;
 	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
 	    if (NFSHASNFSV4N(nmp))
 		*tl++ = 0;
 	    else
 		*tl++ = lp->nfsl_stateid.seqid;
 	    *tl++ = lp->nfsl_stateid.other[0];
 	    *tl++ = lp->nfsl_stateid.other[1];
 	    *tl++ = lp->nfsl_stateid.other[2];
 	    *tl = txdr_unsigned(lp->nfsl_seqid);
 	    /*
 	     * When nfstest_outofseq is non-zero, a seqid that is one too
 	     * large is sent at random, roughly once every nfstest_outofseq
 	     * calls.
 	     */
 	    if (nfstest_outofseq &&
 		(arc4random() % nfstest_outofseq) == 0)
 		    *tl = txdr_unsigned(lp->nfsl_seqid + 1);
 	}
 	if (syscred)
 		nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error)
 		return (error);
 	if (newone)
 	    NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
 	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
 	if (nd->nd_repstat == 0) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
 		lp->nfsl_stateid.seqid = *tl++;
 		lp->nfsl_stateid.other[0] = *tl++;
 		lp->nfsl_stateid.other[1] = *tl++;
 		lp->nfsl_stateid.other[2] = *tl;
 	} else if (nd->nd_repstat == NFSERR_DENIED) {
 		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
 		/* The eighth word is used as the owner's length. */
 		size = fxdr_unsigned(int, *(tl + 7));
 		if (size < 0 || size > NFSV4_OPAQUELIMIT)
 			error = EBADRPC;
 		if (!error)
 			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
 	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
 		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
 	/*
 	 * Presumably also the target of NFSM_DISSECT() on a short reply --
 	 * verify against the macro definition.
 	 */
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * nfs statfs rpc
  * (always called with the vp for the mount point)
  *
  * On an NFSv4 mount this sends a Getattr for the attribute bits chosen by
  * NFSSTATFS_GETATTRBIT() and passes sbp and fsp to nfsv4_loadattr(); when
  * that succeeds the mount's nm_fsid[] is copied from nap->na_filesid[] and
  * *attrflagp is set to 1.  On other mounts it sends FSSTAT and decodes the
  * reply words into *sbp directly.
  *
  * Returns 0 on success, otherwise the error from nfscl_request(), a reply
  * parsing error, or the server status taken from nd_repstat.
  */
 APPLESTATIC int
 nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
     void *stuff)
 {
 	/* Stays NULL on the NFSv4 path; only the FSSTAT path sets it. */
 	u_int32_t *tl = NULL;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsmount *nmp;
 	nfsattrbit_t attrbits;
 	int error;
 
 	*attrflagp = 0;
 	nmp = VFSTONFS(vnode_mount(vp));
 	if (NFSHASNFSV4(nmp)) {
 		/*
 		 * For V4, you actually do a getattr.
 		 */
 		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
 		NFSSTATFS_GETATTRBIT(&attrbits);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 		nd->nd_flag |= ND_USEGSSNAME;
 		error = nfscl_request(nd, vp, p, cred, stuff);
 		if (error)
 			return (error);
 		if (nd->nd_repstat == 0) {
 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 			    NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p,
 			    cred);
 			if (!error) {
 				/* Remember the fsid the server reported. */
 				nmp->nm_fsid[0] = nap->na_filesid[0];
 				nmp->nm_fsid[1] = nap->na_filesid[1];
 				NFSSETHASSETFSID(nmp);
 				*attrflagp = 1;
 			}
 		} else {
 			error = nd->nd_repstat;
 		}
 		if (error)
 			goto nfsmout;
 	} else {
 		NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp);
 		error = nfscl_request(nd, vp, p, cred, stuff);
 		if (error)
 			return (error);
 		/* Post-op attributes are only parsed for an NFSv3 reply. */
 		if (nd->nd_flag & ND_NFSV3) {
 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
 			if (error)
 				goto nfsmout;
 		}
 		if (nd->nd_repstat) {
 			error = nd->nd_repstat;
 			goto nfsmout;
 		}
 		NFSM_DISSECT(tl, u_int32_t *,
 		    NFSX_STATFS(nd->nd_flag & ND_NFSV3));
 	}
 	/*
 	 * Decode the FSSTAT reply.  For an NFSv4 mount the else-if below is
 	 * false; presumably NFSHASNFSV3() is false as well, since tl is still
 	 * NULL on that path.
 	 */
 	if (NFSHASNFSV3(nmp)) {
 		/* Six 64-bit counters followed by one 32-bit value. */
 		sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
 		sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
 		sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
 		sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
 		sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
 		sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
 		sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
 	} else if (NFSHASNFSV4(nmp) == 0) {
 		/* Neither V3 nor V4: five 32-bit values. */
 		sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
 		sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
 		sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
 		sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
 		sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
 	}
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * nfs pathconf rpc
  *
  * An NFSv4 mount obtains the values through a Getattr whose reply is
  * handed to nfsv4_loadattr() together with pc.  Any other mount sends
  * PATHCONF and the six reply words are decoded into *pc here.
  * *attrflagp is cleared on entry and reflects whether attributes were
  * loaded.
  */
 APPLESTATIC int
 nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
     void *stuff)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	nfsattrbit_t attrbits;
 	u_int32_t *wp;
 	int error;
 
 	*attrflagp = 0;
 	if (!NFSHASNFSV4(nmp)) {
 		NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp);
 		error = nfscl_request(nd, vp, p, cred, stuff);
 		if (error != 0)
 			return (error);
 		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
 		if (error == 0 && nd->nd_repstat != 0)
 			error = nd->nd_repstat;
 		if (error == 0) {
 			NFSM_DISSECT(wp, u_int32_t *, NFSX_V3PATHCONF);
 			pc->pc_linkmax = fxdr_unsigned(u_int32_t, wp[0]);
 			pc->pc_namemax = fxdr_unsigned(u_int32_t, wp[1]);
 			pc->pc_notrunc = fxdr_unsigned(u_int32_t, wp[2]);
 			pc->pc_chownrestricted =
 			    fxdr_unsigned(u_int32_t, wp[3]);
 			pc->pc_caseinsensitive =
 			    fxdr_unsigned(u_int32_t, wp[4]);
 			pc->pc_casepreserving =
 			    fxdr_unsigned(u_int32_t, wp[5]);
 		}
 	} else {
 		/* NFSv4 has no Pathconf operation; a Getattr is used. */
 		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
 		NFSPATHCONF_GETATTRBIT(&attrbits);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 		nd->nd_flag |= ND_USEGSSNAME;
 		error = nfscl_request(nd, vp, p, cred, stuff);
 		if (error != 0)
 			return (error);
 		if (nd->nd_repstat != 0) {
 			error = nd->nd_repstat;
 		} else {
 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 			    pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
 			    cred);
 			if (error == 0)
 				*attrflagp = 1;
 		}
 	}
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * nfs version 3 fsinfo rpc call
  *
  * Sends FSINFO for vp, parses the post-op attributes into nap/attrflagp
  * and, when both the parse and the server status are clean, decodes the
  * twelve reply words into *fsp.
  */
 APPLESTATIC int
 nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	u_int32_t *wp;
 	int error;
 
 	*attrflagp = 0;
 	NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp);
 	error = nfscl_request(nd, vp, p, cred, stuff);
 	if (error != 0)
 		return (error);
 
 	error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
 	if (error == 0 && nd->nd_repstat != 0)
 		error = nd->nd_repstat;
 	if (error == 0) {
 		NFSM_DISSECT(wp, u_int32_t *, NFSX_V3FSINFO);
 		/* Words 0-6: read/write/readdir transfer sizes. */
 		fsp->fs_rtmax = fxdr_unsigned(u_int32_t, wp[0]);
 		fsp->fs_rtpref = fxdr_unsigned(u_int32_t, wp[1]);
 		fsp->fs_rtmult = fxdr_unsigned(u_int32_t, wp[2]);
 		fsp->fs_wtmax = fxdr_unsigned(u_int32_t, wp[3]);
 		fsp->fs_wtpref = fxdr_unsigned(u_int32_t, wp[4]);
 		fsp->fs_wtmult = fxdr_unsigned(u_int32_t, wp[5]);
 		fsp->fs_dtpref = fxdr_unsigned(u_int32_t, wp[6]);
 		/* Words 7-8: 64-bit size; 9-10: time; 11: properties. */
 		fsp->fs_maxfilesize = fxdr_hyper(&wp[7]);
 		fxdr_nfsv3time(&wp[9], &fsp->fs_timedelta);
 		fsp->fs_properties = fxdr_unsigned(u_int32_t, wp[11]);
 	}
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * This function performs the Renew RPC.
  *
  * With dsp == NULL the request is built and sent without an explicit
  * session and goes to the mount's own socket.  With a dsp, its session is
  * passed along and its socket is used when it has one.  Returns 0 right
  * away when the client has no mount attached.
  */
 APPLESTATIC int
 nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
     NFSPROC_T *p)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsmount *nmp = clp->nfsc_nmp;
 	struct nfsclsession *dssep, *mdssep;
 	struct nfssockreq *nrp;
 	u_int32_t *wp;
 	int error;
 
 	if (nmp == NULL)
 		return (0);
 
 	/* Session handed to both the request builder and the sender. */
 	dssep = (dsp != NULL) ? &dsp->nfsclds_sess : NULL;
 	nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, dssep);
 	if (!NFSHASNFSV4N(nmp)) {
 		/*
 		 * Only a real Renew takes the clientid argument; NFSv4.1
 		 * just uses a Sequence Op and not a Renew.
 		 */
 		NFSM_BUILD(wp, u_int32_t *, 2 * NFSX_UNSIGNED);
 		mdssep = nfsmnt_mdssession(nmp);
 		wp[0] = mdssep->nfsess_clientid.lval[0];
 		wp[1] = mdssep->nfsess_clientid.lval[1];
 	}
 
 	/* Prefer the DS socket; fall back to the MDS socket. */
 	nrp = (dsp != NULL) ? dsp->nfsclds_sockp : NULL;
 	if (nrp == NULL)
 		nrp = &nmp->nm_sockreq;
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, dssep);
 	if (error == 0) {
 		error = nd->nd_repstat;
 		mbuf_freem(nd->nd_mrep);
 	}
 	return (error);
 }
 
 /*
  * This function performs the Releaselockowner RPC.
  *
  * On an NFSv4.1 mount a FreeStateID carrying lp->nfsl_stateid is sent
  * instead.  Otherwise the request carries the MDS session's clientid
  * followed by an owner string made of lp->nfsl_owner with the fhlen bytes
  * of fh appended.  Returns the transport error or the server status.
  */
 APPLESTATIC int
 nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
     uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
 {
 	uint8_t ownbuf[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsclsession *mdssep;
 	u_int32_t *wp;
 	int error;
 
 	if (!NFSHASNFSV4N(nmp)) {
 		nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
 		    NULL);
 		NFSM_BUILD(wp, u_int32_t *, 2 * NFSX_UNSIGNED);
 		mdssep = nfsmnt_mdssession(nmp);
 		wp[0] = mdssep->nfsess_clientid.lval[0];
 		wp[1] = mdssep->nfsess_clientid.lval[1];
 		/* Owner string = lock owner name + file handle bytes. */
 		NFSBCOPY(lp->nfsl_owner, ownbuf, NFSV4CL_LOCKNAMELEN);
 		NFSBCOPY(fh, &ownbuf[NFSV4CL_LOCKNAMELEN], fhlen);
 		(void)nfsm_strtom(nd, ownbuf, NFSV4CL_LOCKNAMELEN + fhlen);
 	} else {
 		/* For NFSv4.1, do a FreeStateID. */
 		nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
 		    NULL);
 		nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
 	}
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error == 0) {
 		error = nd->nd_repstat;
 		mbuf_freem(nd->nd_mrep);
 	}
 	return (error);
 }
 
 /*
  * This function performs the Compound to get the mount pt FH.
  *
  * The request is started as NFSPROC_PUTROOTFH, one Lookup is appended for
  * every non-empty '/'-separated component of dirpath, and a Getfh is
  * appended last.  On success the returned file handle is copied into
  * nmp->nm_fh and its length stored in nmp->nm_fhsize.
  *
  * dirpath is modified while it is scanned (each '/' that ends a component
  * is briefly replaced by '\0') but is restored before the loop moves on.
  *
  * Returns the error from newnfs_request(), a reply parsing error, or the
  * resulting nd_repstat.
  */
 APPLESTATIC int
 nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
     NFSPROC_T *p)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd;
 	struct nfsrv_descript *nd = &nfsd;
 	u_char *cp, *cp2;
 	int error, cnt, len, setnil;
 	u_int32_t *opcntp;
 
 	/* opcntp is filled in by nfscl_reqstart() and patched below. */
 	nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL);
 	cp = dirpath;
 	cnt = 0;		/* Number of Lookup ops added. */
 	do {
 		setnil = 0;
 		/* Skip any run of leading slashes. */
 		while (*cp == '/')
 			cp++;
 		/* Find the end of this component. */
 		cp2 = cp;
 		while (*cp2 != '\0' && *cp2 != '/')
 			cp2++;
 		if (*cp2 == '/') {
 			/* Terminate the component so strlen(cp) measures it. */
 			setnil = 1;
 			*cp2 = '\0';
 		}
 		if (cp2 != cp) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(NFSV4OP_LOOKUP);
 			nfsm_strtom(nd, cp, strlen(cp));
 			cnt++;
 		}
 		/* Put the separator back and step past it. */
 		if (setnil)
 			*cp2++ = '/';
 		cp = cp2;
 	} while (*cp != '\0');
 	if (NFSHASNFSV4N(nmp))
 		/* Has a Sequence Op done by nfscl_reqstart(). */
 		*opcntp = txdr_unsigned(3 + cnt);
 	else
 		*opcntp = txdr_unsigned(2 + cnt);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(NFSV4OP_GETFH);
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error)
 		return (error);
 	if (nd->nd_repstat == 0) {
 		/*
 		 * Pull 3 + 2 * cnt words and skip all but the last one,
 		 * which is used as the file handle length.
 		 */
 		NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
 		tl += (2 + 2 * cnt);
 		if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
 			len > NFSX_FHMAX) {
 			nd->nd_repstat = NFSERR_BADXDR;
 		} else {
 			nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len);
 			/* Only record the size once the copy succeeded. */
 			if (nd->nd_repstat == 0)
 				nmp->nm_fhsize = len;
 		}
 	}
 	error = nd->nd_repstat;
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * This function performs the Delegreturn RPC.
  *
  * The request is built for the delegation's file handle and carries the
  * delegation stateid; on an NFSv4.1 mount the stateid's seqid word is sent
  * as 0.  ND_USEGSSNAME is only set when syscred is non-zero.  Returns the
  * transport error or the server status.
  */
 APPLESTATIC int
 nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
     struct nfsmount *nmp, NFSPROC_T *p, int syscred)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	u_int32_t *wp;
 	int error;
 
 	nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
 	    dp->nfsdl_fhlen, NULL, NULL);
 	NFSM_BUILD(wp, u_int32_t *, NFSX_STATEID);
 	if (!NFSHASNFSV4N(nmp))
 		wp[0] = dp->nfsdl_stateid.seqid;
 	else
 		wp[0] = 0;
 	wp[1] = dp->nfsdl_stateid.other[0];
 	wp[2] = dp->nfsdl_stateid.other[1];
 	wp[3] = dp->nfsdl_stateid.other[2];
 	if (syscred != 0)
 		nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error == 0) {
 		error = nd->nd_repstat;
 		mbuf_freem(nd->nd_mrep);
 	}
 	return (error);
 }
 
 /*
  * nfs getacl call.
  *
  * Refused with EOPNOTSUPP unless nfsrv_useacl is non-zero and the mount
  * is NFSv4.  Otherwise a request asking only for the ACL attribute bit is
  * sent and a clean reply is handed to nfsv4_loadattr() together with aclp.
  */
 APPLESTATIC int
 nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
     struct acl *aclp, void *stuff)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsmount *nmp;
 	nfsattrbit_t attrbits;
 	int error;
 
 	nmp = VFSTONFS(vnode_mount(vp));
 	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
 		return (EOPNOTSUPP);
 
 	NFSCL_REQSTART(nd, NFSPROC_GETACL, vp);
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
 	(void) nfsrv_putattrbit(nd, &attrbits);
 	error = nfscl_request(nd, vp, p, cred, stuff);
 	if (error != 0)
 		return (error);
 
 	if (nd->nd_repstat != 0)
 		error = nd->nd_repstat;
 	else
 		error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
 		    NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * nfs setacl call.
  *
  * Thin front end: when ACLs are enabled (nfsrv_useacl) and the mount is
  * NFSv4 the work is delegated to nfsrpc_setattr() with only aclp supplied;
  * otherwise EOPNOTSUPP is returned.
  */
 APPLESTATIC int
 nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
     struct acl *aclp, void *stuff)
 {
 	struct nfsmount *nmp;
 
 	nmp = VFSTONFS(vnode_mount(vp));
 	if (nfsrv_useacl != 0 && NFSHASNFSV4(nmp))
 		return (nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL,
 		    stuff));
 	return (EOPNOTSUPP);
 }
 
 /*
  * Do the actual Setacl RPC: a request carrying the given stateid followed
  * by the ACL attribute built from aclp.  Only valid on an NFSv4 mount;
  * anything else gets EOPNOTSUPP.  Returns the transport error or the
  * server status.
  */
 static int
 nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
     struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsmount *nmp;
 	nfsattrbit_t attrbits;
 	int error;
 
 	nmp = VFSTONFS(vnode_mount(vp));
 	if (!NFSHASNFSV4(nmp))
 		return (EOPNOTSUPP);
 
 	NFSCL_REQSTART(nd, NFSPROC_SETACL, vp);
 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
 	(void) nfsv4_fillattr(nd, vnode_mount(vp), vp, aclp, NULL, NULL, 0,
 	    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0);
 	error = nfscl_request(nd, vp, p, cred, stuff);
 	if (error != 0)
 		return (error);
 
 	/* The reply body (pre/postop attributes) is not examined. */
 	error = nd->nd_repstat;
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * Do the NFSv4.1 Exchange ID.
  *
  * The request carries two words taken from nfsboottime.tv_sec and
  * clp->nfsc_rev, the client id string, exchflags, NFSV4EXCH_SP4NONE and a
  * single implementation id ("freebsd.org", the kernel version string and a
  * fixed timestamp).
  *
  * On a clean reply a struct nfsclds is allocated with room for the
  * returned owner string, filled in from the reply and returned through
  * *dspp.  *dspp is NULL on every failure path.
  *
  * Returns the error from newnfs_request(), a reply parsing error, or the
  * resulting nd_repstat.
  */
 int
 nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
     struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp,
     struct ucred *cred, NFSPROC_T *p)
 {
 	uint32_t *tl, v41flags;
 	struct nfsrv_descript nfsd;
 	struct nfsrv_descript *nd = &nfsd;
 	struct nfsclds *dsp;
 	struct timespec verstime;
 	int error, len;
 
 	*dspp = NULL;
 	nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL);
 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(nfsboottime.tv_sec);	/* Client owner */
 	*tl = txdr_unsigned(clp->nfsc_rev);
 	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
 
 	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(exchflags);
 	*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
 
 	/* Set the implementation id4 */
 	*tl = txdr_unsigned(1);		/* One implementation id follows. */
 	(void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
 	(void) nfsm_strtom(nd, version, strlen(version));
 	NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
 	verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
 	verstime.tv_nsec = 0;
 	txdr_nfsv4time(&verstime, tl);
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
 	    (int)nd->nd_repstat);
 	if (error != 0)
 		return (error);
 	if (nd->nd_repstat == 0) {
 		/*
 		 * Eight words: clientid (2), sequenceid, flags, three words
 		 * that are not examined, and the owner string length.
 		 */
 		NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
 		len = fxdr_unsigned(int, *(tl + 7));
 		if (len < 0 || len > NFSV4_OPAQUELIMIT) {
 			error = NFSERR_BADXDR;
 			goto nfsmout;
 		}
 		/* Zeroed, with len + 1 extra bytes after the structure. */
 		dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
 		    M_WAITOK | M_ZERO);
 		dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
 		dsp->nfsclds_servownlen = len;
 		/* The clientid words are stored without byte swapping. */
 		dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
 		dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
 		dsp->nfsclds_sess.nfsess_sequenceid =
 		    fxdr_unsigned(uint32_t, *tl++);
 		v41flags = fxdr_unsigned(uint32_t, *tl);
 		/* pNFS MDS role needs both the server flag and the option. */
 		if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
 		    NFSHASPNFSOPT(nmp)) {
 			NFSCL_DEBUG(1, "set PNFS\n");
 			NFSLOCKMNT(nmp);
 			nmp->nm_state |= NFSSTA_PNFS;
 			NFSUNLOCKMNT(nmp);
 			dsp->nfsclds_flags |= NFSCLDS_MDS;
 		}
 		if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
 			dsp->nfsclds_flags |= NFSCLDS_DS;
 		if (len > 0)
 			nd->nd_repstat = nfsrv_mtostr(nd,
 			    dsp->nfsclds_serverown, len);
 		if (nd->nd_repstat == 0) {
 			/* Finish initialization and hand dsp to the caller. */
 			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
 			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
 			    NULL, MTX_DEF);
 			nfscl_initsessionslots(&dsp->nfsclds_sess);
 			*dspp = dsp;
 		} else
 			free(dsp, M_NFSCLDS);
 	}
 	error = nd->nd_repstat;
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * Do the NFSv4.1 Create Session.
  *
  * The request carries the clientid from sep, the given sequenceid, the
  * session flags, fixed fore and back channel attributes and an AUTH_SYS
  * (uid 0, gid 0) callback security entry.  NFSV4CRSESS_PERSIST is asked for
  * unless the mount is read-only; NFSV4CRSESS_CONNBACKCHAN is added for an
  * MDS session when callbacks are enabled and a callback daemon is running.
  *
  * On a clean reply the session id, sequence id, maximum cached response
  * size and the fore/back channel slot counts are stored in *sep, and
  * NFSSTA_SESSPERSIST is set on the mount when the server granted a
  * persistent MDS session.
  *
  * Returns the error from newnfs_request(), NFSERR_BADXDR or another reply
  * parsing error for a malformed reply, or the server status.
  */
 int
 nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
     struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred,
     NFSPROC_T *p)
 {
 	uint32_t crflags, *tl;
 	struct nfsrv_descript nfsd;
 	struct nfsrv_descript *nd = &nfsd;
 	int error, irdcnt;
 
 	nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL);
 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
 	*tl++ = sep->nfsess_clientid.lval[0];
 	*tl++ = sep->nfsess_clientid.lval[1];
 	*tl++ = txdr_unsigned(sequenceid);
 	crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
 	if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
 		crflags |= NFSV4CRSESS_CONNBACKCHAN;
 	*tl = txdr_unsigned(crflags);
 
 	/* Fill in fore channel attributes. */
 	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
 	*tl++ = 0;				/* Header pad size */
 	*tl++ = txdr_unsigned(100000);		/* Max request size */
 	*tl++ = txdr_unsigned(100000);		/* Max response size */
 	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
 	*tl++ = txdr_unsigned(20);		/* Max operations */
 	*tl++ = txdr_unsigned(64);		/* Max slots */
 	*tl = 0;				/* No rdma ird */
 
 	/* Fill in back channel attributes. */
 	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
 	*tl++ = 0;				/* Header pad size */
 	*tl++ = txdr_unsigned(10000);		/* Max request size */
 	*tl++ = txdr_unsigned(10000);		/* Max response size */
 	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
 	*tl++ = txdr_unsigned(4);		/* Max operations */
 	*tl++ = txdr_unsigned(NFSV4_CBSLOTS);	/* Max slots */
 	*tl = 0;				/* No rdma ird */
 
 	NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(NFS_CALLBCKPROG);	/* Call back prog # */
 
 	/* Allow AUTH_SYS callbacks as uid, gid == 0. */
 	*tl++ = txdr_unsigned(1);		/* Auth_sys only */
 	*tl++ = txdr_unsigned(AUTH_SYS);	/* AUTH_SYS type */
 	*tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
 	*tl++ = 0;				/* Null machine name */
 	*tl++ = 0;				/* Uid == 0 */
 	*tl++ = 0;				/* Gid == 0 */
 	*tl = 0;				/* No additional gids */
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
 	    NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0)
 		return (error);
 	if (nd->nd_repstat == 0) {
 		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
 		    2 * NFSX_UNSIGNED);
 		bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
 		tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
 		sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
 		crflags = fxdr_unsigned(uint32_t, *tl);
 		if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
 			NFSLOCKMNT(nmp);
 			nmp->nm_state |= NFSSTA_SESSPERSIST;
 			NFSUNLOCKMNT(nmp);
 		}
 
 		/* Get the fore channel slot count. */
 		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
 		tl += 3;		/* Skip the other counts. */
 		sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
 		tl++;
 		sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
 		NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
 		irdcnt = fxdr_unsigned(int, *tl);
 		/*
 		 * The rdma ird array holds at most one element.  Reject any
 		 * other count from the server instead of feeding an
 		 * unchecked value into the size computation below.
 		 */
 		if (irdcnt < 0 || irdcnt > 1) {
 			error = NFSERR_BADXDR;
 			goto nfsmout;
 		}
 		if (irdcnt > 0)
 			NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
 
 		/* and the back channel slot count. */
 		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
 		tl += 5;
 		sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
 		NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
 	}
 	error = nd->nd_repstat;
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * Do the NFSv4.1 Destroy Session.
  *
  * Sends the session id of the mount's MDS session.  The clp argument is
  * not referenced.  Returns the transport error or the server status.
  */
 int
 nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp,
     struct ucred *cred, NFSPROC_T *p)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsclsession *mdssep;
 	uint32_t *sidp;
 	int error;
 
 	nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL);
 	NFSM_BUILD(sidp, uint32_t *, NFSX_V4SESSIONID);
 	mdssep = nfsmnt_mdssession(nmp);
 	bcopy(mdssep->nfsess_sessionid, sidp, NFSX_V4SESSIONID);
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error == 0) {
 		error = nd->nd_repstat;
 		mbuf_freem(nd->nd_mrep);
 	}
 	return (error);
 }
 
 /*
  * Do the NFSv4.1 Destroy Client.
  *
  * Sends the two clientid words of the mount's MDS session.  The clp
  * argument is not referenced.  Returns the transport error or the server
  * status.
  */
 int
 nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
     struct ucred *cred, NFSPROC_T *p)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsclsession *mdssep;
 	uint32_t *wp;
 	int error;
 
 	nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL);
 	NFSM_BUILD(wp, uint32_t *, 2 * NFSX_UNSIGNED);
 	mdssep = nfsmnt_mdssession(nmp);
 	wp[0] = mdssep->nfsess_clientid.lval[0];
 	wp[1] = mdssep->nfsess_clientid.lval[1];
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error == 0) {
 		error = nd->nd_repstat;
 		mbuf_freem(nd->nd_mrep);
 	}
 	return (error);
 }
 
 /*
  * Do the NFSv4.1 LayoutGet.
  *
  * Requests a NFSLAYOUT_NFSV4_1_FILES layout for the file handle fhp with
  * the given iomode, offset, len and minlen, passing *stateidp and
  * layoutlen in the request.
  *
  * On a clean reply *retonclosep and *stateidp are updated and every
  * returned layout whose iomode equals that of the first layout in the
  * reply is inserted into flhp, kept in increasing offset order.  Layouts
  * with a different iomode are freed.
  *
  * Returns the error from newnfs_request(), NFSERR_BADXDR or another reply
  * parsing error for a malformed reply, or the server status.  Layouts
  * already inserted into flhp before a parsing error are left there.
  */
 int
 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
     uint64_t offset, uint64_t len, uint64_t minlen, int layoutlen,
     nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp,
     struct ucred *cred, NFSPROC_T *p, void *stuff)
 {
 	uint32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsfh *nfhp;
 	struct nfsclflayout *flp, *prevflp, *tflp;
 	int cnt, error, gotiomode, fhcnt, nfhlen, i, j;
 	uint8_t *cp;
 	uint64_t retlen;
 
 	/* flp is non-NULL only while a layout is allocated but not linked. */
 	flp = NULL;
 	/* iomode of the first layout in the reply; -1 until one is seen. */
 	gotiomode = -1;
 	nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL);
 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
 	    NFSX_STATEID);
 	*tl++ = newnfs_false;		/* Don't signal availability. */
 	*tl++ = txdr_unsigned(NFSLAYOUT_NFSV4_1_FILES);
 	*tl++ = txdr_unsigned(iomode);
 	txdr_hyper(offset, tl);
 	tl += 2;
 	txdr_hyper(len, tl);
 	tl += 2;
 	txdr_hyper(minlen, tl);
 	tl += 2;
 	*tl++ = txdr_unsigned(stateidp->seqid);
 	NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
 	*tl++ = stateidp->other[0];
 	*tl++ = stateidp->other[1];
 	*tl++ = stateidp->other[2];
 	*tl = txdr_unsigned(layoutlen);
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0)
 		return (error);
 	if (nd->nd_repstat == 0) {
 		/* Return-on-close flag, stateid, then the layout count. */
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
 		if (*tl++ != 0)
 			*retonclosep = 1;
 		else
 			*retonclosep = 0;
 		stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
 		NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
 		    (int)stateidp->seqid);
 		stateidp->other[0] = *tl++;
 		stateidp->other[1] = *tl++;
 		stateidp->other[2] = *tl++;
 		cnt = fxdr_unsigned(int, *tl);
 		NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
 		if (cnt <= 0 || cnt > 10000) {
 			/* Don't accept more than 10000 layouts in reply. */
 			error = NFSERR_BADXDR;
 			goto nfsmout;
 		}
 		for (i = 0; i < cnt; i++) {
 			/* Dissect all the way to the file handle cnt. */
 			NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_HYPER +
 			    6 * NFSX_UNSIGNED + NFSX_V4DEVICEID);
 			/* Peek at the last dissected word to size flp. */
 			fhcnt = fxdr_unsigned(int, *(tl + 11 +
 			    NFSX_V4DEVICEID / NFSX_UNSIGNED));
 			NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
 			if (fhcnt < 0 || fhcnt > 100) {
 				/* Don't accept more than 100 file handles. */
 				error = NFSERR_BADXDR;
 				goto nfsmout;
 			}
 			/* sizeof(*flp) is taken to cover one fh pointer. */
 			if (fhcnt > 1)
 				flp = malloc(sizeof(*flp) + (fhcnt - 1) *
 				    sizeof(struct nfsfh *),
 				    M_NFSFLAYOUT, M_WAITOK);
 			else
 				flp = malloc(sizeof(*flp),
 				    M_NFSFLAYOUT, M_WAITOK);
 			flp->nfsfl_flags = 0;
 			/* Counts the handles stored so far in nfsfl_fh[]. */
 			flp->nfsfl_fhcnt = 0;
 			flp->nfsfl_devp = NULL;
 			flp->nfsfl_off = fxdr_hyper(tl); tl += 2;
 			retlen = fxdr_hyper(tl); tl += 2;
 			/* Handle off + retlen wrapping past 64 bits. */
 			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
 				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
 			else
 				flp->nfsfl_end = flp->nfsfl_off + retlen;
 			flp->nfsfl_iomode = fxdr_unsigned(int, *tl++);
 			if (gotiomode == -1)
 				gotiomode = flp->nfsfl_iomode;
 			NFSCL_DEBUG(4, "layg reqiom=%d retiom=%d\n", iomode,
 			    (int)flp->nfsfl_iomode);
 			if (fxdr_unsigned(int, *tl++) !=
 			    NFSLAYOUT_NFSV4_1_FILES) {
 				printf("NFSv4.1: got non-files layout\n");
 				error = NFSERR_BADXDR;
 				goto nfsmout;
 			}
 			/*
 			 * The pre-increment skips one word that is not
 			 * examined (presumably the length of the layout
 			 * body) before the device id.
 			 */
 			NFSBCOPY(++tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
 			tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 			flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
 			NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
 			flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
 			flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
 			/* Cross-check against the value peeked at above. */
 			if (fxdr_unsigned(int, *tl) != fhcnt) {
 				printf("EEK! bad fhcnt\n");
 				error = NFSERR_BADXDR;
 				goto nfsmout;
 			}
 			for (j = 0; j < fhcnt; j++) {
 				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 				nfhlen = fxdr_unsigned(int, *tl);
 				if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
 					error = NFSERR_BADXDR;
 					goto nfsmout;
 				}
 				nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
 				    M_NFSFH, M_WAITOK);
 				/*
 				 * Link the handle into flp before the next
 				 * dissect so it is reachable from flp if
 				 * that dissect fails.
 				 */
 				flp->nfsfl_fh[j] = nfhp;
 				flp->nfsfl_fhcnt++;
 				nfhp->nfh_len = nfhlen;
 				NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
 				NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
 			}
 			if (flp->nfsfl_iomode == gotiomode) {
 				/* Keep the list in increasing offset order. */
 				tflp = LIST_FIRST(flhp);
 				prevflp = NULL;
 				while (tflp != NULL &&
 				    tflp->nfsfl_off < flp->nfsfl_off) {
 					prevflp = tflp;
 					tflp = LIST_NEXT(tflp, nfsfl_list);
 				}
 				if (prevflp == NULL)
 					LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
 				else
 					LIST_INSERT_AFTER(prevflp, flp,
 					    nfsfl_list);
 			} else {
 				printf("nfscl_layoutget(): got wrong iomode\n");
 				nfscl_freeflayout(flp);
 			}
 			/* flp is now either on the list or freed. */
 			flp = NULL;
 		}
 	}
 	if (nd->nd_repstat != 0 && error == 0)
 		error = nd->nd_repstat;
 nfsmout:
 	/* Release a layout that was being built when parsing failed. */
 	if (error != 0 && flp != NULL)
 		nfscl_freeflayout(flp);
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * Do the NFSv4.1 Get Device Info.
  *
  * Asks the server about deviceid for the given layouttype, optionally
  * passing one word of notification bits from *notifybitsp.
  *
  * On a clean reply a struct nfscldevinfo is allocated holding the stripe
  * indices and one data server pointer per address list entry.  For each
  * entry one of the listed non-UDP addresses is selected and turned into a
  * struct nfsclds by nfsrpc_fillsa().  The result is returned through
  * *ndip; *ndip is set to NULL on entry.
  *
  * Returns the error from newnfs_request(), NFSERR_BADXDR or another reply
  * parsing error for a malformed reply, EPERM when an address list yielded
  * no usable server, or the server status.
  */
 int
 nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
     NFSPROC_T *p)
 {
 	uint32_t cnt, *tl;
 	struct nfsrv_descript nfsd;
 	struct nfsrv_descript *nd = &nfsd;
 	struct sockaddr_storage ss;
 	struct nfsclds *dsp = NULL, **dspp;
 	struct nfscldevinfo *ndi;
 	int addrcnt, bitcnt, error, i, isudp, j, pos, safilled, stripecnt;
 	uint8_t stripeindex;
 
 	*ndip = NULL;
 	ndi = NULL;
 	nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL);
 	NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
 	NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
 	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(layouttype);
 	*tl++ = txdr_unsigned(100000);
 	if (notifybitsp != NULL && *notifybitsp != 0) {
 		*tl = txdr_unsigned(1);		/* One word of bits. */
 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(*notifybitsp);
 	} else
 		*tl = txdr_unsigned(0);
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0)
 		return (error);
 	if (nd->nd_repstat == 0) {
 		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
 		/* A layout type mismatch is only reported, not rejected. */
 		if (layouttype != fxdr_unsigned(int, *tl++))
 			printf("EEK! devinfo layout type not same!\n");
 		/* The second word is skipped without being examined. */
 		stripecnt = fxdr_unsigned(int, *++tl);
 		NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
 		if (stripecnt < 1 || stripecnt > 4096) {
 			printf("NFS devinfo stripecnt %d: out of range\n",
 			    stripecnt);
 			error = NFSERR_BADXDR;
 			goto nfsmout;
 		}
 		/* The stripe index words plus the address list count. */
 		NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) * NFSX_UNSIGNED);
 		addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
 		NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
 		if (addrcnt < 1 || addrcnt > 128) {
 			printf("NFS devinfo addrcnt %d: out of range\n",
 			    addrcnt);
 			error = NFSERR_BADXDR;
 			goto nfsmout;
 		}
 
 		/*
 		 * Now we know how many stripe indices and addresses, so
 		 * we can allocate the structure the correct size.
 		 *
 		 * i is the number of pointer-sized slots reserved for the
 		 * stripecnt one-byte stripe indices (rounded down, plus one).
 		 */
 		i = (stripecnt * sizeof(uint8_t)) / sizeof(struct nfsclds *)
 		    + 1;
 		NFSCL_DEBUG(4, "stripeindices=%d\n", i);
 		ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
 		    sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO);
 		NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID);
 		ndi->nfsdi_refcnt = 0;
 		ndi->nfsdi_stripecnt = stripecnt;
 		ndi->nfsdi_addrcnt = addrcnt;
 		/* Fill in the stripe indices. */
 		for (i = 0; i < stripecnt; i++) {
 			/* Each index is narrowed to 8 bits before the check. */
 			stripeindex = fxdr_unsigned(uint8_t, *tl++);
 			NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
 			if (stripeindex >= addrcnt) {
 				printf("NFS devinfo stripeindex %d: too big\n",
 				    (int)stripeindex);
 				error = NFSERR_BADXDR;
 				goto nfsmout;
 			}
 			nfsfldi_setstripeindex(ndi, i, stripeindex);
 		}
 
 		/* Now, dissect the server address(es). */
 		/*
 		 * safilled, per address list: 0 = nothing selected yet,
 		 * 1 = selected an address of a different family than the
 		 * mount, 2 = selected one of the mount's address family.
 		 */
 		safilled = 0;
 		for (i = 0; i < addrcnt; i++) {
 			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 			cnt = fxdr_unsigned(uint32_t, *tl);
 			if (cnt == 0) {
 				printf("NFS devinfo 0 len addrlist\n");
 				error = NFSERR_BADXDR;
 				goto nfsmout;
 			}
 			dspp = nfsfldi_addr(ndi, i);
 			pos = arc4random() % cnt;	/* Choose one. */
 			safilled = 0;
 			for (j = 0; j < cnt; j++) {
 				/* EPERM is tolerated; the entry is skipped. */
 				error = nfsv4_getipaddr(nd, &ss, &isudp);
 				if (error != 0 && error != EPERM) {
 					error = NFSERR_BADXDR;
 					goto nfsmout;
 				}
 				if (error == 0 && isudp == 0) {
 					/*
 					 * The algorithm is:
 					 * - use "pos" entry if it is of the
 					 *   same af_family or none of them
 					 *   is of the same af_family
 					 * else
 					 * - use the first one of the same
 					 *   af_family.
 					 */
 					if ((safilled == 0 && ss.ss_family ==
 					     nmp->nm_nam->sa_family) ||
 					    (j == pos &&
 					     (safilled == 0 || ss.ss_family ==
 					      nmp->nm_nam->sa_family)) ||
 					    (safilled == 1 && ss.ss_family ==
 					     nmp->nm_nam->sa_family)) {
 						/*
 						 * A failure here is not
 						 * fatal; later entries may
 						 * still be tried.
 						 */
 						error = nfsrpc_fillsa(nmp, &ss,
 						    &dsp, p);
 						if (error == 0) {
 							*dspp = dsp;
 							if (ss.ss_family ==
 							 nmp->nm_nam->sa_family)
 								safilled = 2;
 							else
 								safilled = 1;
 						}
 					}
 				}
 			}
 			/* No usable address in this list: give up. */
 			if (safilled == 0)
 				break;
 		}
 
 		/* And the notify bits. */
 		/*
 		 * NOTE(review): error still holds the result of the last
 		 * nfsv4_getipaddr()/nfsrpc_fillsa() call at this point.
 		 * Unless NFSM_DISSECT clears it, a nonzero value combined
 		 * with safilled != 0 means *ndip is set below and ndi is
 		 * then freed at nfsmout -- confirm callers ignore *ndip
 		 * when an error is returned.
 		 */
 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 		if (safilled != 0) {
 			bitcnt = fxdr_unsigned(int, *tl);
 			/* Only the first word of bits is read. */
 			if (bitcnt > 0) {
 				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 				if (notifybitsp != NULL)
 					*notifybitsp =
 					    fxdr_unsigned(uint32_t, *tl);
 			}
 			*ndip = ndi;
 		} else
 			error = EPERM;
 	}
 	if (nd->nd_repstat != 0)
 		error = nd->nd_repstat;
 nfsmout:
 	if (error != 0 && ndi != NULL)
 		nfscl_freedevinfo(ndi);
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * Do the NFSv4.1 LayoutCommit.
  *
  * The fixed part of the request is fifteen words: offset, length, reclaim
  * flag, stateid, a "true" flag followed by the last byte written, a
  * "false" flag, the layout type and the layout update length.  lastbyte
  * is clamped into [off, off + len - 1] before it is sent.  When
  * layoutupdatecnt is positive the layp bytes follow, zero padded to the
  * NFSM_RNDUP() size.  Returns the transport error or the server status.
  */
 int
 nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
     uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
     int layouttype, int layoutupdatecnt, uint8_t *layp, struct ucred *cred,
     NFSPROC_T *p, void *stuff)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	uint32_t *wp;
 	uint8_t *bp;
 	int error, i, padded;
 
 	/* Keep the reported last byte inside the committed range. */
 	if (lastbyte < off)
 		lastbyte = off;
 	else if (lastbyte >= (off + len))
 		lastbyte = off + len - 1;
 
 	nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL);
 	NFSM_BUILD(wp, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
 	    NFSX_STATEID);
 	txdr_hyper(off, &wp[0]);
 	txdr_hyper(len, &wp[2]);
 	wp[4] = (reclaim != 0) ? newnfs_true : newnfs_false;
 	wp[5] = txdr_unsigned(stateidp->seqid);
 	wp[6] = stateidp->other[0];
 	wp[7] = stateidp->other[1];
 	wp[8] = stateidp->other[2];
 	wp[9] = newnfs_true;
 	txdr_hyper(lastbyte, &wp[10]);
 	wp[12] = newnfs_false;
 	wp[13] = txdr_unsigned(layouttype);
 	wp[14] = txdr_unsigned(layoutupdatecnt);
 	if (layoutupdatecnt > 0) {
 		KASSERT(layouttype != NFSLAYOUT_NFSV4_1_FILES,
 		    ("Must be nil for Files Layout"));
 		padded = NFSM_RNDUP(layoutupdatecnt);
 		NFSM_BUILD(bp, uint8_t *, padded);
 		NFSBCOPY(layp, bp, layoutupdatecnt);
 		/* Zero the pad bytes after the copied data. */
 		for (i = layoutupdatecnt; i < padded; i++)
 			bp[i] = 0x0;
 	}
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error == 0) {
 		error = nd->nd_repstat;
 		mbuf_freem(nd->nd_mrep);
 	}
 	return (error);
 }
 
 /*
  * Do the NFSv4.1 LayoutReturn.
  *
  * Marshals the reclaim flag, layout type, iomode and the kind of return
  * (layoutreturn).  Only for NFSLAYOUTRETURN_FILE are the byte range
  * (offset, len), the layout stateid and an optional opaque body of
  * layoutcnt bytes taken from layp added.
  * When the reply status is 0 and the first reply word is non-zero, the
  * stateid in the reply is stored into *stateidp.
  * Returns the newnfs_request() error, a reply parsing error or the reply
  * status (nd_repstat).
  * The "stuff" argument is not referenced by this function.
  */
 int
 nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
     int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
     uint64_t len, nfsv4stateid_t *stateidp, int layoutcnt, uint32_t *layp,
     struct ucred *cred, NFSPROC_T *p, void *stuff)
 {
 	uint32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	int error, outcnt, i;
 	uint8_t *cp;
 
 	nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL);
 	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
 	if (reclaim != 0)
 		*tl++ = newnfs_true;
 	else
 		*tl++ = newnfs_false;
 	*tl++ = txdr_unsigned(layouttype);
 	*tl++ = txdr_unsigned(iomode);
 	*tl = txdr_unsigned(layoutreturn);
 	if (layoutreturn == NFSLAYOUTRETURN_FILE) {
 		/* Per-file return: byte range, stateid and opaque body. */
 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
 		    NFSX_UNSIGNED);
 		txdr_hyper(offset, tl);
 		tl += 2;
 		txdr_hyper(len, tl);
 		tl += 2;
 		NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
 		*tl++ = txdr_unsigned(stateidp->seqid);
 		*tl++ = stateidp->other[0];
 		*tl++ = stateidp->other[1];
 		*tl++ = stateidp->other[2];
 		*tl = txdr_unsigned(layoutcnt);
 		if (layoutcnt > 0) {
 			/* Copy the body and zero the trailing pad bytes. */
 			outcnt = NFSM_RNDUP(layoutcnt);
 			NFSM_BUILD(cp, uint8_t *, outcnt);
 			NFSBCOPY(layp, cp, layoutcnt);
 			cp += layoutcnt;
 			for (i = 0; i < (outcnt - layoutcnt); i++)
 				*cp++ = 0x0;
 		}
 	}
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	/* The reply mbufs are only freed once a reply has been received. */
 	if (error != 0)
 		return (error);
 	if (nd->nd_repstat == 0) {
 		/*
 		 * A non-zero first word means a stateid follows, which
 		 * replaces the caller's copy.
 		 * NOTE(review): NFSM_DISSECT presumably sets error and jumps
 		 * to nfsmout when the reply is too short -- confirm.
 		 */
 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 		if (*tl != 0) {
 			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
 			stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
 			stateidp->other[0] = *tl++;
 			stateidp->other[1] = *tl++;
 			stateidp->other[2] = *tl;
 		}
 	} else
 		error = nd->nd_repstat;
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * Acquire a layout and devinfo, if possible. The caller must have acquired
  * a reference count on the nfsclclient structure before calling this.
  * Return the layout in lypp with a reference count on it, if successful.
  *
  * A layout already held by the client that covers "off" is returned as is.
  * Otherwise a LayoutGet is done, the device info for every file layout
  * segment returned is looked up (doing a GetDeviceInfo when it is not
  * already known) and the result is handed to nfscl_layout().
  * Returns 0 with *lypp set, EIO when nfscl_getlayout() reports the layout
  * as recalled, or the error from one of the RPCs/helpers with *lypp NULL.
  */
 static int
 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
     int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off,
     struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
 {
 	struct nfscllayout *lyp;
 	struct nfsclflayout *flp, *tflp;
 	struct nfscldevinfo *dip;
 	struct nfsclflayouthead flh;
 	int error = 0, islocked, layoutlen, recalled, retonclose;
 	nfsv4stateid_t stateid;
 	struct nfsclsession *tsep;
 
 	*lypp = NULL;
 	/*
 	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
 	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
 	 * flp == NULL.
 	 */
 	lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
 	    off, &flp, &recalled);
 	islocked = 0;
 	if (lyp == NULL || flp == NULL) {
 		/*
 		 * NOTE(review): this return does not release lyp; presumably
 		 * nfscl_getlayout() returns NULL whenever it sets recalled --
 		 * confirm.
 		 */
 		if (recalled != 0)
 			return (EIO);
 		LIST_INIT(&flh);
 		tsep = nfsmnt_mdssession(nmp);
 		/* Reply space left after the stateid and three words. */
 		layoutlen = tsep->nfsess_maxcache -
 		    (NFSX_STATEID + 3 * NFSX_UNSIGNED);
 		if (lyp == NULL) {
 			/*
 			 * No layout yet: use the caller's stateid with a
 			 * seqid of 0 and ask for the range starting at 0.
 			 */
 			stateid.seqid = 0;
 			stateid.other[0] = stateidp->other[0];
 			stateid.other[1] = stateidp->other[1];
 			stateid.other[2] = stateidp->other[2];
 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
 			    nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
 			    (uint64_t)0, layoutlen, &stateid, &retonclose,
 			    &flh, cred, p, NULL);
 		} else {
 			/*
 			 * A layout exists but does not cover off: use the
 			 * layout's stateid and ask for the range from off.
 			 * lyp is exclusively locked here (see above).
 			 */
 			islocked = 1;
 			stateid.seqid = lyp->nfsly_stateid.seqid;
 			stateid.other[0] = lyp->nfsly_stateid.other[0];
 			stateid.other[1] = lyp->nfsly_stateid.other[1];
 			stateid.other[2] = lyp->nfsly_stateid.other[2];
 			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
 			    nfhp->nfh_len, iomode, off, UINT64_MAX,
 			    (uint64_t)0, layoutlen, &stateid, &retonclose,
 			    &flh, cred, p, NULL);
 		}
 		if (error == 0)
 			LIST_FOREACH(tflp, &flh, nfsfl_list) {
 				/*
 				 * First try to attach already known device
 				 * info; fetch it from the server only when
 				 * that fails.
 				 */
 				error = nfscl_adddevinfo(nmp, NULL, tflp);
 				if (error != 0) {
 					error = nfsrpc_getdeviceinfo(nmp,
 					    tflp->nfsfl_dev,
 					    NFSLAYOUT_NFSV4_1_FILES,
 					    notifybitsp, &dip, cred, p);
 					if (error != 0)
 						break;
 					error = nfscl_adddevinfo(nmp, dip,
 					    tflp);
 					if (error != 0)
 						printf(
 						    "getlayout: cannot add\n");
 				}
 			}
 		if (error == 0) {
 			/*
 			 * nfscl_layout() always returns with the nfsly_lock
 			 * set to a refcnt (shared lock).
 			 */
 			error = nfscl_layout(nmp, vp, nfhp->nfh_fh,
 			    nfhp->nfh_len, &stateid, retonclose, &flh, &lyp,
 			    cred, p);
 			if (error == 0)
 				*lypp = lyp;
 		} else if (islocked != 0)
 			/* Drop the exclusive lock taken by nfscl_getlayout(). */
 			nfsv4_unlock(&lyp->nfsly_lock, 0);
 	} else
 		/* The existing layout already covers off. */
 		*lypp = lyp;
 	return (error);
 }
 
 /*
  * Do a TCP connection plus exchange id and create session.
  * If successful, a "struct nfsclds" is linked into the list for the
  * mount point and a pointer to it is returned.
  *
  * nmp is the mount point and ssp the address of the DS, which must be
  * AF_INET or AF_INET6.  If a usable DS session for the same address or
  * the same server already exists on the mount, that one is returned in
  * *dspp and no new session is created.
  * Returns 0 with *dspp set, EPERM when the mount has no nfsclclient or the
  * address family is not supported, or the error from the connect,
  * ExchangeID or CreateSession.
  */
 static int
 nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_storage *ssp,
     struct nfsclds **dspp, NFSPROC_T *p)
 {
 	struct sockaddr_in *msad, *sad, *ssd;
 	struct sockaddr_in6 *msad6, *sad6, *ssd6;
 	struct nfsclclient *clp;
 	struct nfssockreq *nrp;
 	struct nfsclds *dsp, *tdsp;
 	int error;
 	enum nfsclds_state retv;
 	uint32_t sequenceid;
 
 	KASSERT(nmp->nm_sockreq.nr_cred != NULL,
 	    ("nfsrpc_fillsa: NULL nr_cred"));
 	NFSLOCKCLSTATE();
 	clp = nmp->nm_clp;
 	NFSUNLOCKCLSTATE();
 	if (clp == NULL)
 		return (EPERM);
 	if (ssp->ss_family == AF_INET) {
 		ssd = (struct sockaddr_in *)ssp;
 		NFSLOCKMNT(nmp);
 
 		/*
 		 * Check to see if we already have a session for this
 		 * address that is usable for a DS.
 		 * Note that the MDS's address is in a different place
 		 * than the sessions already acquired for DS's.
 		 */
 		msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
 		tdsp = TAILQ_FIRST(&nmp->nm_sess);
 		while (tdsp != NULL) {
 			if (msad != NULL && msad->sin_family == AF_INET &&
 			    ssd->sin_addr.s_addr == msad->sin_addr.s_addr &&
 			    ssd->sin_port == msad->sin_port &&
 			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
 			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
 				*dspp = tdsp;
 				NFSUNLOCKMNT(nmp);
 				NFSCL_DEBUG(4, "fnd same addr\n");
 				return (0);
 			}
 			/* Later entries keep their address in nfsclds_sockp. */
 			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
 			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
 				msad = (struct sockaddr_in *)
 				    tdsp->nfsclds_sockp->nr_nam;
 			else
 				msad = NULL;
 		}
 		NFSUNLOCKMNT(nmp);
 
 		/* No IP address match, so look for new/trunked one. */
 		sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
 		sad->sin_len = sizeof(*sad);
 		sad->sin_family = AF_INET;
 		sad->sin_port = ssd->sin_port;
 		sad->sin_addr.s_addr = ssd->sin_addr.s_addr;
 		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
 		nrp->nr_nam = (struct sockaddr *)sad;
 	} else if (ssp->ss_family == AF_INET6) {
 		ssd6 = (struct sockaddr_in6 *)ssp;
 		NFSLOCKMNT(nmp);
 
 		/*
 		 * Check to see if we already have a session for this
 		 * address that is usable for a DS.
 		 * Note that the MDS's address is in a different place
 		 * than the sessions already acquired for DS's.
 		 */
 		msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
 		tdsp = TAILQ_FIRST(&nmp->nm_sess);
 		while (tdsp != NULL) {
 			if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
 			    IN6_ARE_ADDR_EQUAL(&ssd6->sin6_addr,
 			    &msad6->sin6_addr) &&
 			    ssd6->sin6_port == msad6->sin6_port &&
 			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
 			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
 				*dspp = tdsp;
 				NFSUNLOCKMNT(nmp);
 				return (0);
 			}
 			/* Later entries keep their address in nfsclds_sockp. */
 			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
 			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
 				msad6 = (struct sockaddr_in6 *)
 				    tdsp->nfsclds_sockp->nr_nam;
 			else
 				msad6 = NULL;
 		}
 		NFSUNLOCKMNT(nmp);
 
 		/* No IP address match, so look for new/trunked one. */
 		sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
 		sad6->sin6_len = sizeof(*sad6);
 		sad6->sin6_family = AF_INET6;
 		sad6->sin6_port = ssd6->sin6_port;
 		NFSBCOPY(&ssd6->sin6_addr, &sad6->sin6_addr,
 		    sizeof(struct in6_addr));
 		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
 		nrp->nr_nam = (struct sockaddr *)sad6;
 	} else
 		return (EPERM);
 
 	nrp->nr_sotype = SOCK_STREAM;
 	mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
 	nrp->nr_prog = NFS_PROG;
 	nrp->nr_vers = NFS_VER4;
 
 	/*
 	 * Use the credentials that were used for the mount, which are
 	 * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
 	 * Ref. counting the credentials with crhold() is probably not
 	 * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
 	 * unmount, but I did it anyhow.
 	 */
 	nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
 	error = newnfs_connect(nmp, nrp, NULL, p, 0);
 	NFSCL_DEBUG(3, "DS connect=%d\n", error);
 
 	/*
 	 * dsp is only assigned by nfsrpc_exchangeid(), which is skipped when
 	 * the connect fails.  Start it out as NULL so that the final error
 	 * check below does not test an uninitialized pointer and then touch
 	 * the already freed nrp.
 	 */
 	dsp = NULL;
 
 	/* Now, do the exchangeid and create session. */
 	if (error == 0) {
 		error = nfsrpc_exchangeid(nmp, clp, nrp, NFSV4EXCH_USEPNFSDS,
 		    &dsp, nrp->nr_cred, p);
 		NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
 		if (error != 0)
 			newnfs_disconnect(nrp);
 	}
 	if (error == 0) {
 		/* From here on nrp is owned by dsp. */
 		dsp->nfsclds_sockp = nrp;
 		NFSLOCKMNT(nmp);
 		retv = nfscl_getsameserver(nmp, dsp, &tdsp);
 		NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
 		if (retv == NFSDSP_USETHISSESSION) {
 			NFSUNLOCKMNT(nmp);
 			/*
 			 * If there is already a session for this server,
 			 * use it.
 			 */
 			(void)newnfs_disconnect(nrp);
 			nfscl_freenfsclds(dsp);
 			*dspp = tdsp;
 			return (0);
 		}
 		if (retv == NFSDSP_SEQTHISSESSION)
 			sequenceid = tdsp->nfsclds_sess.nfsess_sequenceid;
 		else
 			sequenceid = dsp->nfsclds_sess.nfsess_sequenceid;
 		NFSUNLOCKMNT(nmp);
 		error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
 		    nrp, sequenceid, 0, nrp->nr_cred, p);
 		NFSCL_DEBUG(3, "DS createsess=%d\n", error);
 	} else {
 		/* Connect or ExchangeID failed: nrp is still ours to free. */
 		NFSFREECRED(nrp->nr_cred);
 		NFSFREEMUTEX(&nrp->nr_mtx);
 		free(nrp->nr_nam, M_SONAME);
 		free(nrp, M_NFSSOCKREQ);
 	}
 	if (error == 0) {
 		NFSCL_DEBUG(3, "add DS session\n");
 		/*
 		 * Put it at the end of the list. That way the list
 		 * is ordered by when the entry was added. This matters
 		 * since the one done first is the one that should be
 		 * used for sequencing any subsequent create sessions.
 		 */
 		NFSLOCKMNT(nmp);
 		TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
 		NFSUNLOCKMNT(nmp);
 		*dspp = dsp;
 	} else if (dsp != NULL) {
 		/* CreateSession failed; freeing dsp also frees nrp. */
 		newnfs_disconnect(nrp);
 		nfscl_freenfsclds(dsp);
 	}
 	return (error);
 }
 
 /*
  * Do the NFSv4.1 Reclaim Complete.
  * The single argument word is sent as false and the request goes to the
  * MDS via the mount's nm_sockreq.  Returns the newnfs_request() error
  * or, once a reply has been received, the reply status.
  */
 int
 nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	uint32_t *argp;
 	int error;
 
 	nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL);
 	NFSM_BUILD(argp, uint32_t *, NFSX_UNSIGNED);
 	*argp = newnfs_false;
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error == 0) {
 		/* A reply arrived: report its status and release it. */
 		error = nd->nd_repstat;
 		mbuf_freem(nd->nd_mrep);
 	}
 	return (error);
 }
 
 /*
  * Initialize the slot tables for a session.
  */
 static void
 nfscl_initsessionslots(struct nfsclsession *sep)
 {
 	int i;
 
 	for (i = 0; i < NFSV4_CBSLOTS; i++) {
 		if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
 			m_freem(sep->nfsess_cbslots[i].nfssl_reply);
 		NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
 	}
 	for (i = 0; i < 64; i++)
 		sep->nfsess_slotseq[i] = 0;
 	sep->nfsess_slots = 0;
 }
 
 /*
  * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
  *
  * vp/uiop describe the file and the I/O, rwaccess is the open access mode
  * (NFSV4OPEN_ACCESSREAD or NFSV4OPEN_ACCESSWRITE) and iomode/must_commit
  * are passed through to the DS write RPC.
  * Returns EIO when pNFS cannot be used for this file (not a pNFS mount,
  * callbacks not running, NNOLAYOUT set, no clientid reference, layout
  * recalled, no usable layout segment or device info), otherwise the
  * result of the layout/DS operations.
  * Every reference taken here (clientid, credential, lock owner, layout)
  * is dropped again before returning.
  */
 int
 nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
     uint32_t rwaccess, struct ucred *cred, NFSPROC_T *p)
 {
 	struct nfsnode *np = VTONFS(vp);
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	struct nfscllayout *layp;
 	struct nfscldevinfo *dip;
 	struct nfsclflayout *rflp;
 	nfsv4stateid_t stateid;
 	struct ucred *newcred;
 	uint64_t lastbyte, len, off, oresid, xfer;
 	int eof, error, iolaymode, recalled;
 	void *lckp;
 
 	if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
 	    (np->n_flag & NNOLAYOUT) != 0)
 		return (EIO);
 	/* Now, get a reference cnt on the clientid for this mount. */
 	if (nfscl_getref(nmp) == 0)
 		return (EIO);
 
 	/* Find an appropriate stateid. */
 	newcred = NFSNEWCRED(cred);
 	error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
 	    rwaccess, 1, newcred, p, &stateid, &lckp);
 	if (error != 0) {
 		NFSFREECRED(newcred);
 		nfscl_relref(nmp);
 		return (error);
 	}
 	/* Search for a layout for this file. */
 	off = uiop->uio_offset;
 	layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
 	    np->n_fhp->nfh_len, off, &rflp, &recalled);
 	if (layp == NULL || rflp == NULL) {
 		if (recalled != 0) {
 			/*
 			 * Drop the lock owner reference acquired by
 			 * nfscl_getstateid(), as every other exit does.
 			 */
 			if (lckp != NULL)
 				nfscl_lockderef(lckp);
 			NFSFREECRED(newcred);
 			nfscl_relref(nmp);
 			return (EIO);
 		}
 		if (layp != NULL) {
 			/* rflp == NULL means layp is exclusively locked. */
 			nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
 			layp = NULL;
 		}
 		/* Try and get a Layout, if it is supported. */
 		if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
 		    (np->n_flag & NWRITEOPENED) != 0)
 			iolaymode = NFSLAYOUTIOMODE_RW;
 		else
 			iolaymode = NFSLAYOUTIOMODE_READ;
 		error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
 		    NULL, &stateid, off, &layp, newcred, p);
 		if (error != 0) {
 			/* Do not try pNFS again for this file. */
 			NFSLOCKNODE(np);
 			np->n_flag |= NNOLAYOUT;
 			NFSUNLOCKNODE(np);
 			if (lckp != NULL)
 				nfscl_lockderef(lckp);
 			NFSFREECRED(newcred);
 			if (layp != NULL)
 				nfscl_rellayout(layp, 0);
 			nfscl_relref(nmp);
 			return (error);
 		}
 	}
 
 	/*
 	 * Loop around finding a layout that works for the first part of
 	 * this I/O operation, and then call the function that actually
 	 * does the RPC.
 	 */
 	eof = 0;
 	len = (uint64_t)uiop->uio_resid;
 	while (len > 0 && error == 0 && eof == 0) {
 		off = uiop->uio_offset;
 		error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
 		if (error == 0) {
 			/* Limit this pass to the size of the segment. */
 			oresid = xfer = (uint64_t)uiop->uio_resid;
 			if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
 				xfer = rflp->nfsfl_end - rflp->nfsfl_off;
 			dip = nfscl_getdevinfo(nmp->nm_clp, rflp->nfsfl_dev,
 			    rflp->nfsfl_devp);
 			if (dip != NULL) {
 				error = nfscl_doflayoutio(vp, uiop, iomode,
 				    must_commit, &eof, &stateid, rwaccess, dip,
 				    layp, rflp, off, xfer, newcred, p);
 				nfscl_reldevinfo(dip);
 				lastbyte = off + xfer - 1;
 				if (error == 0) {
 					/* Track the highest byte done. */
 					NFSLOCKCLSTATE();
 					if (lastbyte > layp->nfsly_lastbyte)
 						layp->nfsly_lastbyte = lastbyte;
 					NFSUNLOCKCLSTATE();
 				} else if (error == NFSERR_OPENMODE &&
 				    rwaccess == NFSV4OPEN_ACCESSREAD) {
 					NFSLOCKMNT(nmp);
 					nmp->nm_state |= NFSSTA_OPENMODE;
 					NFSUNLOCKMNT(nmp);
 				}
 			} else
 				error = EIO;
 			/* Account for what this pass actually transferred. */
 			if (error == 0)
 				len -= (oresid - (uint64_t)uiop->uio_resid);
 		}
 	}
 	if (lckp != NULL)
 		nfscl_lockderef(lckp);
 	NFSFREECRED(newcred);
 	nfscl_rellayout(layp, 0);
 	nfscl_relref(nmp);
 	return (error);
 }
 
 /*
  * Find a file layout that will handle the first bytes of the requested
  * range and return the information from it needed to do the I/O operation.
  * For a read request the Read list is searched and then the Write list;
  * for anything else only the Write list is searched.
  * Among the segments containing "off", the one that ends last is stored
  * in *retflpp and 0 is returned.  EIO is returned if there is none.
  */
 int
 nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
     struct nfsclflayout **retflpp)
 {
 	struct nfsclflayout *best, *cur;
 	uint32_t pass;
 
 	best = NULL;
 	pass = rwaccess;
 	for (;;) {
 		if (pass == NFSV4OPEN_ACCESSREAD)
 			cur = LIST_FIRST(&lyp->nfsly_flayread);
 		else
 			cur = LIST_FIRST(&lyp->nfsly_flayrw);
 		/* Stop at the first segment that starts beyond off. */
 		for (; cur != NULL && cur->nfsfl_off <= off;
 		    cur = LIST_NEXT(cur, nfsfl_list)) {
 			if (cur->nfsfl_end <= off)
 				continue;
 			if (best == NULL || best->nfsfl_end < cur->nfsfl_end)
 				best = cur;
 		}
 		/* Only a read gets a second pass, over the Write list. */
 		if (pass != NFSV4OPEN_ACCESSREAD)
 			break;
 		pass = NFSV4OPEN_ACCESSWRITE;
 	}
 	if (best == NULL)
 		return (EIO);
 	/* This one covers the most bytes starting at off. */
 	*retflpp = best;
 	return (0);
 }
 
 /*
  * Do I/O using an NFSv4.1 file layout.
  *
  * The range (off, len) is split at stripe unit boundaries and one DS
  * read or write RPC is done per piece, moving on to the next stripe after
  * each successful piece.  rwflag selects a read (FREAD) or a write.
  * dp is the device info and flp the layout segment that covers "off";
  * lyp is marked NFSLY_WRITTEN after a successful write.
  * Returns 0, EIO when the layout cannot be used (bad stripe size/count or
  * a file handle index out of range), or the error from the DS RPC.
  */
 static int
 nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
     uint64_t len, struct ucred *cred, NFSPROC_T *p)
 {
 	uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
 	int commit_thru_mds, error = 0, stripe_index, stripe_pos;
 	struct nfsnode *np;
 	struct nfsfh *fhp;
 	struct nfsclds **dspp;
 
 	np = VTONFS(vp);
 	rel_off = off - flp->nfsfl_patoff;
 	/*
 	 * RFC 5661 defines the stripe unit size as the nfl_util word with
 	 * the low order 6 flag bits masked off (0xFFFFFFC0); the value is
 	 * not shifted down.
 	 */
 	stripe_unit_size = flp->nfsfl_util & 0xffffffc0U;
 	/*
 	 * Both values come from the server and are used as divisors below,
 	 * so refuse a layout where either one is zero.
 	 */
 	if (stripe_unit_size == 0 || dp->nfsdi_stripecnt == 0)
 		return (EIO);
 	stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
 	    dp->nfsdi_stripecnt;
 	/* The first piece only runs to the end of its stripe unit. */
 	transfer = stripe_unit_size - (rel_off % stripe_unit_size);
 
 	/* Loop around, doing I/O for each stripe unit. */
 	while (len > 0 && error == 0) {
 		stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
 		dspp = nfsfldi_addr(dp, stripe_index);
 		if (len > transfer)
 			xfer = transfer;
 		else
 			xfer = len;
 		if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
 			/* Dense layout. */
 			if (stripe_pos >= flp->nfsfl_fhcnt)
 				return (EIO);
 			fhp = flp->nfsfl_fh[stripe_pos];
 			io_off = (rel_off / (stripe_unit_size *
 			    dp->nfsdi_stripecnt)) * stripe_unit_size +
 			    rel_off % stripe_unit_size;
 		} else {
 			/* Sparse layout. */
 			if (flp->nfsfl_fhcnt > 1) {
 				if (stripe_index >= flp->nfsfl_fhcnt)
 					return (EIO);
 				fhp = flp->nfsfl_fh[stripe_index];
 			} else if (flp->nfsfl_fhcnt == 1)
 				fhp = flp->nfsfl_fh[0];
 			else
 				/* No DS file handles: use the MDS one. */
 				fhp = np->n_fhp;
 			io_off = off;
 		}
 		if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0)
 			commit_thru_mds = 1;
 		else
 			commit_thru_mds = 0;
 		if (rwflag == FREAD)
 			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
 			    io_off, xfer, fhp, cred, p);
 		else {
 			error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
 			    stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
 			    cred, p);
 			if (error == 0) {
 				NFSLOCKCLSTATE();
 				lyp->nfsly_flags |= NFSLY_WRITTEN;
 				NFSUNLOCKCLSTATE();
 			}
 		}
 		if (error == 0) {
 			/* Later pieces each cover a whole stripe unit. */
 			transfer = stripe_unit_size;
 			stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
 			len -= xfer;
 			off += xfer;
 		}
 	}
 	return (error);
 }
 
 /*
  * The actual read RPC done to a DS.
  *
  * Reads up to len bytes at io_off from the DS file handle fhp, using the
  * DS session in dsp, and copies the data returned into uiop.
  * *eofp is set from the reply.  Returns the newnfs_request() error, the
  * reply status, or the result of copying the data into uiop.
  */
 static int
 nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
     struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp,
     struct ucred *cred, NFSPROC_T *p)
 {
 	uint32_t *tl;
 	int error, retlen;
 	struct nfsrv_descript nfsd;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	struct nfsrv_descript *nd = &nfsd;
 	struct nfssockreq *nrp;
 
 	nd->nd_mrep = NULL;
 	nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh, fhp->nfh_len,
 	    NULL, &dsp->nfsclds_sess);
 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
 	/* Offset (two words) followed by the byte count. */
 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
 	txdr_hyper(io_off, tl);
 	*(tl + 2) = txdr_unsigned(len);
 	nrp = dsp->nfsclds_sockp;
 	if (nrp == NULL)
 		/* If NULL, use the MDS socket. */
 		nrp = &nmp->nm_sockreq;
 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
 	if (error != 0)
 		return (error);
 	if (nd->nd_repstat != 0) {
 		error = nd->nd_repstat;
 		goto nfsmout;
 	}
 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 	*eofp = fxdr_unsigned(int, *tl);
 	/*
 	 * NOTE(review): NFSM_STRSIZ presumably fetches the returned byte
 	 * count into retlen and rejects a count larger than len -- confirm.
 	 */
 	NFSM_STRSIZ(retlen, len);
 	error = nfsm_mbufuio(nd, uiop, retlen);
 nfsmout:
 	if (nd->nd_mrep != NULL)
 		mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 
 /*
  * The actual write RPC done to a DS.
  *
  * Writes len bytes from uiop at io_off to the DS file handle fhp, using
  * the DS session in dsp.  *iomode supplies the stability level asked for
  * and, once a reply has been received, is set to the level the server
  * reported.  *must_commit is set to 1 when the write verifier in the
  * reply differs from the one already saved.
  * The verifier is kept in the mount when commit_thru_mds is non-zero and
  * in dsp otherwise.
  * Returns the newnfs_request() error, the reply status, NFSERR_IO when
  * the server wrote nothing, or 0.
  */
 static int
 nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
     struct nfsfh *fhp, int commit_thru_mds, struct ucred *cred, NFSPROC_T *p)
 {
 	uint32_t *tl;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	int error, rlen, commit, committed = NFSWRITE_FILESYNC;
 	int32_t backup;
 	struct nfsrv_descript nfsd;
 	struct nfsrv_descript *nd = &nfsd;
 	struct nfssockreq *nrp;
 
 	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
 	nd->nd_mrep = NULL;
 	nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len,
 	    NULL, &dsp->nfsclds_sess);
 	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
 	/* Offset, stability level and byte count, then the data. */
 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
 	txdr_hyper(io_off, tl);
 	tl += 2;
 	*tl++ = txdr_unsigned(*iomode);
 	*tl = txdr_unsigned(len);
 	nfsm_uiombuf(nd, uiop, len);
 	nrp = dsp->nfsclds_sockp;
 	if (nrp == NULL)
 		/* If NULL, use the MDS socket. */
 		nrp = &nmp->nm_sockreq;
 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
 	/* Note that *iomode is left untouched on this path. */
 	if (error != 0)
 		return (error);
 	if (nd->nd_repstat != 0) {
 		/*
 		 * In case the rpc gets retried, roll
 		 * the uio fields changed by nfsm_uiombuf()
 		 * back.
 		 */
 		uiop->uio_offset -= len;
 		uio_uio_resid_add(uiop, len);
 		uio_iov_base_add(uiop, -len);
 		uio_iov_len_add(uiop, len);
 		error = nd->nd_repstat;
 	} else {
 		/* Bytes written, commitment level and the write verifier. */
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
 		rlen = fxdr_unsigned(int, *tl++);
 		if (rlen == 0) {
 			error = NFSERR_IO;
 			goto nfsmout;
 		} else if (rlen < len) {
 			/* Short write: give the unwritten tail back to uiop. */
 			backup = len - rlen;
 			uio_iov_base_add(uiop, -(backup));
 			uio_iov_len_add(uiop, backup);
 			uiop->uio_offset -= backup;
 			uio_uio_resid_add(uiop, backup);
 			len = rlen;
 		}
 		commit = fxdr_unsigned(int, *tl++);
 
 		/*
 		 * Return the lowest commitment level
 		 * obtained by any of the RPCs.
 		 * Only one RPC is done here and committed still has its
 		 * initial value of NFSWRITE_FILESYNC, so the first branch
 		 * is the one taken.
 		 */
 		if (committed == NFSWRITE_FILESYNC)
 			committed = commit;
 		else if (committed == NFSWRITE_DATASYNC &&
 		    commit == NFSWRITE_UNSTABLE)
 			committed = commit;
 		if (commit_thru_mds != 0) {
 			/* Save or compare the verifier kept in the mount. */
 			NFSLOCKMNT(nmp);
 			if (!NFSHASWRITEVERF(nmp)) {
 				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
 				NFSSETWRITEVERF(nmp);
 	    		} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
 				*must_commit = 1;
 				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
 			}
 			NFSUNLOCKMNT(nmp);
 		} else {
 			/* Save or compare the verifier kept for this DS. */
 			NFSLOCKDS(dsp);
 			if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
 				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
 				dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
 			} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
 				*must_commit = 1;
 				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
 			}
 			NFSUNLOCKDS(dsp);
 		}
 	}
 nfsmout:
 	if (nd->nd_mrep != NULL)
 		mbuf_freem(nd->nd_mrep);
 	*iomode = committed;
 	if (nd->nd_repstat != 0 && error == 0)
 		error = nd->nd_repstat;
 	return (error);
 }
 
 /*
  * Free up the nfsclds structure.
  */
 void
 nfscl_freenfsclds(struct nfsclds *dsp)
 {
 	int i;
 
 	if (dsp == NULL)
 		return;
 	if (dsp->nfsclds_sockp != NULL) {
 		NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
 		NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
 		free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
 		free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
 	}
 	NFSFREEMUTEX(&dsp->nfsclds_mtx);
 	NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
 	for (i = 0; i < NFSV4_CBSLOTS; i++) {
 		if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
 			m_freem(
 			    dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
 	}
 	free(dsp, M_NFSCLDS);
 }
 
 static enum nfsclds_state
 nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
     struct nfsclds **retdspp)
 {
 	struct nfsclds *dsp, *cur_dsp;
 
 	/*
 	 * Search the list of nfsclds structures for one with the same
 	 * server.
 	 */
 	cur_dsp = NULL;
 	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
 		if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
 		    dsp->nfsclds_servownlen != 0 &&
 		    !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
 		    dsp->nfsclds_servownlen) &&
 		    dsp->nfsclds_sess.nfsess_defunct == 0) {
 			NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
 			    TAILQ_FIRST(&nmp->nm_sess), dsp,
 			    dsp->nfsclds_flags);
 			/* Server major id matches. */
 			if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
 				*retdspp = dsp;
 				return (NFSDSP_USETHISSESSION);
 			}
 
 			/*
 			 * Note the first match, so it can be used for
 			 * sequence'ing new sessions.
 			 */
 			if (cur_dsp == NULL)
 				cur_dsp = dsp;
 		}
 	}
 	if (cur_dsp != NULL) {
 		*retdspp = cur_dsp;
 		return (NFSDSP_SEQTHISSESSION);
 	}
 	return (NFSDSP_NOTFOUND);
 }
 
 #ifdef notyet
 /*
  * NFS commit rpc to a DS.
  *
  * Commits cnt bytes at offset for the DS file handle fhp, using the DS
  * session in dsp.  The write verifier in the reply is compared with the
  * one saved in dsp; when they differ the saved one is replaced and
  * NFSERR_STALEWRITEVERF is returned.
  * Otherwise returns the newnfs_request() error or the reply status.
  * The "stuff" argument is not referenced by this function.
  */
 static int
 nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
     struct nfsfh *fhp, struct ucred *cred, NFSPROC_T *p, void *stuff)
 {
 	uint32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	struct nfssockreq *nrp;
 	int error;
 	
 	nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh, fhp->nfh_len,
 	    NULL, &dsp->nfsclds_sess);
 	/* Offset (two words) followed by the byte count. */
 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
 	txdr_hyper(offset, tl);
 	tl += 2;
 	*tl = txdr_unsigned(cnt);
 	nrp = dsp->nfsclds_sockp;
 	if (nrp == NULL)
 		/* If NULL, use the MDS socket. */
 		nrp = &nmp->nm_sockreq;
 	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
 	if (error)
 		return (error);
 	if (nd->nd_repstat == 0) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
 		NFSLOCKDS(dsp);
 		/* A changed verifier is saved and reported to the caller. */
 		if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
 			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
 			error = NFSERR_STALEWRITEVERF;
 		}
 		NFSUNLOCKDS(dsp);
 	}
 nfsmout:
 	if (error == 0 && nd->nd_repstat != 0)
 		error = nd->nd_repstat;
 	mbuf_freem(nd->nd_mrep);
 	return (error);
 }
 #endif
 
Index: head/sys/fs/nfsclient/nfs_clvnops.c
===================================================================
--- head/sys/fs/nfsclient/nfs_clvnops.c	(revision 318735)
+++ head/sys/fs/nfsclient/nfs_clvnops.c	(revision 318736)
@@ -1,3541 +1,3539 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from nfs_vnops.c	8.16 (Berkeley) 5/27/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * vnode op calls for Sun NFS version 2, 3 and 4
  */
 
 #include "opt_inet.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/resourcevar.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/namei.h>
 #include <sys/socket.h>
 #include <sys/vnode.h>
 #include <sys/dirent.h>
 #include <sys/fcntl.h>
 #include <sys/lockf.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/signalvar.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 
 #include <fs/nfs/nfsport.h>
 #include <fs/nfsclient/nfsnode.h>
 #include <fs/nfsclient/nfsmount.h>
 #include <fs/nfsclient/nfs.h>
 #include <fs/nfsclient/nfs_kdtrace.h>
 
 #include <net/if.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 
 #include <nfs/nfs_lock.h>
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 
 dtrace_nfsclient_accesscache_flush_probe_func_t
 		dtrace_nfscl_accesscache_flush_done_probe;
 uint32_t	nfscl_accesscache_flush_done_id;
 
 dtrace_nfsclient_accesscache_get_probe_func_t
 		dtrace_nfscl_accesscache_get_hit_probe,
 		dtrace_nfscl_accesscache_get_miss_probe;
 uint32_t	nfscl_accesscache_get_hit_id;
 uint32_t	nfscl_accesscache_get_miss_id;
 
 dtrace_nfsclient_accesscache_load_probe_func_t
 		dtrace_nfscl_accesscache_load_done_probe;
 uint32_t	nfscl_accesscache_load_done_id;
 #endif /* KDTRACE_HOOKS */
 
 /* Defs */
 #define	TRUE	1
 #define	FALSE	0
 
 extern struct nfsstatsv1 nfsstatsv1;
 extern int nfsrv_useacl;
 extern int nfscl_debuglevel;
 MALLOC_DECLARE(M_NEWNFSREQ);
 
 static vop_read_t	nfsfifo_read;
 static vop_write_t	nfsfifo_write;
 static vop_close_t	nfsfifo_close;
 static int	nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
 		    struct thread *);
 static vop_lookup_t	nfs_lookup;
 static vop_create_t	nfs_create;
 static vop_mknod_t	nfs_mknod;
 static vop_open_t	nfs_open;
 static vop_pathconf_t	nfs_pathconf;
 static vop_close_t	nfs_close;
 static vop_access_t	nfs_access;
 static vop_getattr_t	nfs_getattr;
 static vop_setattr_t	nfs_setattr;
 static vop_read_t	nfs_read;
 static vop_fsync_t	nfs_fsync;
 static vop_remove_t	nfs_remove;
 static vop_link_t	nfs_link;
 static vop_rename_t	nfs_rename;
 static vop_mkdir_t	nfs_mkdir;
 static vop_rmdir_t	nfs_rmdir;
 static vop_symlink_t	nfs_symlink;
 static vop_readdir_t	nfs_readdir;
 static vop_strategy_t	nfs_strategy;
 static	int	nfs_lookitup(struct vnode *, char *, int,
 		    struct ucred *, struct thread *, struct nfsnode **);
 static	int	nfs_sillyrename(struct vnode *, struct vnode *,
 		    struct componentname *);
 static vop_access_t	nfsspec_access;
 static vop_readlink_t	nfs_readlink;
 static vop_print_t	nfs_print;
 static vop_advlock_t	nfs_advlock;
 static vop_advlockasync_t nfs_advlockasync;
 static vop_getacl_t nfs_getacl;
 static vop_setacl_t nfs_setacl;
 static vop_set_text_t nfs_set_text;
 
 /*
  * Global vfs data structures for nfs
  */
 /*
  * Vnode operation vector for NFS vnodes.  It mixes handlers defined in this
  * file (nfs_*) with ncl_* handlers that are not defined in this part of the
  * file.  Operations not listed are left to the vector named by
  * .vop_default (presumably the generic VFS fallback - see vnode(9)).
  */
 struct vop_vector newnfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 	.vop_access =		nfs_access,
 	.vop_advlock =		nfs_advlock,
 	.vop_advlockasync =	nfs_advlockasync,
 	.vop_close =		nfs_close,
 	.vop_create =		nfs_create,
 	.vop_fsync =		nfs_fsync,
 	.vop_getattr =		nfs_getattr,
 	.vop_getpages =		ncl_getpages,
 	.vop_putpages =		ncl_putpages,
 	.vop_inactive =		ncl_inactive,
 	.vop_link =		nfs_link,
 	.vop_lookup =		nfs_lookup,
 	.vop_mkdir =		nfs_mkdir,
 	.vop_mknod =		nfs_mknod,
 	.vop_open =		nfs_open,
 	.vop_pathconf =		nfs_pathconf,
 	.vop_print =		nfs_print,
 	.vop_read =		nfs_read,
 	.vop_readdir =		nfs_readdir,
 	.vop_readlink =		nfs_readlink,
 	.vop_reclaim =		ncl_reclaim,
 	.vop_remove =		nfs_remove,
 	.vop_rename =		nfs_rename,
 	.vop_rmdir =		nfs_rmdir,
 	.vop_setattr =		nfs_setattr,
 	.vop_strategy =		nfs_strategy,
 	.vop_symlink =		nfs_symlink,
 	.vop_write =		ncl_write,
 	.vop_getacl =		nfs_getacl,
 	.vop_setacl =		nfs_setacl,
 	.vop_set_text =		nfs_set_text,
 };
 
 /*
  * Vnode operation vector for fifos on an NFS mount.  Read, write and close
  * use the nfsfifo_* wrappers and access uses nfsspec_access; attribute,
  * fsync, print, inactive and reclaim handlers are the same ones used by
  * newnfs_vnodeops.  Anything not listed is left to fifo_specops.
  */
 struct vop_vector newnfs_fifoops = {
 	.vop_default =		&fifo_specops,
 	.vop_access =		nfsspec_access,
 	.vop_close =		nfsfifo_close,
 	.vop_fsync =		nfs_fsync,
 	.vop_getattr =		nfs_getattr,
 	.vop_inactive =		ncl_inactive,
 	.vop_print =		nfs_print,
 	.vop_read =		nfsfifo_read,
 	.vop_reclaim =		ncl_reclaim,
 	.vop_setattr =		nfs_setattr,
 	.vop_write =		nfsfifo_write,
 };
 
 static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
     struct componentname *cnp, struct vattr *vap);
 static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
     int namelen, struct ucred *cred, struct thread *td);
 static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp,
     char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp,
     char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td);
 static int nfs_renameit(struct vnode *sdvp, struct vnode *svp,
     struct componentname *scnp, struct sillyrename *sp);
 
 /*
  * Global variables
  */
-#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
-
 SYSCTL_DECL(_vfs_nfs);
 
 /*
  * Lifetime of an access cache entry: nfs_access() treats an entry as fresh
  * while time_second < stamp + this value.  A value <= 0 also turns off the
  * blanket ACCESS request in nfs_access().
  */
 static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
 	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
 
 /* When non-zero, nfs_getattr() issues an ACCESS RPC on an attribute cache miss. */
 static int	nfs_prime_access_cache = 0;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
 	   &nfs_prime_access_cache, 0,
 	   "Prime NFS ACCESS cache when fetching attributes");
 
 /* When non-zero, nfs_close() passes commit=1 to ncl_flush(). */
 static int	newnfs_commit_on_close = 0;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW,
     &newnfs_commit_on_close, 0, "write+commit on close, else only write");
 
 /* When non-zero, nfs_close() cleans the vnode's VM object pages for regular files. */
 static int	nfs_clean_pages_on_close = 1;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
 	   &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");
 
 /* Gates the O_DIRECT handling in nfs_open() and nfs_close(). */
 int newnfs_directio_enable = 0;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
 	   &newnfs_directio_enable, 0, "Enable NFS directio");
 
 /* Not referenced in this part of the file; see the sysctl description. */
 int nfs_keep_dirty_on_error;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW,
     &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned");
 
 /*
  * This sysctl allows other processes to mmap a file that has been opened
  * O_DIRECT by a process.  In general, having processes mmap the file while
  * Direct IO is in progress can lead to Data Inconsistencies.  But, we allow
  * this by default to prevent DoS attacks - to prevent a malicious user from
  * opening up files O_DIRECT preventing other users from mmap'ing these
  * files.  "Protected" environments where stricter consistency guarantees are
  * required can disable this knob.  The process that opened the file O_DIRECT
  * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
  * meaningful.
  */
 int newnfs_directio_allow_mmap = 1;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
 	   &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");
 
 #define	NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY		\
 			 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE	\
 			 | NFSACCESS_DELETE | NFSACCESS_LOOKUP)
 
 /*
  * SMP Locking Note :
  * The list of locks after the description of the lock is the ordering
  * of other locks acquired with the lock held.
  * np->n_mtx : Protects the fields in the nfsnode.
  *      VM Object Lock
  *      VI_MTX (acquired indirectly)
  * nmp->nm_mtx : Protects the fields in the nfsmount.
  *      rep->r_mtx
  * ncl_iod_mutex : Global lock, protects shared nfsiod state.
  * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
  *      nmp->nm_mtx
  *      rep->r_mtx
  * rep->r_mtx : Protects the fields in an nfsreq.
  */
 
 /*
  * Do an ACCESS RPC over the wire for the rights in wmode and record the
  * result in the per-node access cache.
  *
  * Any attributes returned with the reply are loaded into the attribute
  * cache.  On success the cache entry for cred's uid is refreshed, or, when
  * no entry for that uid exists, the entry with the oldest stamp is reused.
  * The granted rights are stored through retmode when it is not NULL.
  * On failure the error is mapped with nfscl_maperr() for NFSv4 mounts.
  */
 static int
 nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td,
     struct ucred *cred, u_int32_t *retmode)
 {
 	struct nfsnode *np = VTONFS(vp);
 	struct nfsvattr nfsva;
 	u_int32_t rmode;
 	int error, attrflag, slot, oldest;
 
 	error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag,
 	    &rmode, NULL);
 	if (attrflag)
 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 
 	if (error != 0) {
 		if (NFS_ISV4(vp))
 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 #ifdef KDTRACE_HOOKS
 		if (error != 0)
 			KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0,
 			    error);
 #endif
 		return (error);
 	}
 
 	/*
 	 * Look for this uid's entry, remembering the entry with the oldest
 	 * stamp seen so far as the replacement candidate.
 	 */
 	oldest = 0;
 	mtx_lock(&np->n_mtx);
 	for (slot = 0; slot < NFS_ACCESSCACHESIZE; slot++) {
 		if (np->n_accesscache[slot].uid == cred->cr_uid)
 			break;
 		if (slot > 0 && np->n_accesscache[slot].stamp <
 		    np->n_accesscache[oldest].stamp)
 			oldest = slot;
 	}
 	if (slot == NFS_ACCESSCACHESIZE) {
 		/* No entry for this uid: take over the oldest one. */
 		slot = oldest;
 		np->n_accesscache[slot].uid = cred->cr_uid;
 	}
 	np->n_accesscache[slot].mode = rmode;
 	np->n_accesscache[slot].stamp = time_second;
 	mtx_unlock(&np->n_mtx);
 
 	if (retmode != NULL)
 		*retmode = rmode;
 	KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0);
 	return (0);
 }
 
 /*
  * nfs access vnode op.
  * For nfs version 2, just return ok. File accesses may fail later.
  * For nfs version 3 or 4, use the access rpc to check accessibility. If file
  * modes are changed on the server, accesses might still fail later.
  */
 static int
 nfs_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	int error = 0, i, gotahit;
 	u_int32_t mode, wmode, rmode;
 	int v34 = NFS_ISV34(vp);
 	struct nfsnode *np = VTONFS(vp);
 
 	/*
 	 * Disallow write attempts on filesystems mounted read-only;
 	 * unless the file is a socket, fifo, or a block or character
 	 * device resident on the filesystem.
 	 */
 	if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS |
 	    VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL |
 	    VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
 		switch (vp->v_type) {
 		case VREG:
 		case VDIR:
 		case VLNK:
 			return (EROFS);
 		default:
 			break;
 		}
 	}
 	/*
 	 * For nfs v3 or v4, check to see if we have done this recently, and if
 	 * so return our cached result instead of making an ACCESS call.
 	 * If not, do an access rpc, otherwise you are stuck emulating
 	 * ufs_access() locally using the vattr. This may not be correct,
 	 * since the server may apply other access criteria such as
 	 * client uid-->server uid mapping that we do not know about.
 	 */
 	if (v34) {
 		if (ap->a_accmode & VREAD)
 			mode = NFSACCESS_READ;
 		else
 			mode = 0;
 		if (vp->v_type != VDIR) {
 			if (ap->a_accmode & VWRITE)
 				mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
 			if (ap->a_accmode & VAPPEND)
 				mode |= NFSACCESS_EXTEND;
 			if (ap->a_accmode & VEXEC)
 				mode |= NFSACCESS_EXECUTE;
 			if (ap->a_accmode & VDELETE)
 				mode |= NFSACCESS_DELETE;
 		} else {
 			if (ap->a_accmode & VWRITE)
 				mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
 			if (ap->a_accmode & VAPPEND)
 				mode |= NFSACCESS_EXTEND;
 			if (ap->a_accmode & VEXEC)
 				mode |= NFSACCESS_LOOKUP;
 			if (ap->a_accmode & VDELETE)
 				mode |= NFSACCESS_DELETE;
 			if (ap->a_accmode & VDELETE_CHILD)
 				mode |= NFSACCESS_MODIFY;
 		}
 		/* XXX safety belt, only make blanket request if caching */
 		if (nfsaccess_cache_timeout > 0) {
 			wmode = NFSACCESS_READ | NFSACCESS_MODIFY |
 				NFSACCESS_EXTEND | NFSACCESS_EXECUTE |
 				NFSACCESS_DELETE | NFSACCESS_LOOKUP;
 		} else {
 			wmode = mode;
 		}
 
 		/*
 		 * Does our cached result allow us to give a definite yes to
 		 * this request?
 		 */
 		gotahit = 0;
 		mtx_lock(&np->n_mtx);
 		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
 			if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
 			    if (time_second < (np->n_accesscache[i].stamp
 				+ nfsaccess_cache_timeout) &&
 				(np->n_accesscache[i].mode & mode) == mode) {
 				NFSINCRGLOBAL(nfsstatsv1.accesscache_hits);
 				gotahit = 1;
 			    }
 			    break;
 			}
 		}
 		mtx_unlock(&np->n_mtx);
 #ifdef KDTRACE_HOOKS
 		if (gotahit != 0)
 			KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp,
 			    ap->a_cred->cr_uid, mode);
 		else
 			KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp,
 			    ap->a_cred->cr_uid, mode);
 #endif
 		if (gotahit == 0) {
 			/*
 			 * Either a no, or a don't know.  Go to the wire.
 			 */
 			NFSINCRGLOBAL(nfsstatsv1.accesscache_misses);
 		        error = nfs34_access_otw(vp, wmode, ap->a_td,
 			    ap->a_cred, &rmode);
 			if (!error &&
 			    (rmode & mode) != mode)
 				error = EACCES;
 		}
 		return (error);
 	} else {
 		if ((error = nfsspec_access(ap)) != 0) {
 			return (error);
 		}
 		/*
 		 * Attempt to prevent a mapped root from accessing a file
 		 * which it shouldn't.  We try to read a byte from the file
 		 * if the user is root and the file is not zero length.
 		 * After calling nfsspec_access, we should have the correct
 		 * file size cached.
 		 */
 		mtx_lock(&np->n_mtx);
 		if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
 		    && VTONFS(vp)->n_size > 0) {
 			struct iovec aiov;
 			struct uio auio;
 			char buf[1];
 
 			mtx_unlock(&np->n_mtx);
 			aiov.iov_base = buf;
 			aiov.iov_len = 1;
 			auio.uio_iov = &aiov;
 			auio.uio_iovcnt = 1;
 			auio.uio_offset = 0;
 			auio.uio_resid = 1;
 			auio.uio_segflg = UIO_SYSSPACE;
 			auio.uio_rw = UIO_READ;
 			auio.uio_td = ap->a_td;
 
 			if (vp->v_type == VREG)
 				error = ncl_readrpc(vp, &auio, ap->a_cred);
 			else if (vp->v_type == VDIR) {
 				char* bp;
 				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
 				aiov.iov_base = bp;
 				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
 				error = ncl_readdirrpc(vp, &auio, ap->a_cred,
 				    ap->a_td);
 				free(bp, M_TEMP);
 			} else if (vp->v_type == VLNK)
 				error = ncl_readlinkrpc(vp, &auio, ap->a_cred);
 			else
 				error = EACCES;
 		} else
 			mtx_unlock(&np->n_mtx);
 		return (error);
 	}
 }
 
 
 /*
  * nfs open vnode op
  * Check to see if the type is ok
  * and that deletion is not in progress.
  * For paged in text files, you will need to flush the page cache
  * if consistency is lost.
  */
 /* ARGSUSED */
 /*
  * Steps, in order: reject vnode types other than VREG/VDIR/VLNK; for NFSv4
  * do the Open RPC; revalidate or flush cached data against fresh
  * attributes; apply O_DIRECT bookkeeping; record the write credential;
  * create the VM object.
  *
  * Returns 0 on success, EOPNOTSUPP for an unsupported vnode type, EBADF if
  * the vnode was found doomed after a cache flush, or the error from the
  * failing step.  On the error paths that follow a successful NFSv4 Open
  * (other than the EBADF ones) the Open is undone with nfsrpc_close().
  *
  * Locking: np->n_mtx is taken before the NMODIFIED test and, on every
  * non-returning path, is held on arrival at the O_DIRECT section; it is
  * released just before the old write credential is freed.
  */
 static int
 nfs_open(struct vop_open_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct vattr vattr;
 	int error;
 	int fmode = ap->a_mode;
 	struct ucred *cred;
 
 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
 		return (EOPNOTSUPP);
 
 	/*
 	 * For NFSv4, we need to do the Open Op before cache validation,
 	 * so that we conform to RFC3530 Sec. 9.3.1.
 	 */
 	if (NFS_ISV4(vp)) {
 		error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td);
 		if (error) {
 			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
 			    (gid_t)0);
 			return (error);
 		}
 	}
 
 	/*
 	 * Now, if this Open will be doing reading, re-validate/flush the
 	 * cache, so that Close/Open coherency is maintained.
 	 */
 	mtx_lock(&np->n_mtx);
 	if (np->n_flag & NMODIFIED) {
 		/* Locally modified: flush first, then refetch attributes. */
 		mtx_unlock(&np->n_mtx);
 		error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 		if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
 			return (EBADF);
 		/* Only EINTR and EIO abort the open; other flush errors are ignored here. */
 		if (error == EINTR || error == EIO) {
 			if (NFS_ISV4(vp))
 				(void) nfsrpc_close(vp, 0, ap->a_td);
 			return (error);
 		}
 		mtx_lock(&np->n_mtx);
 		/* Zero the attribute stamp before the VOP_GETATTR() below. */
 		np->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 		if (vp->v_type == VDIR)
 			np->n_direofoffset = 0;
 		mtx_unlock(&np->n_mtx);
 		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
 		if (error) {
 			if (NFS_ISV4(vp))
 				(void) nfsrpc_close(vp, 0, ap->a_td);
 			return (error);
 		}
 		mtx_lock(&np->n_mtx);
 		np->n_mtime = vattr.va_mtime;
 		if (NFS_ISV4(vp))
 			np->n_change = vattr.va_filerev;
 	} else {
 		/* Not modified locally: flush only if the attributes changed. */
 		mtx_unlock(&np->n_mtx);
 		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
 		if (error) {
 			if (NFS_ISV4(vp))
 				(void) nfsrpc_close(vp, 0, ap->a_td);
 			return (error);
 		}
 		mtx_lock(&np->n_mtx);
 		if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) ||
 		    NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
 			if (vp->v_type == VDIR)
 				np->n_direofoffset = 0;
 			mtx_unlock(&np->n_mtx);
 			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 			if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
 				return (EBADF);
 			if (error == EINTR || error == EIO) {
 				if (NFS_ISV4(vp))
 					(void) nfsrpc_close(vp, 0, ap->a_td);
 				return (error);
 			}
 			mtx_lock(&np->n_mtx);
 			np->n_mtime = vattr.va_mtime;
 			if (NFS_ISV4(vp))
 				np->n_change = vattr.va_filerev;
 		}
 	}
 
 	/*
 	 * If the object has >= 1 O_DIRECT active opens, we disable caching.
 	 */
 	if (newnfs_directio_enable && (fmode & O_DIRECT) &&
 	    (vp->v_type == VREG)) {
 		if (np->n_directio_opens == 0) {
 			/* First O_DIRECT open: flush cached data before setting NNONCACHE. */
 			mtx_unlock(&np->n_mtx);
 			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 			if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
 				return (EBADF);
 			if (error) {
 				if (NFS_ISV4(vp))
 					(void) nfsrpc_close(vp, 0, ap->a_td);
 				return (error);
 			}
 			mtx_lock(&np->n_mtx);
 			np->n_flag |= NNONCACHE;
 		}
 		np->n_directio_opens++;
 	}
 
 	/* If opened for writing via NFSv4.1 or later, mark that for pNFS. */
 	if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0)
 		np->n_flag |= NWRITEOPENED;
 
 	/*
 	 * If this is an open for writing, capture a reference to the
 	 * credentials, so they can be used by ncl_putpages(). Using
 	 * these write credentials is preferable to the credentials of
 	 * whatever thread happens to be doing the VOP_PUTPAGES() since
 	 * the write RPCs are less likely to fail with EACCES.
 	 */
 	if ((fmode & FWRITE) != 0) {
 		/* Swap under the lock; the old reference is dropped after unlock. */
 		cred = np->n_writecred;
 		np->n_writecred = crhold(ap->a_cred);
 	} else
 		cred = NULL;
 	mtx_unlock(&np->n_mtx);
 
 	if (cred != NULL)
 		crfree(cred);
 	vnode_create_vobject(vp, vattr.va_size, ap->a_td);
 	return (0);
 }
 
 /*
  * nfs close vnode op
  * What an NFS client should do upon close after writing is a debatable issue.
  * Most NFS clients push delayed writes to the server upon close, basically for
  * two reasons:
  * 1 - So that any write errors may be reported back to the client process
  *     doing the close system call. By far the two most likely errors are
  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
  * 2 - To put a worst case upper bound on cache inconsistency between
  *     multiple clients for the file.
  * There is also a consistency problem for Version 2 of the protocol w.r.t.
  * not being able to tell if other clients are writing a file concurrently,
  * since there is no way of knowing if the changed modify time in the reply
  * is only due to the write for this client.
  * (NFS Version 3 provides weak cache consistency data in the reply that
  *  should be sufficient to detect and handle this case.)
  *
  * The current code does the following:
  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
  * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
  *                     or commit them (this satisfies 1 and 2 except for the
  *                     case where the server crashes after this close but
  *                     before the commit RPC, which is felt to be "good
  *                     enough". Changing the last argument to ncl_flush() to
  *                     a 1 would force a commit operation, if it is felt a
  *                     commit is necessary now.
  * for NFS Version 4 - flush the dirty buffers and commit them, if
  *		       nfscl_mustflush() says this is necessary.
  *                     It is necessary if there is no write delegation held,
  *                     in order to satisfy open/close coherency.
  *                     If the file isn't cached on local stable storage,
  *                     it may be necessary in order to detect "out of space"
  *                     errors from the server, if the write delegation
  *                     issued by the server doesn't allow the file to grow.
  */
 /* ARGSUSED */
 static int
 nfs_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct nfsvattr nfsva;
 	struct ucred *cred;
 	int error = 0, ret, localcred = 0;
 	int fmode = ap->a_fflag;
 
 	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF))
 		return (0);
 	/*
 	 * During shutdown, a_cred isn't valid, so just use root.
 	 */
 	if (ap->a_cred == NOCRED) {
 		cred = newnfs_getcred();
 		localcred = 1;
 	} else {
 		cred = ap->a_cred;
 	}
 	if (vp->v_type == VREG) {
 	    /*
 	     * Examine and clean dirty pages, regardless of NMODIFIED.
 	     * This closes a major hole in close-to-open consistency.
 	     * We want to push out all dirty pages (and buffers) on
 	     * close, regardless of whether they were dirtied by
 	     * mmap'ed writes or via write().
 	     */
 	    if (nfs_clean_pages_on_close && vp->v_object) {
 		VM_OBJECT_WLOCK(vp->v_object);
 		vm_object_page_clean(vp->v_object, 0, 0, 0);
 		VM_OBJECT_WUNLOCK(vp->v_object);
 	    }
 	    mtx_lock(&np->n_mtx);
 	    if (np->n_flag & NMODIFIED) {
 		mtx_unlock(&np->n_mtx);
 		if (NFS_ISV3(vp)) {
 		    /*
 		     * Under NFSv3 we have dirty buffers to dispose of.  We
 		     * must flush them to the NFS server.  We have the option
 		     * of waiting all the way through the commit rpc or just
 		     * waiting for the initial write.  The default is to only
 		     * wait through the initial write so the data is in the
 		     * server's cache, which is roughly similar to the state
 		     * a standard disk subsystem leaves the file in on close().
 		     *
 		     * We cannot clear the NMODIFIED bit in np->n_flag due to
 		     * potential races with other processes, and certainly
 		     * cannot clear it if we don't commit.
 		     * These races occur when there is no longer the old
 		     * traditional vnode locking implemented for Vnode Ops.
 		     */
 		    int cm = newnfs_commit_on_close ? 1 : 0;
 		    error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0);
 		    /* np->n_flag &= ~NMODIFIED; */
 		} else if (NFS_ISV4(vp)) { 
 			if (nfscl_mustflush(vp) != 0) {
 				int cm = newnfs_commit_on_close ? 1 : 0;
 				error = ncl_flush(vp, MNT_WAIT, ap->a_td,
 				    cm, 0);
 				/*
 				 * as above w.r.t races when clearing
 				 * NMODIFIED.
 				 * np->n_flag &= ~NMODIFIED;
 				 */
 			}
 		} else {
 			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 			if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
 				return (EBADF);
 		}
 		mtx_lock(&np->n_mtx);
 	    }
  	    /* 
  	     * Invalidate the attribute cache in all cases.
  	     * An open is going to fetch fresh attrs any way, other procs
  	     * on this node that have file open will be forced to do an 
  	     * otw attr fetch, but this is safe.
 	     * --> A user found that their RPC count dropped by 20% when
 	     *     this was commented out and I can't see any requirement
 	     *     for it, so I've disabled it when negative lookups are
 	     *     enabled. (What does this have to do with negative lookup
 	     *     caching? Well nothing, except it was reported by the
 	     *     same user that needed negative lookup caching and I wanted
 	     *     there to be a way to disable it to see if it
 	     *     is the cause of some caching/coherency issue that might
 	     *     crop up.)
  	     */
 	    if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) {
 		    np->n_attrstamp = 0;
 		    KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 	    }
 	    if (np->n_flag & NWRITEERR) {
 		np->n_flag &= ~NWRITEERR;
 		error = np->n_error;
 	    }
 	    mtx_unlock(&np->n_mtx);
 	}
 
 	if (NFS_ISV4(vp)) {
 		/*
 		 * Get attributes so "change" is up to date.
 		 */
 		if (error == 0 && nfscl_mustflush(vp) != 0 &&
 		    vp->v_type == VREG &&
 		    (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) {
 			ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva,
 			    NULL);
 			if (!ret) {
 				np->n_change = nfsva.na_filerev;
 				(void) nfscl_loadattrcache(&vp, &nfsva, NULL,
 				    NULL, 0, 0);
 			}
 		}
 
 		/*
 		 * and do the close.
 		 */
 		ret = nfsrpc_close(vp, 0, ap->a_td);
 		if (!error && ret)
 			error = ret;
 		if (error)
 			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
 			    (gid_t)0);
 	}
 	if (newnfs_directio_enable)
 		KASSERT((np->n_directio_asyncwr == 0),
 			("nfs_close: dirty unflushed (%d) directio buffers\n",
 			 np->n_directio_asyncwr));
 	if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
 		mtx_lock(&np->n_mtx);
 		KASSERT((np->n_directio_opens > 0), 
 			("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
 		np->n_directio_opens--;
 		if (np->n_directio_opens == 0)
 			np->n_flag &= ~NNONCACHE;
 		mtx_unlock(&np->n_mtx);
 	}
 	if (localcred)
 		NFSFREECRED(cred);
 	return (error);
 }
 
 /*
  * nfs getattr call from vfs.
  */
 static int
 nfs_getattr(struct vop_getattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = curthread;	/* XXX */
 	struct nfsnode *np = VTONFS(vp);
 	int error = 0;
 	struct nfsvattr nfsva;
 	struct vattr *vap = ap->a_vap;
 	struct vattr vattr;
 
 	/*
 	 * Update local times for special files.
 	 */
 	mtx_lock(&np->n_mtx);
 	if (np->n_flag & (NACC | NUPD))
 		np->n_flag |= NCHG;
 	mtx_unlock(&np->n_mtx);
 	/*
 	 * First look in the cache.
 	 */
 	if (ncl_getattrcache(vp, &vattr) == 0) {
 		vap->va_type = vattr.va_type;
 		vap->va_mode = vattr.va_mode;
 		vap->va_nlink = vattr.va_nlink;
 		vap->va_uid = vattr.va_uid;
 		vap->va_gid = vattr.va_gid;
 		vap->va_fsid = vattr.va_fsid;
 		vap->va_fileid = vattr.va_fileid;
 		vap->va_size = vattr.va_size;
 		vap->va_blocksize = vattr.va_blocksize;
 		vap->va_atime = vattr.va_atime;
 		vap->va_mtime = vattr.va_mtime;
 		vap->va_ctime = vattr.va_ctime;
 		vap->va_gen = vattr.va_gen;
 		vap->va_flags = vattr.va_flags;
 		vap->va_rdev = vattr.va_rdev;
 		vap->va_bytes = vattr.va_bytes;
 		vap->va_filerev = vattr.va_filerev;
 		/*
 		 * Get the local modify time for the case of a write
 		 * delegation.
 		 */
 		nfscl_deleggetmodtime(vp, &vap->va_mtime);
 		return (0);
 	}
 
 	if (NFS_ISV34(vp) && nfs_prime_access_cache &&
 	    nfsaccess_cache_timeout > 0) {
 		NFSINCRGLOBAL(nfsstatsv1.accesscache_misses);
 		nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL);
 		if (ncl_getattrcache(vp, ap->a_vap) == 0) {
 			nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime);
 			return (0);
 		}
 	}
 	error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL);
 	if (!error)
 		error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0);
 	if (!error) {
 		/*
 		 * Get the local modify time for the case of a write
 		 * delegation.
 		 */
 		nfscl_deleggetmodtime(vp, &vap->va_mtime);
 	} else if (NFS_ISV4(vp)) {
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	}
 	return (error);
 }
 
 /*
  * nfs setattr call.
  */
 /*
  * Validates the request, prepares cached data for a size or time change,
  * then does the Setattr RPC through nfs_setattrrpc().
  *
  * Returns EOPNOTSUPP if va_flags is set, EROFS for changes on a read-only
  * mount, EISDIR for a size change on a directory, EBADF if the vnode is
  * found doomed after a cache flush, or the error from the flush or RPC.
  * A size-only change on a device, socket or fifo returns 0 without an RPC.
  *
  * tsize holds the size to restore: if the RPC fails after a size change,
  * n_size, the cached attribute size and the pager size are set back to it.
  */
 static int
 nfs_setattr(struct vop_setattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct thread *td = curthread;	/* XXX */
 	struct vattr *vap = ap->a_vap;
 	int error = 0;
 	u_quad_t tsize;
 
 #ifndef nolint
 	tsize = (u_quad_t)0;
 #endif
 
 	/*
 	 * Setting of flags and marking of atimes are not supported.
 	 */
 	if (vap->va_flags != VNOVAL)
 		return (EOPNOTSUPP);
 
 	/*
 	 * Disallow write attempts if the filesystem is mounted read-only.
 	 */
 	/* NOTE(review): the va_flags test below cannot be true after the early return above. */
   	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
 	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
 	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
 	    (vp->v_mount->mnt_flag & MNT_RDONLY))
 		return (EROFS);
 	if (vap->va_size != VNOVAL) {
  		switch (vp->v_type) {
  		case VDIR:
  			return (EISDIR);
  		case VCHR:
  		case VBLK:
  		case VSOCK:
  		case VFIFO:
 			/*
 			 * Size is ignored for these types: nothing to do if
 			 * it was the only attribute, otherwise drop it from
 			 * the request.
 			 */
 			if (vap->va_mtime.tv_sec == VNOVAL &&
 			    vap->va_atime.tv_sec == VNOVAL &&
 			    vap->va_mode == (mode_t)VNOVAL &&
 			    vap->va_uid == (uid_t)VNOVAL &&
 			    vap->va_gid == (gid_t)VNOVAL)
 				return (0);		
  			vap->va_size = VNOVAL;
  			break;
  		default:
 			/*
 			 * Disallow write attempts if the filesystem is
 			 * mounted read-only.
 			 */
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			/*
 			 *  We run vnode_pager_setsize() early (why?),
 			 * we must set np->n_size now to avoid vinvalbuf
 			 * V_SAVE races that might setsize a lower
 			 * value.
 			 */
 			mtx_lock(&np->n_mtx);
 			tsize = np->n_size;
 			mtx_unlock(&np->n_mtx);
 			/* NOTE(review): this error is overwritten below by ncl_vinvalbuf() or nfs_setattrrpc() - confirm intended. */
 			error = ncl_meta_setsize(vp, ap->a_cred, td,
 			    vap->va_size);
 			mtx_lock(&np->n_mtx);
  			if (np->n_flag & NMODIFIED) {
 			    tsize = np->n_size;
 			    mtx_unlock(&np->n_mtx);
 			    /* Truncating to zero discards dirty data instead of saving it. */
 			    error = ncl_vinvalbuf(vp, vap->va_size == 0 ?
 			        0 : V_SAVE, td, 1);
 			    if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
 				    error = EBADF;
  			    if (error != 0) {
 				    vnode_pager_setsize(vp, tsize);
 				    return (error);
 			    }
 			    /*
 			     * Call nfscl_delegmodtime() to set the modify time
 			     * locally, as required.
 			     */
 			    nfscl_delegmodtime(vp);
  			} else
 			    mtx_unlock(&np->n_mtx);
 			/*
 			 * np->n_size has already been set to vap->va_size
 			 * in ncl_meta_setsize(). We must set it again since
 			 * nfs_loadattrcache() could be called through
 			 * ncl_meta_setsize() and could modify np->n_size.
 			 */
 			mtx_lock(&np->n_mtx);
  			np->n_vattr.na_size = np->n_size = vap->va_size;
 			mtx_unlock(&np->n_mtx);
   		}
   	} else {
 		/* Time change on a modified regular file: flush dirty data first. */
 		mtx_lock(&np->n_mtx);
 		if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 
 		    (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
 			mtx_unlock(&np->n_mtx);
 			error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
 			if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
 				return (EBADF);
 			if (error == EINTR || error == EIO)
 				return (error);
 		} else
 			mtx_unlock(&np->n_mtx);
 	}
 	error = nfs_setattrrpc(vp, vap, ap->a_cred, td);
 	/* RPC failed after a size change: roll the sizes back to tsize. */
 	if (error && vap->va_size != VNOVAL) {
 		mtx_lock(&np->n_mtx);
 		np->n_size = np->n_vattr.na_size = tsize;
 		vnode_pager_setsize(vp, tsize);
 		mtx_unlock(&np->n_mtx);
 	}
 	return (error);
 }
 
 /*
  * Do an nfs setattr rpc.
  */
 /*
  * Do the Setattr RPC for vp.
  *
  * On v3/v4 mounts every access cache entry is expired first (stamp zeroed)
  * and NDELEGMOD is set on the node.  Attributes returned with the reply
  * are loaded into the attribute cache; a failure to load them is reported
  * only when the RPC itself succeeded.  Errors are mapped by nfscl_maperr()
  * for NFSv4, using the uid/gid from vap.
  */
 static int
 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
     struct thread *td)
 {
 	struct nfsnode *np = VTONFS(vp);
 	struct nfsvattr nfsva;
 	int error, loaderr, attrflag, slot;
 
 	if (NFS_ISV34(vp)) {
 		mtx_lock(&np->n_mtx);
 		np->n_flag |= NDELEGMOD;
 		for (slot = 0; slot < NFS_ACCESSCACHESIZE; slot++)
 			np->n_accesscache[slot].stamp = 0;
 		mtx_unlock(&np->n_mtx);
 		KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
 	}
 
 	error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag,
 	    NULL);
 	if (attrflag != 0) {
 		loaderr = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 		/* The RPC's own error, if any, wins over a load failure. */
 		if (error == 0)
 			error = loaderr;
 	}
 	if (error == 0)
 		return (0);
 	if (NFS_ISV4(vp))
 		error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid);
 	return (error);
 }
 
 /*
  * nfs lookup call, one step at a time...
  * First look in cache
  * If not found, unlock the directory nfsnode and do the rpc
  *
  * On success 0 is returned and *ap->a_vpp refers to the vnode found.
  * EJUSTRETURN is returned when the last component of a CREATE or RENAME
  * does not exist (and the mount is not read-only); otherwise an error
  * number is returned and *ap->a_vpp is left as NULLVP.
  */
 static int
 nfs_lookup(struct vop_lookup_args *ap)
 {
 	struct componentname *cnp = ap->a_cnp;
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct mount *mp = dvp->v_mount;
 	int flags = cnp->cn_flags;
 	struct vnode *newvp;
 	struct nfsmount *nmp;
 	struct nfsnode *np, *newnp;
 	int error = 0, attrflag, dattrflag, ltype, ncticks;
 	struct thread *td = cnp->cn_thread;
 	struct nfsfh *nfhp;
 	struct nfsvattr dnfsva, nfsva;
 	struct vattr vattr;
 	struct timespec nctime;
 
 	*vpp = NULLVP;
 	/* Deleting or renaming the last component needs a writable mount. */
 	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 	nmp = VFSTONFS(mp);
 	np = VTONFS(dvp);
 
 	/* For NFSv4, wait until any remove is done. */
 	mtx_lock(&np->n_mtx);
 	while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) {
 		np->n_flag |= NREMOVEWANT;
 		(void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0);
 	}
 	mtx_unlock(&np->n_mtx);
 
 	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0)
 		return (error);
 	/*
 	 * The code below treats -1 as a positive name cache hit, ENOENT as
 	 * a negative hit, any other positive value as a hard error and
 	 * everything else as a miss.
 	 */
 	error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks);
 	if (error > 0 && error != ENOENT)
 		return (error);
 	if (error == -1) {
 		/*
 		 * Lookups of "." are special and always return the
 		 * current directory.  cache_lookup() already handles
 		 * associated locking bookkeeping, etc.
 		 */
 		if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
 			/* XXX: Is this really correct? */
 			if (cnp->cn_nameiop != LOOKUP &&
 			    (flags & ISLASTCN))
 				cnp->cn_flags |= SAVENAME;
 			return (0);
 		}
 
 		/*
 		 * We only accept a positive hit in the cache if the
 		 * change time of the file matches our cached copy.
 		 * Otherwise, we discard the cache entry and fallback
 		 * to doing a lookup RPC.  We also only trust cache
 		 * entries for less than nm_nametimeo seconds.
 		 *
 		 * To better handle stale file handles and attributes,
 		 * clear the attribute cache of this node if it is a
 		 * leaf component, part of an open() call, and not
 		 * locally modified before fetching the attributes.
 		 * This should allow stale file handles to be detected
 		 * here where we can fall back to a LOOKUP RPC to
 		 * recover rather than having nfs_open() detect the
 		 * stale file handle and failing open(2) with ESTALE.
 		 */
 		newvp = *vpp;
 		newnp = VTONFS(newvp);
 		if (!(nmp->nm_flag & NFSMNT_NOCTO) &&
 		    (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
 		    !(newnp->n_flag & NMODIFIED)) {
 			mtx_lock(&newnp->n_mtx);
 			newnp->n_attrstamp = 0;
 			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
 			mtx_unlock(&newnp->n_mtx);
 		}
 		if (nfscl_nodeleg(newvp, 0) == 0 ||
 		    ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) &&
 		    VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
 		    timespeccmp(&vattr.va_ctime, &nctime, ==))) {
 			NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
 			if (cnp->cn_nameiop != LOOKUP &&
 			    (flags & ISLASTCN))
 				cnp->cn_flags |= SAVENAME;
 			return (0);
 		}
 		/* The cached entry is not trusted: drop it and do the RPC. */
 		cache_purge(newvp);
 		if (dvp != newvp)
 			vput(newvp);
 		else
 			vrele(newvp);
 		*vpp = NULLVP;
 	} else if (error == ENOENT) {
 		if (dvp->v_iflag & VI_DOOMED)
 			return (ENOENT);
 		/*
 		 * We only accept a negative hit in the cache if the
 		 * modification time of the parent directory matches
 		 * the cached copy in the name cache entry.
 		 * Otherwise, we discard all of the negative cache
 		 * entries for this directory.  We also only trust
 		 * negative cache entries for up to nm_negnametimeo
 		 * seconds.
 		 */
 		if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) &&
 		    VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
 		    timespeccmp(&vattr.va_mtime, &nctime, ==)) {
 			NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
 			return (ENOENT);
 		}
 		cache_purge_negative(dvp);
 	}
 
 	/* Cache miss (or untrusted entry): ask the server. */
 	newvp = NULLVP;
 	NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses);
 	error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
 	    NULL);
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	if (error) {
 		if (error != ENOENT) {
 			if (NFS_ISV4(dvp))
 				error = nfscl_maperr(td, error, (uid_t)0,
 				    (gid_t)0);
 			return (error);
 		}
 
 		/* The requested file was not found. */
 		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
 		    (flags & ISLASTCN)) {
 			/*
 			 * XXX: UFS does a full VOP_ACCESS(dvp,
 			 * VWRITE) here instead of just checking
 			 * MNT_RDONLY.
 			 */
 			if (mp->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			cnp->cn_flags |= SAVENAME;
 			return (EJUSTRETURN);
 		}
 
 		if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) {
 			/*
 			 * Cache the modification time of the parent
 			 * directory from the post-op attributes in
 			 * the name cache entry.  The negative cache
 			 * entry will be ignored once the directory
 			 * has changed.  Don't bother adding the entry
 			 * if the directory has already changed.
 			 */
 			mtx_lock(&np->n_mtx);
 			if (timespeccmp(&np->n_vattr.na_mtime,
 			    &dnfsva.na_mtime, ==)) {
 				mtx_unlock(&np->n_mtx);
 				cache_enter_time(dvp, NULL, cnp,
 				    &dnfsva.na_mtime, NULL);
 			} else
 				mtx_unlock(&np->n_mtx);
 		}
 		return (ENOENT);
 	}
 
 	/*
 	 * Handle RENAME case...
 	 */
 	if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
 		/* The name resolves to the directory itself. */
 		if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
 			FREE((caddr_t)nfhp, M_NFSFH);
 			return (EISDIR);
 		}
 		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
 		    LK_EXCLUSIVE);
 		if (error)
 			return (error);
 		newvp = NFSTOV(np);
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 		*vpp = newvp;
 		cnp->cn_flags |= SAVENAME;
 		return (0);
 	}
 
 	if (flags & ISDOTDOT) {
 		/*
 		 * dvp is unlocked while the parent vnode is obtained, so the
 		 * mount is busied first and dvp is rechecked for VI_DOOMED
 		 * after it has been relocked.
 		 */
 		ltype = NFSVOPISLOCKED(dvp);
 		error = vfs_busy(mp, MBF_NOWAIT);
 		if (error != 0) {
 			vfs_ref(mp);
 			NFSVOPUNLOCK(dvp, 0);
 			error = vfs_busy(mp, 0);
 			NFSVOPLOCK(dvp, ltype | LK_RETRY);
 			vfs_rel(mp);
 			if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
 				vfs_unbusy(mp);
 				error = ENOENT;
 			}
 			if (error != 0) {
 				/*
 				 * nfscl_nget() has not been called yet, so
 				 * the file handle from the lookup RPC must
 				 * be released here to avoid leaking it.
 				 */
 				FREE((caddr_t)nfhp, M_NFSFH);
 				return (error);
 			}
 		}
 		NFSVOPUNLOCK(dvp, 0);
 		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
 		    cnp->cn_lkflags);
 		if (error == 0)
 			newvp = NFSTOV(np);
 		vfs_unbusy(mp);
 		if (newvp != dvp)
 			NFSVOPLOCK(dvp, ltype | LK_RETRY);
 		if (dvp->v_iflag & VI_DOOMED) {
 			if (error == 0) {
 				if (newvp == dvp)
 					vrele(newvp);
 				else
 					vput(newvp);
 			}
 			error = ENOENT;
 		}
 		if (error != 0)
 			return (error);
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 	} else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
 		/* The name resolves to dvp itself: just take a reference. */
 		FREE((caddr_t)nfhp, M_NFSFH);
 		VREF(dvp);
 		newvp = dvp;
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 	} else {
 		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
 		    cnp->cn_lkflags);
 		if (error)
 			return (error);
 		newvp = NFSTOV(np);
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 		else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
 		    !(np->n_flag & NMODIFIED)) {
 			/*
 			 * Flush the attribute cache when opening a
 			 * leaf node to ensure that fresh attributes
 			 * are fetched in nfs_open() since we did not
 			 * fetch attributes from the LOOKUP reply.
 			 */
 			mtx_lock(&np->n_mtx);
 			np->n_attrstamp = 0;
 			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
 			mtx_unlock(&np->n_mtx);
 		}
 	}
 	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 		cnp->cn_flags |= SAVENAME;
 	/*
 	 * Enter the result in the name cache only when attributes were
 	 * returned for it (and, for a directory, for the parent as well),
 	 * and never for the last component of a DELETE.
 	 */
 	if ((cnp->cn_flags & MAKEENTRY) &&
 	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) &&
 	    attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0))
 		cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
 		    newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime);
 	*vpp = newvp;
 	return (0);
 }
 
 /*
  * nfs read call.
  * Regular files are handed to ncl_bioread(); directories yield EISDIR
  * and every other vnode type yields EOPNOTSUPP.
  */
 static int
 nfs_read(struct vop_read_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	const int kind = vp->v_type;
 
 	if (kind == VREG)
 		return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
 	if (kind == VDIR)
 		return (EISDIR);
 	return (EOPNOTSUPP);
 }
 
 /*
  * nfs readlink call.
  * Only symbolic links are accepted (EINVAL otherwise); the link contents
  * are read through ncl_bioread() with no I/O flags.
  */
 static int
 nfs_readlink(struct vop_readlink_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	if (vp->v_type == VLNK)
 		return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred));
 	return (EINVAL);
 }
 
 /*
  * Do a readlink rpc.
  * Called by ncl_doio() from below the buffer cache.
  */
 int
 ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 {
 	int error, ret, attrflag;
 	struct nfsvattr nfsva;
 
 	error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva,
 	    &attrflag, NULL);
 	if (attrflag) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 		if (ret && !error)
 			error = ret;
 	}
 	if (error && NFS_ISV4(vp))
 		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs read rpc call
  * Ditto above
  */
 int
 ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 {
 	int error, ret, attrflag;
 	struct nfsvattr nfsva;
 	struct nfsmount *nmp;
 
 	nmp = VFSTONFS(vnode_mount(vp));
 	error = EIO;
 	attrflag = 0;
 	if (NFSHASPNFS(nmp))
 		error = nfscl_doiods(vp, uiop, NULL, NULL,
 		    NFSV4OPEN_ACCESSREAD, cred, uiop->uio_td);
 	NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error);
 	if (error != 0)
 		error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva,
 		    &attrflag, NULL);
 	if (attrflag) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 		if (ret && !error)
 			error = ret;
 	}
 	if (error && NFS_ISV4(vp))
 		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs write call
  */
 int
 ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
     int *iomode, int *must_commit, int called_from_strategy)
 {
 	struct nfsvattr nfsva;
 	int error, attrflag, ret;
 	struct nfsmount *nmp;
 
 	nmp = VFSTONFS(vnode_mount(vp));
 	error = EIO;
 	attrflag = 0;
 	if (NFSHASPNFS(nmp))
 		error = nfscl_doiods(vp, uiop, iomode, must_commit,
 		    NFSV4OPEN_ACCESSWRITE, cred, uiop->uio_td);
 	NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error);
 	if (error != 0)
 		error = nfsrpc_write(vp, uiop, iomode, must_commit, cred,
 		    uiop->uio_td, &nfsva, &attrflag, NULL,
 		    called_from_strategy);
 	if (attrflag) {
 		if (VTONFS(vp)->n_flag & ND_NFSV4)
 			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1,
 			    1);
 		else
 			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
 			    1);
 		if (ret && !error)
 			error = ret;
 	}
 	if (DOINGASYNC(vp))
 		*iomode = NFSWRITE_FILESYNC;
 	if (error && NFS_ISV4(vp))
 		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs mknod rpc
  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
  * mode set to specify the file type and the size field for rdev.
  */
 static int
 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
     struct vattr *vap)
 {
 	struct nfsvattr nfsva, dnfsva;
 	struct vnode *newvp = NULL;
 	struct nfsnode *np = NULL, *dnp;
 	struct nfsfh *nfhp;
 	struct vattr vattr;
 	int error = 0, attrflag, dattrflag;
 	u_int32_t rdev;
 
 	if (vap->va_type == VCHR || vap->va_type == VBLK)
 		rdev = vap->va_rdev;
 	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
 		rdev = 0xffffffff;
 	else
 		return (EOPNOTSUPP);
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
 		return (error);
 	error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap,
 	    rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva,
 	    &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
 	if (!error) {
 		if (!nfhp)
 			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
 			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
 			    NULL);
 		if (nfhp)
 			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
 			    cnp->cn_thread, &np, NULL, LK_EXCLUSIVE);
 	}
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	if (!error) {
 		newvp = NFSTOV(np);
 		if (attrflag != 0) {
 			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 			if (error != 0)
 				vput(newvp);
 		}
 	}
 	if (!error) {
 		*vpp = newvp;
 	} else if (NFS_ISV4(dvp)) {
 		error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
 		    vap->va_gid);
 	}
 	dnp = VTONFS(dvp);
 	mtx_lock(&dnp->n_mtx);
 	dnp->n_flag |= NMODIFIED;
 	if (!dattrflag) {
 		dnp->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	mtx_unlock(&dnp->n_mtx);
 	return (error);
 }
 
 /*
  * nfs mknod vop
  * just call nfs_mknodrpc() to do the work.
  */
 /* ARGSUSED */
 static int
 nfs_mknod(struct vop_mknod_args *ap)
 {
 	return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
 }
 
 static struct mtx nfs_cverf_mtx;
 MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex",
     MTX_DEF);
 
 /*
  * Return a create verifier.  The first call seeds the value from
  * arc4random(); every later call increments the previous value by one.
  * The shared state is protected by nfs_cverf_mtx.
  */
 static nfsquad_t
 nfs_get_cverf(void)
 {
 	static nfsquad_t verifier;
 	static int seeded = 0;
 	nfsquad_t snapshot;
 
 	mtx_lock(&nfs_cverf_mtx);
 	if (seeded != 0) {
 		verifier.qval++;
 	} else {
 		verifier.lval[0] = arc4random();
 		verifier.lval[1] = arc4random();
 		seeded = 1;
 	}
 	snapshot = verifier;
 	mtx_unlock(&nfs_cverf_mtx);
 
 	return (snapshot);
 }
 
 /*
  * nfs file create call
  *
  * Sockets are handed to nfs_mknodrpc().  Otherwise a create RPC is issued
  * (exclusive when VA_EXCLUSIVE is set); on success *ap->a_vpp is set to
  * the new vnode.  Returns 0 or an error number.
  */
 static int
 nfs_create(struct vop_create_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *np = NULL, *dnp;
 	struct vnode *newvp = NULL;
 	struct nfsmount *nmp;
 	struct nfsvattr dnfsva, nfsva;
 	struct nfsfh *nfhp;
 	nfsquad_t cverf;
 	int error = 0, attrflag, dattrflag, fmode = 0;
 	struct vattr vattr;
 
 	/*
 	 * Oops, not for me..
 	 */
 	if (vap->va_type == VSOCK)
 		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
 
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
 		return (error);
 	if (vap->va_vaflags & VA_EXCLUSIVE)
 		fmode |= O_EXCL;
 	dnp = VTONFS(dvp);
 	nmp = VFSTONFS(vnode_mount(dvp));
 again:
 	/* For NFSv4, wait until any remove is done. */
 	mtx_lock(&dnp->n_mtx);
 	while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) {
 		dnp->n_flag |= NREMOVEWANT;
 		(void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0);
 	}
 	mtx_unlock(&dnp->n_mtx);
 
 	/* A new verifier is fetched for every attempt, including retries. */
 	cverf = nfs_get_cverf();
 	error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva,
 	    &nfhp, &attrflag, &dattrflag, NULL);
 	if (!error) {
 		/* No file handle in the reply: try a lookup to obtain one. */
 		if (nfhp == NULL)
 			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
 			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
 			    NULL);
 		if (nfhp != NULL)
 			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
 			    cnp->cn_thread, &np, NULL, LK_EXCLUSIVE);
 	}
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	if (!error) {
 		/*
 		 * NOTE(review): np is still NULL here if neither the create
 		 * nor the lookup produced a file handle - confirm that this
 		 * cannot happen.
 		 */
 		newvp = NFSTOV(np);
 		/* Fetch attributes explicitly if the reply carried none. */
 		if (attrflag == 0)
 			error = nfsrpc_getattr(newvp, cnp->cn_cred,
 			    cnp->cn_thread, &nfsva, NULL);
 		if (error == 0)
 			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 	}
 	if (error) {
 		if (newvp != NULL) {
 			vput(newvp);
 			newvp = NULL;
 		}
 		/*
 		 * Exclusive create is not supported: retry as a
 		 * non-exclusive create.
 		 */
 		if (NFS_ISV34(dvp) && (fmode & O_EXCL) &&
 		    error == NFSERR_NOTSUPP) {
 			fmode &= ~O_EXCL;
 			goto again;
 		}
 	} else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) {
 		/*
 		 * After an exclusive create, apply the requested attributes
 		 * with a separate setattr when nfscl_checksattr() asks for
 		 * it.
 		 */
 		if (nfscl_checksattr(vap, &nfsva)) {
 			error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred,
 			    cnp->cn_thread, &nfsva, &attrflag, NULL);
 			if (error && (vap->va_uid != (uid_t)VNOVAL ||
 			    vap->va_gid != (gid_t)VNOVAL)) {
 				/* try again without setting uid/gid */
 				vap->va_uid = (uid_t)VNOVAL;
 				vap->va_gid = (gid_t)VNOVAL;
 				error = nfsrpc_setattr(newvp, vap, NULL,
 				    cnp->cn_cred, cnp->cn_thread, &nfsva,
 				    &attrflag, NULL);
 			}
 			if (attrflag)
 				(void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
 				    NULL, 0, 1);
 			if (error != 0)
 				vput(newvp);
 		}
 	}
 	if (!error) {
 		if ((cnp->cn_flags & MAKEENTRY) && attrflag)
 			cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
 			    NULL);
 		*ap->a_vpp = newvp;
 	} else if (NFS_ISV4(dvp)) {
 		error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
 		    vap->va_gid);
 	}
 	/*
 	 * Mark the directory modified; without fresh directory attributes
 	 * its cached attributes are invalidated as well.
 	 */
 	mtx_lock(&dnp->n_mtx);
 	dnp->n_flag |= NMODIFIED;
 	if (!dattrflag) {
 		dnp->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	mtx_unlock(&dnp->n_mtx);
 	return (error);
 }
 
 /*
  * nfs file remove call
  * To try and make nfs semantics closer to ufs semantics, a file that has
  * other processes using the vnode is renamed instead of removed and then
  * removed later on the last close.
  * - If v_usecount > 1
  *	  If a rename is not already in the works
  *	     call nfs_sillyrename() to set it up
  *     else
  *	  do the remove rpc
  *
  * Directories are rejected with EPERM.  The remove RPC is also used for a
  * file that is already sillyrenamed when its link count is above one.
  * The attribute cache of vp is invalidated on every path.
  */
 static int
 nfs_remove(struct vop_remove_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *np = VTONFS(vp);
 	int error = 0;
 	struct vattr vattr;
 
 	KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name"));
 	KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount"));
 	if (vp->v_type == VDIR)
 		error = EPERM;
 	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
 	    VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 &&
 	    vattr.va_nlink > 1)) {
 		/*
 		 * Purge the name cache so that the chance of a lookup for
 		 * the name succeeding while the remove is in progress is
 		 * minimized. Without node locking it can still happen, such
 		 * that an I/O op returns ESTALE, but since you get this if
 		 * another host removes the file..
 		 */
 		cache_purge(vp);
 		/*
 		 * throw away biocache buffers, mainly to avoid
 		 * unnecessary delayed writes later.
 		 */
 		error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1);
 		/*
 		 * A doomed vnode is reported as EBADF.  Any flush error
 		 * other than EINTR or EIO is ignored and the RPC is still
 		 * issued.
 		 */
 		if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
 			error = EBADF;
 		else if (error != EINTR && error != EIO)
 			/* Do the rpc */
 			error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
 		/*
 		 * Kludge City: If the first reply to the remove rpc is lost..
 		 *   the reply to the retransmitted request will be ENOENT
 		 *   since the file was in fact removed
 		 *   Therefore, we cheat and return success.
 		 */
 		if (error == ENOENT)
 			error = 0;
 	} else if (!np->n_sillyrename)
 		/* The file is still in use: rename it out of the way. */
 		error = nfs_sillyrename(dvp, vp, cnp);
 	/* Force the attributes of vp to be refetched. */
 	mtx_lock(&np->n_mtx);
 	np->n_attrstamp = 0;
 	mtx_unlock(&np->n_mtx);
 	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 	return (error);
 }
 
 /*
  * nfs file remove rpc called from nfs_inactive
  */
 int
 ncl_removeit(struct sillyrename *sp, struct vnode *vp)
 {
 	/*
 	 * Make sure that the directory vnode is still valid.
 	 * XXX we should lock sp->s_dvp here.
 	 */
 	if (sp->s_dvp->v_type == VBAD)
 		return (0);
 	return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen,
 	    sp->s_cred, NULL));
 }
 
 /*
  * Nfs remove rpc, called from nfs_remove() and ncl_removeit().
  */
 static int
 nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
     int namelen, struct ucred *cred, struct thread *td)
 {
 	struct nfsvattr dnfsva;
 	struct nfsnode *dnp = VTONFS(dvp);
 	int error = 0, dattrflag;
 
 	mtx_lock(&dnp->n_mtx);
 	dnp->n_flag |= NREMOVEINPROG;
 	mtx_unlock(&dnp->n_mtx);
 	error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva,
 	    &dattrflag, NULL);
 	mtx_lock(&dnp->n_mtx);
 	if ((dnp->n_flag & NREMOVEWANT)) {
 		dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG);
 		mtx_unlock(&dnp->n_mtx);
 		wakeup((caddr_t)dnp);
 	} else {
 		dnp->n_flag &= ~NREMOVEINPROG;
 		mtx_unlock(&dnp->n_mtx);
 	}
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	mtx_lock(&dnp->n_mtx);
 	dnp->n_flag |= NMODIFIED;
 	if (!dattrflag) {
 		dnp->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	mtx_unlock(&dnp->n_mtx);
 	if (error && NFS_ISV4(dvp))
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs file rename call
  *
  * Flushes dirty data of the source (and target, if any), sillyrenames an
  * in-use non-directory target, then issues the rename RPC.  For NFSv4 the
  * name and parent file handle recorded in the source node are replaced
  * when they differ from the new ones.  Every path leaves through "out",
  * which releases tdvp, tvp, fdvp and fvp.
  */
 static int
 nfs_rename(struct vop_rename_args *ap)
 {
 	struct vnode *fvp = ap->a_fvp;
 	struct vnode *tvp = ap->a_tvp;
 	struct vnode *fdvp = ap->a_fdvp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *tcnp = ap->a_tcnp;
 	struct componentname *fcnp = ap->a_fcnp;
 	struct nfsnode *fnp = VTONFS(ap->a_fvp);
 	struct nfsnode *tdnp = VTONFS(ap->a_tdvp);
 	struct nfsv4node *newv4 = NULL;
 	int error;
 
 	KASSERT((tcnp->cn_flags & HASBUF) != 0 &&
 	    (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name"));
 	/* Check for cross-device rename */
 	if ((fvp->v_mount != tdvp->v_mount) ||
 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
 		error = EXDEV;
 		goto out;
 	}
 
 	if (fvp == tvp) {
 		printf("nfs_rename: fvp == tvp (can't happen)\n");
 		error = 0;
 		goto out;
 	}
 	if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0)
 		goto out;
 
 	/*
 	 * We have to flush B_DELWRI data prior to renaming
 	 * the file.  If we don't, the delayed-write buffers
 	 * can be flushed out later after the file has gone stale
 	 * under NFSV3.  NFSV2 does not have this problem because
 	 * ( as far as I can tell ) it flushes dirty buffers more
 	 * often.
 	 *
 	 * Skip the rename operation if the fsync fails, this can happen
 	 * due to the server's volume being full, when we pushed out data
 	 * that was written back to our cache earlier. Not checking for
 	 * this condition can result in potential (silent) data loss.
 	 */
 	error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
 	NFSVOPUNLOCK(fvp, 0);
 	if (!error && tvp)
 		error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
 	if (error)
 		goto out;
 
 	/*
 	 * If the tvp exists and is in use, sillyrename it before doing the
 	 * rename of the new file over it.
 	 * XXX Can't sillyrename a directory.
 	 */
 	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
 		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
 		/* The target was moved aside; rename as if none existed. */
 		vput(tvp);
 		tvp = NULL;
 	}
 
 	error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen,
 	    tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
 	    tcnp->cn_thread);
 
 	if (error == 0 && NFS_ISV4(tdvp)) {
 		/*
 		 * For NFSv4, check to see if it is the same name and
 		 * replace the name, if it is different.
 		 */
 		/* Allocate with M_WAITOK before the mutexes are taken. */
 		MALLOC(newv4, struct nfsv4node *,
 		    sizeof (struct nfsv4node) +
 		    tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1,
 		    M_NFSV4NODE, M_WAITOK);
 		mtx_lock(&tdnp->n_mtx);
 		mtx_lock(&fnp->n_mtx);
 		if (fnp->n_v4 != NULL && fvp->v_type == VREG &&
 		    (fnp->n_v4->n4_namelen != tcnp->cn_namelen ||
 		      NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4),
 		      tcnp->cn_namelen) ||
 		      tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen ||
 		      NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
 			tdnp->n_fhp->nfh_len))) {
 #ifdef notdef
 { char nnn[100]; int nnnl;
 nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99;
 bcopy(tcnp->cn_nameptr, nnn, nnnl);
 nnn[nnnl] = '\0';
 printf("ren replace=%s\n",nnn);
 }
 #endif
 			/*
 			 * Swap in the new record and fill it with the target
 			 * directory's file handle and the new name.
 			 */
 			FREE((caddr_t)fnp->n_v4, M_NFSV4NODE);
 			fnp->n_v4 = newv4;
 			newv4 = NULL;
 			fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len;
 			fnp->n_v4->n4_namelen = tcnp->cn_namelen;
 			NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
 			    tdnp->n_fhp->nfh_len);
 			NFSBCOPY(tcnp->cn_nameptr,
 			    NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen);
 		}
 		mtx_unlock(&tdnp->n_mtx);
 		mtx_unlock(&fnp->n_mtx);
 		/* The preallocated record was not needed. */
 		if (newv4 != NULL)
 			FREE((caddr_t)newv4, M_NFSV4NODE);
 	}
 
 	if (fvp->v_type == VDIR) {
 		if (tvp != NULL && tvp->v_type == VDIR)
 			cache_purge(tdvp);
 		cache_purge(fdvp);
 	}
 
 out:
 	/* tdvp is only vrele()d, not vput(), when it is the same as tvp. */
 	if (tdvp == tvp)
 		vrele(tdvp);
 	else
 		vput(tdvp);
 	if (tvp)
 		vput(tvp);
 	vrele(fdvp);
 	vrele(fvp);
 	/*
 	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
 	 */
 	if (error == ENOENT)
 		error = 0;
 	return (error);
 }
 
 /*
  * nfs file rename rpc called from nfs_remove() above
  */
 static int
 nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
     struct sillyrename *sp)
 {
 
 	return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen,
 	    sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred,
 	    scnp->cn_thread));
 }
 
 /*
  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
  */
 static int
 nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
     int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr,
     int tnamelen, struct ucred *cred, struct thread *td)
 {
 	struct nfsvattr fnfsva, tnfsva;
 	struct nfsnode *fdnp = VTONFS(fdvp);
 	struct nfsnode *tdnp = VTONFS(tdvp);
 	int error = 0, fattrflag, tattrflag;
 
 	error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp,
 	    tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag,
 	    &tattrflag, NULL, NULL);
 	mtx_lock(&fdnp->n_mtx);
 	fdnp->n_flag |= NMODIFIED;
 	if (fattrflag != 0) {
 		mtx_unlock(&fdnp->n_mtx);
 		(void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1);
 	} else {
 		fdnp->n_attrstamp = 0;
 		mtx_unlock(&fdnp->n_mtx);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp);
 	}
 	mtx_lock(&tdnp->n_mtx);
 	tdnp->n_flag |= NMODIFIED;
 	if (tattrflag != 0) {
 		mtx_unlock(&tdnp->n_mtx);
 		(void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1);
 	} else {
 		tdnp->n_attrstamp = 0;
 		mtx_unlock(&tdnp->n_mtx);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
 	}
 	if (error && NFS_ISV4(fdvp))
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs hard link create call
  */
 static int
 nfs_link(struct vop_link_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *np, *tdnp;
 	struct nfsvattr nfsva, dnfsva;
 	int error = 0, attrflag, dattrflag;
 
 	/*
 	 * Push all writes to the server, so that the attribute cache
 	 * doesn't get "out of sync" with the server.
 	 * XXX There should be a better way!
 	 */
 	VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);
 
 	error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen,
 	    cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag,
 	    &dattrflag, NULL);
 	tdnp = VTONFS(tdvp);
 	mtx_lock(&tdnp->n_mtx);
 	tdnp->n_flag |= NMODIFIED;
 	if (dattrflag != 0) {
 		mtx_unlock(&tdnp->n_mtx);
 		(void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1);
 	} else {
 		tdnp->n_attrstamp = 0;
 		mtx_unlock(&tdnp->n_mtx);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
 	}
 	if (attrflag)
 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 	else {
 		np = VTONFS(vp);
 		mtx_lock(&np->n_mtx);
 		np->n_attrstamp = 0;
 		mtx_unlock(&np->n_mtx);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 	}
 	/*
 	 * If negative lookup caching is enabled, I might as well
 	 * add an entry for this node. Not necessary for correctness,
 	 * but if negative caching is enabled, then the system
 	 * must care about lookup caching hit rate, so...
 	 */
 	if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 &&
 	    (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) {
 		cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL);
 	}
 	if (error && NFS_ISV4(vp))
 		error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
 		    (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs symbolic link create call
  */
 static int
 nfs_symlink(struct vop_symlink_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsvattr nfsva, dnfsva;
 	struct nfsfh *nfhp;
 	struct nfsnode *np = NULL, *dnp;
 	struct vnode *newvp = NULL;
 	int error = 0, attrflag, dattrflag, ret;
 
 	vap->va_type = VLNK;
 	error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva,
 	    &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
 	if (nfhp) {
 		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
 		    &np, NULL, LK_EXCLUSIVE);
 		if (!ret)
 			newvp = NFSTOV(np);
 		else if (!error)
 			error = ret;
 	}
 	if (newvp != NULL) {
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 	} else if (!error) {
 		/*
 		 * If we do not have an error and we could not extract the
 		 * newvp from the response due to the request being NFSv2, we
 		 * have to do a lookup in order to obtain a newvp to return.
 		 */
 		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 		    cnp->cn_cred, cnp->cn_thread, &np);
 		if (!error)
 			newvp = NFSTOV(np);
 	}
 	if (error) {
 		if (newvp)
 			vput(newvp);
 		if (NFS_ISV4(dvp))
 			error = nfscl_maperr(cnp->cn_thread, error,
 			    vap->va_uid, vap->va_gid);
 	} else {
 		*ap->a_vpp = newvp;
 	}
 
 	dnp = VTONFS(dvp);
 	mtx_lock(&dnp->n_mtx);
 	dnp->n_flag |= NMODIFIED;
 	if (dattrflag != 0) {
 		mtx_unlock(&dnp->n_mtx);
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	} else {
 		dnp->n_attrstamp = 0;
 		mtx_unlock(&dnp->n_mtx);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	/*
 	 * If negative lookup caching is enabled, I might as well
 	 * add an entry for this node. Not necessary for correctness,
 	 * but if negative caching is enabled, then the system
 	 * must care about lookup caching hit rate, so...
 	 */
 	if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
 	    (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) {
 		cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, NULL);
 	}
 	return (error);
 }
 
 /*
  * nfs make dir call
  */
 static int
 nfs_mkdir(struct vop_mkdir_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *np = NULL, *dnp;
 	struct vnode *newvp = NULL;
 	struct vattr vattr;
 	struct nfsfh *nfhp;
 	struct nfsvattr nfsva, dnfsva;
 	int error = 0, attrflag, dattrflag, ret;
 
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
 		return (error);
 	vap->va_type = VDIR;
 	error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp,
 	    &attrflag, &dattrflag, NULL);
 	dnp = VTONFS(dvp);
 	mtx_lock(&dnp->n_mtx);
 	dnp->n_flag |= NMODIFIED;
 	if (dattrflag != 0) {
 		mtx_unlock(&dnp->n_mtx);
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	} else {
 		dnp->n_attrstamp = 0;
 		mtx_unlock(&dnp->n_mtx);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	if (nfhp) {
 		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
 		    &np, NULL, LK_EXCLUSIVE);
 		if (!ret) {
 			newvp = NFSTOV(np);
 			if (attrflag)
 			   (void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
 				NULL, 0, 1);
 		} else if (!error)
 			error = ret;
 	}
 	if (!error && newvp == NULL) {
 		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 		    cnp->cn_cred, cnp->cn_thread, &np);
 		if (!error) {
 			newvp = NFSTOV(np);
 			if (newvp->v_type != VDIR)
 				error = EEXIST;
 		}
 	}
 	if (error) {
 		if (newvp)
 			vput(newvp);
 		if (NFS_ISV4(dvp))
 			error = nfscl_maperr(cnp->cn_thread, error,
 			    vap->va_uid, vap->va_gid);
 	} else {
 		/*
 		 * If negative lookup caching is enabled, I might as well
 		 * add an entry for this node. Not necessary for correctness,
 		 * but if negative caching is enabled, then the system
 		 * must care about lookup caching hit rate, so...
 		 */
 		if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
 		    (cnp->cn_flags & MAKEENTRY) &&
 		    attrflag != 0 && dattrflag != 0)
 			cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
 			    &dnfsva.na_ctime);
 		*ap->a_vpp = newvp;
 	}
 	return (error);
 }
 
 /*
  * nfs remove directory call
  */
 static int
 nfs_rmdir(struct vop_rmdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *dnp;
 	struct nfsvattr dnfsva;
 	int error, dattrflag;
 
 	if (dvp == vp)
 		return (EINVAL);
 	error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL);
 	dnp = VTONFS(dvp);
 	mtx_lock(&dnp->n_mtx);
 	dnp->n_flag |= NMODIFIED;
 	if (dattrflag != 0) {
 		mtx_unlock(&dnp->n_mtx);
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	} else {
 		dnp->n_attrstamp = 0;
 		mtx_unlock(&dnp->n_mtx);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 
 	cache_purge(dvp);
 	cache_purge(vp);
 	if (error && NFS_ISV4(dvp))
 		error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
 		    (gid_t)0);
 	/*
 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
 	 */
 	if (error == ENOENT)
 		error = 0;
 	return (error);
 }
 
 /*
  * nfs readdir call
  */
 static int
 nfs_readdir(struct vop_readdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct uio *uio = ap->a_uio;
 	ssize_t tresid, left;
 	int error = 0;
 	struct vattr vattr;
 	
 	if (ap->a_eofflag != NULL)
 		*ap->a_eofflag = 0;
 	if (vp->v_type != VDIR) 
 		return(EPERM);
 
 	/*
 	 * First, check for hit on the EOF offset cache
 	 */
 	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
 	    (np->n_flag & NMODIFIED) == 0) {
 		if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
 			mtx_lock(&np->n_mtx);
 			if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) ||
 			    !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
 				mtx_unlock(&np->n_mtx);
 				NFSINCRGLOBAL(nfsstatsv1.direofcache_hits);
 				if (ap->a_eofflag != NULL)
 					*ap->a_eofflag = 1;
 				return (0);
 			} else
 				mtx_unlock(&np->n_mtx);
 		}
 	}
 
 	/*
 	 * NFS always guarantees that directory entries don't straddle
 	 * DIRBLKSIZ boundaries.  As such, we need to limit the size
 	 * to an exact multiple of DIRBLKSIZ, to avoid copying a partial
 	 * directory entry.
 	 */
 	left = uio->uio_resid % DIRBLKSIZ;
 	if (left == uio->uio_resid)
 		return (EINVAL);
 	uio->uio_resid -= left;
 
 	/*
 	 * Call ncl_bioread() to do the real work.
 	 */
 	tresid = uio->uio_resid;
 	error = ncl_bioread(vp, uio, 0, ap->a_cred);
 
 	if (!error && uio->uio_resid == tresid) {
 		NFSINCRGLOBAL(nfsstatsv1.direofcache_misses);
 		if (ap->a_eofflag != NULL)
 			*ap->a_eofflag = 1;
 	}
 	
 	/* Add the partial DIRBLKSIZ (left) back in. */
 	uio->uio_resid += left;
 	return (error);
 }
 
 /*
  * Readdir RPC.
  * Called from below the buffer cache by ncl_doio().
  *
  * Fetches the cookie recorded for uiop->uio_offset, performs the readdir
  * RPC and then either records the end-of-directory offset or stores the
  * cookie for the next block.  Returns NFSERR_BAD_COOKIE when no cookie
  * is known for the offset.
  */
 int
 ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
     struct thread *td)
 {
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	struct nfsnode *dnp = VTONFS(vp);
 	struct nfsvattr nfsva;
 	nfsuint64 *cookiep, cookie;
 	int attrflag, eof, error;
 
 	KASSERT(uiop->uio_iovcnt == 1 &&
 	    (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
 	    ("nfs readdirrpc bad uio"));
 
 	/* No cookie for this offset: assume the directory was stale. */
 	ncl_dircookie_lock(dnp);
 	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
 	if (cookiep == NULL) {
 		ncl_dircookie_unlock(dnp);
 		return (NFSERR_BAD_COOKIE);
 	}
 	cookie = *cookiep;
 	ncl_dircookie_unlock(dnp);
 
 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
 		(void)ncl_fsinfo(nmp, vp, cred, td);
 
 	error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva,
 	    &attrflag, &eof, NULL);
 	if (attrflag != 0)
 		(void)nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 
 	if (error != 0) {
 		if (NFS_ISV4(vp))
 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 		return (error);
 	}
 
 	/*
 	 * Success: we are either at the end of the directory or the block
 	 * has been filled.
 	 */
 	if (eof) {
 		dnp->n_direofoffset = uiop->uio_offset;
 		return (error);
 	}
 	if (uiop->uio_resid > 0)
 		printf("EEK! readdirrpc resid > 0\n");
 	/* Remember the cookie that continues from the new offset. */
 	ncl_dircookie_lock(dnp);
 	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
 	*cookiep = cookie;
 	ncl_dircookie_unlock(dnp);
 	return (error);
 }
 
 /*
  * NFS V3 readdir plus RPC.  Used in place of ncl_readdirrpc().
  *
  * Same cookie handling as ncl_readdirrpc(), but the directory contents
  * are fetched with nfsrpc_readdirplus().
  */
 int
 ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
     struct thread *td)
 {
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	struct nfsnode *dnp = VTONFS(vp);
 	struct nfsvattr nfsva;
 	nfsuint64 *cookiep, cookie;
 	int attrflag, eof, error;
 
 	KASSERT(uiop->uio_iovcnt == 1 &&
 	    (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
 	    ("nfs readdirplusrpc bad uio"));
 
 	/* No cookie for this offset: assume the directory was stale. */
 	ncl_dircookie_lock(dnp);
 	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
 	if (cookiep == NULL) {
 		ncl_dircookie_unlock(dnp);
 		return (NFSERR_BAD_COOKIE);
 	}
 	cookie = *cookiep;
 	ncl_dircookie_unlock(dnp);
 
 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
 		(void)ncl_fsinfo(nmp, vp, cred, td);
 
 	error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva,
 	    &attrflag, &eof, NULL);
 	if (attrflag != 0)
 		(void)nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 
 	if (error != 0) {
 		if (NFS_ISV4(vp))
 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 		return (error);
 	}
 
 	/*
 	 * Success: we are either at the end of the directory or the block
 	 * has been filled.
 	 */
 	if (eof) {
 		dnp->n_direofoffset = uiop->uio_offset;
 		return (error);
 	}
 	if (uiop->uio_resid > 0)
 		printf("EEK! readdirplusrpc resid > 0\n");
 	/* Remember the cookie that continues from the new offset. */
 	ncl_dircookie_lock(dnp);
 	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
 	*cookiep = cookie;
 	ncl_dircookie_unlock(dnp);
 	return (error);
 }
 
 /*
  * Silly rename. To make the NFS filesystem that is stateless look a little
  * more like the "ufs" a remove of an active vnode is translated to a rename
  * to a funny looking filename that is removed by nfs_inactive on the
  * nfsnode. There is the potential for another process on a different client
  * to create the same funny name between the nfs_lookitup() fails and the
  * nfs_rename() completes, but...
  */
 static int
 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
 {
 	struct sillyrename *sp;
 	struct nfsnode *np;
 	int error;
 	short pid;
 	unsigned int lticks;
 
 	cache_purge(dvp);
 	np = VTONFS(vp);
 	KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir"));
 	MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
 	    M_NEWNFSREQ, M_WAITOK);
 	sp->s_cred = crhold(cnp->cn_cred);
 	sp->s_dvp = dvp;
 	VREF(dvp);
 
 	/* 
 	 * Fudge together a funny name.
 	 * Changing the format of the funny name to accommodate more 
 	 * sillynames per directory.
 	 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 
 	 * CPU ticks since boot.
 	 */
 	pid = cnp->cn_thread->td_proc->p_pid;
 	lticks = (unsigned int)ticks;
 	for ( ; ; ) {
 		sp->s_namlen = sprintf(sp->s_name, 
 				       ".nfs.%08x.%04x4.4", lticks, 
 				       pid);
 		if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 				 cnp->cn_thread, NULL))
 			break;
 		lticks++;
 	}
 	error = nfs_renameit(dvp, vp, cnp, sp);
 	if (error)
 		goto bad;
 	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 		cnp->cn_thread, &np);
 	np->n_sillyrename = sp;
 	return (0);
 bad:
 	vrele(sp->s_dvp);
 	crfree(sp->s_cred);
 	free((caddr_t)sp, M_NEWNFSREQ);
 	return (error);
 }
 
 /*
  * Look up a file name and optionally either update the file handle or
  * allocate an nfsnode, depending on the value of npp.
  * npp == NULL	--> just do the lookup; the file handle returned by the
  *			RPC is freed here
  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
  *			handled too; the node is returned in *npp
  * *npp != NULL --> update the file handle in the vnode
  *
  * Returns 0 on success or an error number.  ENOENT is also returned when
  * *npp == NULL and the lookup reply carried no attributes for the file.
  */
 static int
 nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred,
     struct thread *td, struct nfsnode **npp)
 {
 	struct vnode *newvp = NULL, *vp;
 	struct nfsnode *np, *dnp = VTONFS(dvp);
 	struct nfsfh *nfhp, *onfhp;
 	struct nfsvattr nfsva, dnfsva;
 	struct componentname cn;
 	int error = 0, attrflag, dattrflag;
 	u_int hash;
 
 	error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva,
 	    &nfhp, &attrflag, &dattrflag, NULL);
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	if (npp && !error) {
 		if (*npp != NULL) {
 		    /* Caller supplied a node: rebind it to the new handle. */
 		    np = *npp;
 		    vp = NFSTOV(np);
 		    /*
 		     * For NFSv4, check to see if it is the same name and
 		     * replace the name, if it is different.
 		     */
 		    if (np->n_v4 != NULL && nfsva.na_type == VREG &&
 			(np->n_v4->n4_namelen != len ||
 			 NFSBCMP(name, NFS4NODENAME(np->n_v4), len) ||
 			 dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
 			 NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
 			 dnp->n_fhp->nfh_len))) {
 			    FREE((caddr_t)np->n_v4, M_NFSV4NODE);
 			    MALLOC(np->n_v4, struct nfsv4node *,
 				sizeof (struct nfsv4node) +
 				dnp->n_fhp->nfh_len + len - 1,
 				M_NFSV4NODE, M_WAITOK);
 			    np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
 			    np->n_v4->n4_namelen = len;
 			    NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
 				dnp->n_fhp->nfh_len);
 			    NFSBCOPY(name, NFS4NODENAME(np->n_v4), len);
 		    }
 		    hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len,
 			FNV1_32_INIT);
 		    onfhp = np->n_fhp;
 		    /*
 		     * Rehash node for new file handle.
 		     */
 		    vfs_hash_rehash(vp, hash);
 		    np->n_fhp = nfhp;
 		    if (onfhp != NULL)
 			FREE((caddr_t)onfhp, M_NFSFH);
 		    newvp = NFSTOV(np);
 		} else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) {
 		    /*
 		     * The handle compares equal to the directory's own:
 		     * return the directory with an extra reference.  np
 		     * must be set here as well, since it is stored into
 		     * *npp below; it was previously left uninitialized
 		     * on this path.
 		     */
 		    FREE((caddr_t)nfhp, M_NFSFH);
 		    VREF(dvp);
 		    newvp = dvp;
 		    np = dnp;
 		} else {
 		    /* Get an nfsnode for the handle. */
 		    cn.cn_nameptr = name;
 		    cn.cn_namelen = len;
 		    error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td,
 			&np, NULL, LK_EXCLUSIVE);
 		    if (error)
 			return (error);
 		    newvp = NFSTOV(np);
 		}
 		/* A freshly obtained node without attributes is useless. */
 		if (!attrflag && *npp == NULL) {
 			if (newvp == dvp)
 				vrele(newvp);
 			else
 				vput(newvp);
 			return (ENOENT);
 		}
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 	} else if (npp == NULL && !error && nfhp != NULL) {
 		/*
 		 * Lookup-only call that succeeded: nobody takes ownership
 		 * of the file handle, so release it instead of leaking it.
 		 */
 		FREE((caddr_t)nfhp, M_NFSFH);
 	}
 	if (npp && *npp == NULL) {
 		if (error) {
 			if (newvp) {
 				if (newvp == dvp)
 					vrele(newvp);
 				else
 					vput(newvp);
 			}
 		} else
 			*npp = np;
 	}
 	if (error && NFS_ISV4(dvp))
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * NFS version 3 and 4 commit RPC.
  *
  * Does nothing (returns 0) unless the mount has NFSSTA_HASWRITEVERF
  * set; otherwise commits cnt bytes starting at offset and loads any
  * attributes that came back with the reply.
  */
 int
 ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
    struct thread *td)
 {
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	struct nfsvattr nfsva;
 	int attrflag, error, haveverf;
 
 	/* Sample the state bit under the mount mutex. */
 	mtx_lock(&nmp->nm_mtx);
 	haveverf = (nmp->nm_state & NFSSTA_HASWRITEVERF) != 0;
 	mtx_unlock(&nmp->nm_mtx);
 	if (!haveverf)
 		return (0);
 
 	error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva, &attrflag,
 	    NULL);
 	if (attrflag != 0)
 		(void)nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 	if (error == 0 || !NFS_ISV4(vp))
 		return (error);
 	return (nfscl_maperr(td, error, (uid_t)0, (gid_t)0));
 }
 
 /*
  * Strategy routine.
  * An asynchronous request is first offered to ncl_asyncio(); if that
  * declines (returns non-zero), or if the request is synchronous, the
  * I/O is done right here by calling ncl_doio().  Always returns 0.
  */
 static int
 nfs_strategy(struct vop_strategy_args *ap)
 {
 	struct buf *bp = ap->a_bp;
 	struct vnode *vp = ap->a_vp;
 	struct ucred *cr;
 
 	KASSERT(bp->b_vp == vp, ("missing b_getvp"));
 	KASSERT(!(bp->b_flags & B_DONE),
 	    ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
 	BUF_ASSERT_HELD(bp);
 
 	/* Convert a still-logical block number of a regular file. */
 	if (vp->v_type == VREG && bp->b_blkno == bp->b_lblkno)
 		bp->b_blkno = bp->b_lblkno * (vp->v_bufobj.bo_bsize /
 		    DEV_BSIZE);
 
 	/* Reads use the buffer's read credential, everything else write. */
 	cr = (bp->b_iocmd == BIO_READ) ? bp->b_rcred : bp->b_wcred;
 
 	/* Async request successfully queued: nothing more to do here. */
 	if ((bp->b_flags & B_ASYNC) != 0 &&
 	    ncl_asyncio(VFSTONFS(vp->v_mount), bp, NOCRED, curthread) == 0)
 		return (0);
 
 	(void)ncl_doio(vp, bp, cr, curthread, 1);
 	return (0);
 }
 
 /*
  * fsync vnode op.
  *
  * Regular files are flushed through ncl_flush() with commit == 1.
  * Anything else returns 0 immediately: for NFS, metadata is changed
  * synchronously on the server, so there is nothing to flush, and
  * ncl_flush() clears the NMODIFIED flag, which shouldn't be done here
  * for directories.
  */
 /* ARGSUSED */
 static int
 nfs_fsync(struct vop_fsync_args *ap)
 {
 
 	if (ap->a_vp->v_type == VREG)
 		return (ncl_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1, 0));
 	return (0);
 }
 
 /*
  * Flush all the blocks associated with a vnode.
  * 	Walk through the buffer pool and push any dirty pages
  *	associated with the vnode.
  * If the called_from_renewthread argument is TRUE, it has been called
  * from the NFSv4 renew thread and, as such, cannot block indefinitely
  * waiting for a buffer write to complete.
  *
  * Arguments:
  *	waitfor	- when MNT_WAIT, wait for outstanding output on the
  *		  bufobj and for direct I/O async writes to drain.
  *	commit	- when non-zero (and NFS_ISV34()), a first pass issues
  *		  commit RPCs for buffers marked B_DELWRI | B_NEEDCOMMIT
  *		  before the remaining dirty buffers are written.
  *
  * Returns 0 on success; EINTR when newnfs_sigintr() reports an
  * interruption; EIO when called from the renew thread and a wait fails;
  * the saved np->n_error when NWRITEERR is set; or, when dirty state
  * remains after the retries at "done", EBUSY (EIO for the renew thread).
  */
 int
 ncl_flush(struct vnode *vp, int waitfor, struct thread *td,
     int commit, int called_from_renewthread)
 {
 	struct nfsnode *np = VTONFS(vp);
 	struct buf *bp;
 	int i;
 	struct buf *nbp;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
 	int passone = 1, trycnt = 0;
 	u_quad_t off, endoff, toff;
 	struct ucred* wcred = NULL;
 	struct buf **bvec = NULL;
 	struct bufobj *bo;
 #ifndef NFS_COMMITBVECSIZ
 #define	NFS_COMMITBVECSIZ	20
 #endif
 	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
 	int bvecsize = 0, bveccount;
 
 	/* The renew thread only ever sleeps with a one second timeout. */
 	if (called_from_renewthread != 0)
 		slptimeo = hz;
 	/* Interruptible mounts let signals break the sleeps below. */
 	if (nmp->nm_flag & NFSMNT_INT)
 		slpflag = PCATCH;
 	if (!commit)
 		passone = 0;
 	bo = &vp->v_bufobj;
 	/*
 	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
 	 * server, but has not been committed to stable storage on the server
 	 * yet. On the first pass, the byte range is worked out and the commit
 	 * rpc is done. On the second pass, ncl_writebp() is called to do the
 	 * job.
 	 */
 again:
 	/* Reset the commit byte range [off, endoff) and the buffer list. */
 	off = (u_quad_t)-1;
 	endoff = 0;
 	bvecpos = 0;
 	if (NFS_ISV34(vp) && commit) {
 		/* Drop a heap vector left over from a previous pass. */
 		if (bvec != NULL && bvec != bvec_on_stack)
 			free(bvec, M_TEMP);
 		/*
 		 * Count up how many buffers waiting for a commit.
 		 */
 		bveccount = 0;
 		BO_LOCK(bo);
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (!BUF_ISLOCKED(bp) &&
 			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
 				== (B_DELWRI | B_NEEDCOMMIT))
 				bveccount++;
 		}
 		/*
 		 * Allocate space to remember the list of bufs to commit.  It is
 		 * important to use M_NOWAIT here to avoid a race with nfs_write.
 		 * If we can't get memory (for whatever reason), we will end up
 		 * committing the buffers one-by-one in the loop below.
 		 */
 		if (bveccount > NFS_COMMITBVECSIZ) {
 			/*
 			 * Release the vnode interlock to avoid a lock
 			 * order reversal.
 			 */
 			BO_UNLOCK(bo);
 			bvec = (struct buf **)
 				malloc(bveccount * sizeof(struct buf *),
 				       M_TEMP, M_NOWAIT);
 			BO_LOCK(bo);
 			if (bvec == NULL) {
 				/* Allocation failed: use the stack vector. */
 				bvec = bvec_on_stack;
 				bvecsize = NFS_COMMITBVECSIZ;
 			} else
 				bvecsize = bveccount;
 		} else {
 			bvec = bvec_on_stack;
 			bvecsize = NFS_COMMITBVECSIZ;
 		}
 		/* Collect (and lock) up to bvecsize buffers needing commit. */
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (bvecpos >= bvecsize)
 				break;
 			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 				nbp = TAILQ_NEXT(bp, b_bobufs);
 				continue;
 			}
 			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
 			    (B_DELWRI | B_NEEDCOMMIT)) {
 				BUF_UNLOCK(bp);
 				nbp = TAILQ_NEXT(bp, b_bobufs);
 				continue;
 			}
 			BO_UNLOCK(bo);
 			bremfree(bp);
 			/*
 			 * Work out if all buffers are using the same cred
 			 * so we can deal with them all with one commit.
 			 *
 			 * NOTE: we are not clearing B_DONE here, so we have
 			 * to do it later on in this routine if we intend to
 			 * initiate I/O on the bp.
 			 *
 			 * Note: to avoid loopback deadlocks, we do not
 			 * assign b_runningbufspace.
 			 */
 			if (wcred == NULL)
 				wcred = bp->b_wcred;
 			else if (wcred != bp->b_wcred)
 				wcred = NOCRED;
 			vfs_busy_pages(bp, 1);
 
 			BO_LOCK(bo);
 			/*
 			 * bp is protected by being locked, but nbp is not
 			 * and vfs_busy_pages() may sleep.  We have to
 			 * recalculate nbp.
 			 */
 			nbp = TAILQ_NEXT(bp, b_bobufs);
 
 			/*
 			 * A list of these buffers is kept so that the
 			 * second loop knows which buffers have actually
 			 * been committed. This is necessary, since there
 			 * may be a race between the commit rpc and new
 			 * uncommitted writes on the file.
 			 */
 			bvec[bvecpos++] = bp;
 			/* Grow [off, endoff) to cover this buffer's dirty range. */
 			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 				bp->b_dirtyoff;
 			if (toff < off)
 				off = toff;
 			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
 			if (toff > endoff)
 				endoff = toff;
 		}
 		BO_UNLOCK(bo);
 	}
 	if (bvecpos > 0) {
 		/*
 		 * Commit data on the server, as required.
 		 * If all bufs are using the same wcred, then use that with
 		 * one call for all of them, otherwise commit each one
 		 * separately.
 		 */
 		if (wcred != NOCRED)
 			retv = ncl_commit(vp, off, (int)(endoff - off),
 					  wcred, td);
 		else {
 			retv = 0;
 			for (i = 0; i < bvecpos; i++) {
 				/*
 				 * NOTE(review): this "off" shadows the
 				 * function-level u_quad_t off.
 				 */
 				off_t off, size;
 				bp = bvec[i];
 				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 					bp->b_dirtyoff;
 				size = (u_quad_t)(bp->b_dirtyend
 						  - bp->b_dirtyoff);
 				retv = ncl_commit(vp, off, (int)size,
 						  bp->b_wcred, td);
 				if (retv) break;
 			}
 		}
 
 		if (retv == NFSERR_STALEWRITEVERF)
 			ncl_clearcommit(vp->v_mount);
 
 		/*
 		 * Now, either mark the blocks I/O done or mark the
 		 * blocks dirty, depending on whether the commit
 		 * succeeded.
 		 */
 		for (i = 0; i < bvecpos; i++) {
 			bp = bvec[i];
 			bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 			if (retv) {
 				/*
 				 * Error, leave B_DELWRI intact
 				 */
 				vfs_unbusy_pages(bp);
 				brelse(bp);
 			} else {
 				/*
 				 * Success, remove B_DELWRI ( bundirty() ).
 				 *
 				 * b_dirtyoff/b_dirtyend seem to be NFS
 				 * specific.  We should probably move that
 				 * into bundirty(). XXX
 				 */
 				bufobj_wref(bo);
 				bp->b_flags |= B_ASYNC;
 				bundirty(bp);
 				bp->b_flags &= ~B_DONE;
 				bp->b_ioflags &= ~BIO_ERROR;
 				bp->b_dirtyoff = bp->b_dirtyend = 0;
 				bufdone(bp);
 			}
 		}
 	}
 
 	/*
 	 * Start/do any write(s) that are required.
 	 */
 loop:
 	BO_LOCK(bo);
 	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 			/* Busy buffer: only wait for it on a final MNT_WAIT pass. */
 			if (waitfor != MNT_WAIT || passone)
 				continue;
 
 			/* LK_INTERLOCK hands the bufobj lock to the lock call. */
 			error = BUF_TIMELOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo);
 			if (error == 0) {
 				BUF_UNLOCK(bp);
 				goto loop;
 			}
 			if (error == ENOLCK) {
 				error = 0;
 				goto loop;
 			}
 			if (called_from_renewthread != 0) {
 				/*
 				 * Return EIO so the flush will be retried
 				 * later.
 				 */
 				error = EIO;
 				goto done;
 			}
 			if (newnfs_sigintr(nmp, td)) {
 				error = EINTR;
 				goto done;
 			}
 			/* After one interruptible try, switch to timed sleeps. */
 			if (slpflag == PCATCH) {
 				slpflag = 0;
 				slptimeo = 2 * hz;
 			}
 			goto loop;
 		}
 		if ((bp->b_flags & B_DELWRI) == 0)
 			panic("nfs_fsync: not dirty");
 		/* Buffers still awaiting a commit are not rewritten here. */
 		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
 			BUF_UNLOCK(bp);
 			continue;
 		}
 		BO_UNLOCK(bo);
 		bremfree(bp);
 		/*
 		 * NOTE(review): both arms below are identical, so B_ASYNC is
 		 * always set before the write is started.
 		 */
 		if (passone || !commit)
 		    bp->b_flags |= B_ASYNC;
 		else
 		    bp->b_flags |= B_ASYNC;
 		bwrite(bp);
 		if (newnfs_sigintr(nmp, td)) {
 			error = EINTR;
 			goto done;
 		}
 		/* The list may have changed while unlocked: rescan from the top. */
 		goto loop;
 	}
 	if (passone) {
 		/* First pass finished: run the commit/write sequence again. */
 		passone = 0;
 		BO_UNLOCK(bo);
 		goto again;
 	}
 	if (waitfor == MNT_WAIT) {
 		/* Wait for all output in progress on the bufobj. */
 		while (bo->bo_numoutput) {
 			error = bufobj_wwait(bo, slpflag, slptimeo);
 			if (error) {
 			    BO_UNLOCK(bo);
 			    if (called_from_renewthread != 0) {
 				/*
 				 * Return EIO so that the flush will be
 				 * retried later.
 				 */
 				error = EIO;
 				goto done;
 			    }
 			    error = newnfs_sigintr(nmp, td);
 			    if (error)
 				goto done;
 			    if (slpflag == PCATCH) {
 				slpflag = 0;
 				slptimeo = 2 * hz;
 			    }
 			    BO_LOCK(bo);
 			}
 		}
 		/* More buffers were dirtied meanwhile: write those too. */
 		if (bo->bo_dirty.bv_cnt != 0 && commit) {
 			BO_UNLOCK(bo);
 			goto loop;
 		}
 		/*
 		 * Wait for all the async IO requests to drain
 		 */
 		BO_UNLOCK(bo);
 		mtx_lock(&np->n_mtx);
 		while (np->n_directio_asyncwr > 0) {
 			np->n_flag |= NFSYNCWAIT;
 			error = newnfs_msleep(td, &np->n_directio_asyncwr,
 			    &np->n_mtx, slpflag | (PRIBIO + 1), 
 			    "nfsfsync", 0);
 			if (error) {
 				if (newnfs_sigintr(nmp, td)) {
 					mtx_unlock(&np->n_mtx);
 					error = EINTR;	
 					goto done;
 				}
 			}
 		}
 		mtx_unlock(&np->n_mtx);
 	} else
 		BO_UNLOCK(bo);
 	if (NFSHASPNFS(nmp)) {
 		nfscl_layoutcommit(vp, td);
 		/*
 		 * Invalidate the attribute cache, since writes to a DS
 		 * won't update the size attribute.
 		 */
 		mtx_lock(&np->n_mtx);
 		np->n_attrstamp = 0;
 	} else
 		mtx_lock(&np->n_mtx);
 	/* Report (and clear) a write error recorded on the node. */
 	if (np->n_flag & NWRITEERR) {
 		error = np->n_error;
 		np->n_flag &= ~NWRITEERR;
 	}
 	/* Everything is on the server: the node is no longer modified. */
   	if (commit && bo->bo_dirty.bv_cnt == 0 &&
 	    bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
   		np->n_flag &= ~NMODIFIED;
 	mtx_unlock(&np->n_mtx);
 done:
 	if (bvec != NULL && bvec != bvec_on_stack)
 		free(bvec, M_TEMP);
 	/*
 	 * A committing MNT_WAIT flush that finished without error but still
 	 * sees dirty buffers, output in progress or direct I/O async writes
 	 * is retried from the top, up to five times, before giving up.
 	 */
 	if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
 	    (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
 	    np->n_directio_asyncwr != 0)) {
 		if (trycnt++ < 5) {
 			/* try, try again... */
 			passone = 1;
 			wcred = NULL;
 			bvec = NULL;
 			bvecsize = 0;
 			goto again;
 		}
 		vn_printf(vp, "ncl_flush failed");
 		error = called_from_renewthread != 0 ? EIO : EBUSY;
 	}
 	return (error);
 }
 
 /*
  * NFS advisory byte-level locks.
  *
  * NFSv4 mounts with F_POSIX or F_FLOCK requests are handled here through
  * nfsrpc_advlock().  Non-NFSv4 mounts use local locking (lf_advlock())
  * when mounted with NFSMNT_NOLOCKD, otherwise the nfs_advlock_p hook if
  * one is registered, else ENOLCK.  Any other combination returns
  * EOPNOTSUPP (the initial value of error).
  */
 static int
 nfs_advlock(struct vop_advlock_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct ucred *cred;
 	struct nfsnode *np = VTONFS(ap->a_vp);
 	struct proc *p = (struct proc *)ap->a_id;
 	struct thread *td = curthread;	/* XXX */
 	struct vattr va;
 	int ret, error = EOPNOTSUPP;
 	u_quad_t size;
 	
 	if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) {
 		if (vp->v_type != VREG)
 			return (EINVAL);
 		/* POSIX locks use the owning process' credentials. */
 		if ((ap->a_flags & F_POSIX) != 0)
 			cred = p->p_ucred;
 		else
 			cred = td->td_ucred;
 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 		if (vp->v_iflag & VI_DOOMED) {
 			NFSVOPUNLOCK(vp, 0);
 			return (EBADF);
 		}
 
 		/*
 		 * If this is unlocking a write locked region, flush and
 		 * commit them before unlocking. This is required by
 		 * RFC3530 Sec. 9.3.2.
 		 */
 		if (ap->a_op == F_UNLCK &&
 		    nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id,
 		    ap->a_flags))
 			(void) ncl_flush(vp, MNT_WAIT, td, 1, 0);
 
 		/*
 		 * Loop around doing the lock op, while a blocking lock
 		 * must wait for the lock op to succeed.
 		 */
 		do {
 			ret = nfsrpc_advlock(vp, np->n_size, ap->a_op,
 			    ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags);
 			if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
 			    ap->a_op == F_SETLK) {
 				/*
 				 * Sleep with the vnode unlocked, then relock
 				 * and re-check that it was not doomed.
 				 */
 				NFSVOPUNLOCK(vp, 0);
 				error = nfs_catnap(PZERO | PCATCH, ret,
 				    "ncladvl");
 				if (error)
 					return (EINTR);
 				NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 				if (vp->v_iflag & VI_DOOMED) {
 					NFSVOPUNLOCK(vp, 0);
 					return (EBADF);
 				}
 			}
 		} while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
 		     ap->a_op == F_SETLK);
 		/* Translate the RPC result into the errno returned. */
 		if (ret == NFSERR_DENIED) {
 			NFSVOPUNLOCK(vp, 0);
 			return (EAGAIN);
 		} else if (ret == EINVAL || ret == EBADF || ret == EINTR) {
 			NFSVOPUNLOCK(vp, 0);
 			return (ret);
 		} else if (ret != 0) {
 			NFSVOPUNLOCK(vp, 0);
 			return (EACCES);
 		}
 
 		/*
 		 * Now, if we just got a lock, invalidate data in the buffer
 		 * cache, as required, so that the coherency conforms with
 		 * RFC3530 Sec. 9.3.2.
 		 */
 		if (ap->a_op == F_SETLK) {
 			/* Unmodified node: fetch fresh attributes to compare. */
 			if ((np->n_flag & NMODIFIED) == 0) {
 				np->n_attrstamp = 0;
 				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 				ret = VOP_GETATTR(vp, &va, cred);
 			}
 			/*
 			 * Modified locally, getattr failed, or the change
 			 * attribute moved: invalidate the cached buffers.
 			 */
 			if ((np->n_flag & NMODIFIED) || ret ||
 			    np->n_change != va.va_filerev) {
 				(void) ncl_vinvalbuf(vp, V_SAVE, td, 1);
 				if ((vp->v_iflag & VI_DOOMED) != 0) {
 					NFSVOPUNLOCK(vp, 0);
 					return (EBADF);
 				}
 				np->n_attrstamp = 0;
 				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 				ret = VOP_GETATTR(vp, &va, cred);
 				if (!ret) {
 					np->n_mtime = va.va_mtime;
 					np->n_change = va.va_filerev;
 				}
 			}
 			/* Mark that a file lock has been acquired. */
 			mtx_lock(&np->n_mtx);
 			np->n_flag |= NHASBEENLOCKED;
 			mtx_unlock(&np->n_mtx);
 		}
 		NFSVOPUNLOCK(vp, 0);
 		return (0);
 	} else if (!NFS_ISV4(vp)) {
 		error = NFSVOPLOCK(vp, LK_SHARED);
 		if (error)
 			return (error);
 		if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
 			/* Local locking only: read n_size under the vnode lock. */
 			size = VTONFS(vp)->n_size;
 			NFSVOPUNLOCK(vp, 0);
 			error = lf_advlock(ap, &(vp->v_lockf), size);
 		} else {
 			/*
 			 * NOTE(review): the vnode is not unlocked on the
 			 * nfs_advlock_p path here; presumably the hook
 			 * releases it -- confirm against its implementation.
 			 */
 			if (nfs_advlock_p != NULL)
 				error = nfs_advlock_p(ap);
 			else {
 				NFSVOPUNLOCK(vp, 0);
 				error = ENOLCK;
 			}
 		}
 		if (error == 0 && ap->a_op == F_SETLK) {
 			error = NFSVOPLOCK(vp, LK_SHARED);
 			if (error == 0) {
 				/* Mark that a file lock has been acquired. */
 				mtx_lock(&np->n_mtx);
 				np->n_flag |= NHASBEENLOCKED;
 				mtx_unlock(&np->n_mtx);
 				NFSVOPUNLOCK(vp, 0);
 			}
 		}
 	}
 	return (error);
 }
 
 /*
  * Asynchronous NFS advisory byte-level locks.
  *
  * Only supported for non-NFSv4 mounts that use local locking
  * (NFSMNT_NOLOCKD); everything else gets EOPNOTSUPP.
  */
 static int
 nfs_advlockasync(struct vop_advlockasync_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	u_quad_t size;
 	int error;
 
 	if (NFS_ISV4(vp))
 		return (EOPNOTSUPP);
 
 	error = NFSVOPLOCK(vp, LK_SHARED);
 	if (error != 0)
 		return (error);
 
 	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) == 0) {
 		NFSVOPUNLOCK(vp, 0);
 		return (EOPNOTSUPP);
 	}
 
 	/* Read the file size while the vnode is still locked. */
 	size = VTONFS(vp)->n_size;
 	NFSVOPUNLOCK(vp, 0);
 	return (lf_advlockasync(ap, &(vp->v_lockf), size));
 }
 
 /*
  * Print out the contents of an nfsnode.
  *
  * Prints the cached file id and file system id of the node, plus the
  * fifo details when the vnode is a VFIFO.  Always returns 0.
  */
 static int
 nfs_print(struct vop_print_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 
 	/*
 	 * Both values are cast to uintmax_t, so the unsigned conversions
 	 * (%ju / %jx) are used; %jd would show large file ids as negative.
 	 */
 	printf("\tfileid %ju fsid 0x%jx", (uintmax_t)np->n_vattr.na_fileid,
 	    (uintmax_t)np->n_vattr.na_fsid);
 	if (vp->v_type == VFIFO)
 		fifo_printinfo(vp);
 	printf("\n");
 	return (0);
 }
 
 /*
  * This is the "real" nfs::bwrite(struct buf*).
  *
  * Marks the buffer B_CACHE, undirties it and starts the write through
  * bstrategy().  If the buffer was B_ASYNC on entry the function returns
  * 0 right after starting the I/O; otherwise it waits for completion,
  * releases the buffer and returns the bufwait() result.  A B_INVAL
  * buffer is simply released.
  */
 int
 ncl_writebp(struct buf *bp, int force __unused, struct thread *td)
 {
 	int error, wasasync, wasdelwri;
 
 	BUF_ASSERT_HELD(bp);
 
 	if ((bp->b_flags & B_INVAL) != 0) {
 		brelse(bp);
 		return (0);
 	}
 
 	/* Remember the flag bits needed later, before they are changed. */
 	wasasync = (bp->b_flags & B_ASYNC) != 0;
 	wasdelwri = (bp->b_flags & B_DELWRI) != 0;
 	bp->b_flags |= B_CACHE;
 
 	/* Undirty the bp.  We will redirty it later if the I/O fails. */
 	bundirty(bp);
 	bp->b_flags &= ~B_DONE;
 	bp->b_ioflags &= ~BIO_ERROR;
 	bp->b_iocmd = BIO_WRITE;
 
 	bufobj_wref(bp->b_bufobj);
 	curthread->td_ru.ru_oublock++;
 
 	/*
 	 * Note: to avoid loopback deadlocks, we do not
 	 * assign b_runningbufspace.
 	 */
 	vfs_busy_pages(bp, 1);
 
 	BUF_KERNPROC(bp);
 	bp->b_iooffset = dbtob(bp->b_blkno);
 	bstrategy(bp);
 
 	if (wasasync)
 		return (0);
 
 	/* Synchronous write: wait for it and release the buffer. */
 	error = bufwait(bp);
 	if (wasdelwri)
 		reassignbuf(bp);
 	brelse(bp);
 	return (error);
 }
 
 /*
  * nfs special file access vnode op.
  * Essentially just get vattr and then imitate iaccess() since the device is
  * local to the client.
  */
 static int
 nfsspec_access(struct vop_access_args *ap)
 {
 	struct vattr *vap;
 	struct ucred *cred = ap->a_cred;
 	struct vnode *vp = ap->a_vp;
 	accmode_t accmode = ap->a_accmode;
 	struct vattr vattr;
 	int error;
 
 	/*
 	 * Disallow write attempts on filesystems mounted read-only;
 	 * unless the file is a socket, fifo, or a block or character
 	 * device resident on the filesystem.
 	 */
 	if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 		switch (vp->v_type) {
 		case VREG:
 		case VDIR:
 		case VLNK:
 			return (EROFS);
 		default:
 			break;
 		}
 	}
 	vap = &vattr;
 	error = VOP_GETATTR(vp, vap, cred);
 	if (error)
 		goto out;
 	error  = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
 	    accmode, cred, NULL);
 out:
 	return error;
 }
 
 /*
  * Read wrapper for fifos: flag the node as accessed, stamp the access
  * time, then hand the read to the fifo ops.
  */
 static int
 nfsfifo_read(struct vop_read_args *ap)
 {
 	struct nfsnode *np = VTONFS(ap->a_vp);
 
 	/* Set access flag. */
 	mtx_lock(&np->n_mtx);
 	np->n_flag |= NACC;
 	vfs_timestamp(&np->n_atim);
 	mtx_unlock(&np->n_mtx);
 
 	return (fifo_specops.vop_read(ap));
 }
 
 /*
  * Write wrapper for fifos.
  */
 static int
 nfsfifo_write(struct vop_write_args *ap)
 {
 	struct nfsnode *np = VTONFS(ap->a_vp);
 
 	/*
 	 * Set update flag.
 	 */
 	mtx_lock(&np->n_mtx);
 	np->n_flag |= NUPD;
 	vfs_timestamp(&np->n_mtim);
 	mtx_unlock(&np->n_mtx);
 	return(fifo_specops.vop_write(ap));
 }
 
 /*
  * Close wrapper for fifos.
  *
  * Update the times on the nfsnode then do fifo close.
  */
 static int
 nfsfifo_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct vattr vattr;
 	struct timespec ts;
 
 	mtx_lock(&np->n_mtx);
 	if (np->n_flag & (NACC | NUPD)) {
 		vfs_timestamp(&ts);
 		if (np->n_flag & NACC)
 			np->n_atim = ts;
 		if (np->n_flag & NUPD)
 			np->n_mtim = ts;
 		np->n_flag |= NCHG;
 		if (vrefcnt(vp) == 1 &&
 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 			VATTR_NULL(&vattr);
 			if (np->n_flag & NACC)
 				vattr.va_atime = np->n_atim;
 			if (np->n_flag & NUPD)
 				vattr.va_mtime = np->n_mtim;
 			mtx_unlock(&np->n_mtx);
 			(void)VOP_SETATTR(vp, &vattr, ap->a_cred);
 			goto out;
 		}
 	}
 	mtx_unlock(&np->n_mtx);
 out:
 	return (fifo_specops.vop_close(ap));
 }
 
 /*
  * Just call ncl_writebp() with the force argument set to 1.
  *
  * NOTE: B_DONE may or may not be set in a_bp on call.
  */
 static int
 nfs_bwrite(struct buf *bp)
 {
 
 	return (ncl_writebp(bp, 1, curthread));
 }
 
 /*
  * Buffer operations vector for the NFS client.  Only the write op is
  * NFS specific (nfs_bwrite); strategy, sync and bdflush use the generic
  * bufstrategy, bufsync and bufbdflush routines.
  */
 struct buf_ops buf_ops_newnfs = {
 	.bop_name	=	"buf_ops_nfs",
 	.bop_write	=	nfs_bwrite,
 	.bop_strategy	=	bufstrategy,
 	.bop_sync	=	bufsync,
 	.bop_bdflush	=	bufbdflush,
 };
 
 static int
 nfs_getacl(struct vop_getacl_args *ap)
 {
 	int error;
 
 	if (ap->a_type != ACL_TYPE_NFS4)
 		return (EOPNOTSUPP);
 	error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp,
 	    NULL);
 	if (error > NFSERR_STALE) {
 		(void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
 		error = EPERM;
 	}
 	return (error);
 }
 
 static int
 nfs_setacl(struct vop_setacl_args *ap)
 {
 	int error;
 
 	if (ap->a_type != ACL_TYPE_NFS4)
 		return (EOPNOTSUPP);
 	error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp,
 	    NULL);
 	if (error > NFSERR_STALE) {
 		(void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
 		error = EPERM;
 	}
 	return (error);
 }
 
 /*
  * Mark a vnode as an executing text file (VV_TEXT) after pushing all of
  * its dirty data to the server.  Always returns 0.
  */
 static int
 nfs_set_text(struct vop_set_text_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 
 	/*
 	 * The modify time of a text file must not change while it is being
 	 * executed, or the process executing it will be terminated.  So all
 	 * writes are pushed to the NFS server first: dirty mmap'd pages go
 	 * to the buffer cache, then the buffer cache itself is flushed.
 	 */
 	if (vp->v_object != NULL) {
 		VM_OBJECT_WLOCK(vp->v_object);
 		vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
 		VM_OBJECT_WUNLOCK(vp->v_object);
 	}
 	ncl_flush(vp, MNT_WAIT, curthread, 0, 0);
 
 	/* Bring n_mtime in line with the cached attribute mtime. */
 	mtx_lock(&np->n_mtx);
 	np->n_mtime = np->n_vattr.na_mtime;
 	mtx_unlock(&np->n_mtx);
 
 	vp->v_vflag |= VV_TEXT;
 	return (0);
 }
 
 /*
  * Return POSIX pathconf information applicable to nfs filesystems.
  *
  * ap->a_name selects the variable and the answer is stored through
  * ap->a_retval.  Returns 0 on success, the error from the Pathconf RPC
  * when that RPC fails, or EINVAL for a name that is not handled here.
  */
 static int
 nfs_pathconf(struct vop_pathconf_args *ap)
 {
 	struct nfsv3_pathconf pc;
 	struct nfsvattr nfsva;
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = curthread;
 	int attrflag, error;
 
 	/*
 	 * attrflag is only filled in by the RPC below.  Start it at zero so
 	 * that the _PC_ACL_NFS4 case never examines an indeterminate value
 	 * (and never trusts nfsva) when no RPC was done.
 	 */
 	attrflag = 0;
 
 	if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX ||
 	    ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED ||
 	    ap->a_name == _PC_NO_TRUNC)) ||
 	    (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) {
 		/*
 		 * Since only the above 4 a_names are returned by the NFSv3
 		 * Pathconf RPC, there is no point in doing it for others.
 		 * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can
 		 * be used for _PC_ACL_NFS4 as well.
 		 */
 		error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva,
 		    &attrflag, NULL);
 		/* Cache any attributes that came back, even on error. */
 		if (attrflag != 0)
 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
 			    1);
 		if (error != 0)
 			return (error);
 	} else {
 		/*
 		 * For NFSv2 (or NFSv3 when not one of the above 4 a_names),
 		 * just fake them.
 		 */
 		pc.pc_linkmax = LINK_MAX;
 		pc.pc_namemax = NFS_MAXNAMLEN;
 		pc.pc_notrunc = 1;
 		pc.pc_chownrestricted = 1;
 		pc.pc_caseinsensitive = 0;
 		pc.pc_casepreserving = 1;
 		error = 0;
 	}
 	switch (ap->a_name) {
 	case _PC_LINK_MAX:
 		*ap->a_retval = pc.pc_linkmax;
 		break;
 	case _PC_NAME_MAX:
 		*ap->a_retval = pc.pc_namemax;
 		break;
 	case _PC_PATH_MAX:
 		*ap->a_retval = PATH_MAX;
 		break;
 	case _PC_PIPE_BUF:
 		*ap->a_retval = PIPE_BUF;
 		break;
 	case _PC_CHOWN_RESTRICTED:
 		*ap->a_retval = pc.pc_chownrestricted;
 		break;
 	case _PC_NO_TRUNC:
 		*ap->a_retval = pc.pc_notrunc;
 		break;
 	case _PC_ACL_EXTENDED:
 		*ap->a_retval = 0;
 		break;
 	case _PC_ACL_NFS4:
 		/*
 		 * Report NFSv4 ACL support only when the Getattr reply
 		 * was received and lists the ACL attribute as supported.
 		 */
 		if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 &&
 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL))
 			*ap->a_retval = 1;
 		else
 			*ap->a_retval = 0;
 		break;
 	case _PC_ACL_PATH_MAX:
 		if (NFS_ISV4(vp))
 			*ap->a_retval = ACL_MAX_ENTRIES;
 		else
 			*ap->a_retval = 3;
 		break;
 	case _PC_MAC_PRESENT:
 		*ap->a_retval = 0;
 		break;
 	case _PC_ASYNC_IO:
 		/* _PC_ASYNC_IO should have been handled by upper layers. */
 		KASSERT(0, ("_PC_ASYNC_IO should not get here"));
 		error = EINVAL;
 		break;
 	case _PC_PRIO_IO:
 		*ap->a_retval = 0;
 		break;
 	case _PC_SYNC_IO:
 		*ap->a_retval = 0;
 		break;
 	case _PC_ALLOC_SIZE_MIN:
 		*ap->a_retval = vp->v_mount->mnt_stat.f_bsize;
 		break;
 	case _PC_FILESIZEBITS:
 		if (NFS_ISV34(vp))
 			*ap->a_retval = 64;
 		else
 			*ap->a_retval = 32;
 		break;
 	case _PC_REC_INCR_XFER_SIZE:
 		*ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_REC_MAX_XFER_SIZE:
 		*ap->a_retval = -1; /* means ``unlimited'' */
 		break;
 	case _PC_REC_MIN_XFER_SIZE:
 		*ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_REC_XFER_ALIGN:
 		*ap->a_retval = PAGE_SIZE;
 		break;
 	case _PC_SYMLINK_MAX:
 		*ap->a_retval = NFS_MAXPATHLEN;
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	return (error);
 }
 
Index: head/sys/fs/nfsserver/nfs_nfsdport.c
===================================================================
--- head/sys/fs/nfsserver/nfs_nfsdport.c	(revision 318735)
+++ head/sys/fs/nfsserver/nfs_nfsdport.c	(revision 318736)
@@ -1,3424 +1,3441 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/capsicum.h>
 
 /*
  * Functions that perform the vfs operations required by the routines in
  * nfsd_serv.c. It is hoped that this change will make the server more
  * portable.
  */
 
 #include <fs/nfs/nfsport.h>
 #include <sys/hash.h>
 #include <sys/sysctl.h>
 #include <nlm/nlm_prot.h>
 #include <nlm/nlm.h>
 
 FEATURE(nfsd, "NFSv4 server");
 
 /* State declared here but defined in other source files. */
 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
 extern int nfsrv_useacl;
 extern int newnfs_numnfsd;
 extern struct mount nfsv4root_mnt;
 extern struct nfsrv_stablefirst nfsrv_stablefirst;
 extern void (*nfsd_call_servertimer)(void);
 extern SVCPOOL	*nfsrvd_pool;
 extern struct nfsv4lock nfsd_suspend_lock;
 extern struct nfsclienthashhead *nfsclienthash;
 extern struct nfslockhashhead *nfslockhash;
 extern struct nfssessionhash *nfssessionhash;
 extern int nfsrv_sessionhashsize;
 extern struct nfsstatsv1 nfsstatsv1;
 /* Globals defined by this file. */
 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
 NFSDLOCKMUTEX;
 struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
 struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
 struct mtx nfsrc_udpmtx;
 struct mtx nfs_v4root_mutex;
 struct nfsrvfh nfs_rootfh, nfs_pubfh;
 int nfs_pubfhset = 0, nfs_rootfhset = 0;
 struct proc *nfsd_master_proc = NULL;
 int nfsd_debuglevel = 0;
 /* File-private bookkeeping for the master nfsd process. */
 static pid_t nfsd_master_pid = (pid_t)-1;
 static char nfsd_master_comm[MAXCOMLEN + 1];
 static struct timeval nfsd_master_start;
 static uint32_t nfsv4_sysid = 0;
 
 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
     struct ucred *);
 
 int nfsrv_enable_crossmntpt = 1;
 static int nfs_commit_blks;
 static int nfs_commit_miss;
 extern int nfsrv_issuedelegs;
 extern int nfsrv_dolocallocks;
 extern int nfsd_enable_stringtouid;
 
 /* Read/write integer knobs published under the vfs.nfsd sysctl node. */
 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "NFS server");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
     &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
 /* NOTE(review): commit_blks and commit_miss carry no description text. */
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
     0, "");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
     0, "");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
     &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
     &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel,
     0, "Debug level for NFS server");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW,
     &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names");
 
 /*
  * Tunables and table for the sequential-access heuristic below.
  * MAX_REORDERED_RPC scales the offset window within which an
  * out-of-order request leaves the sequential count untouched;
  * NUM_HEURISTIC is the number of table slots; NHUSE_INIT, NHUSE_INC and
  * NHUSE_MAX are the initial value, per-hit increment and cap of a
  * slot's use count.
  */
 #define	MAX_REORDERED_RPC	16
 #define	NUM_HEURISTIC		1031
 #define	NHUSE_INIT		64
 #define	NHUSE_INC		16
 #define	NHUSE_MAX		2048
 
 static struct nfsheur {
 	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
 	off_t nh_nextoff;	/* next offset for sequential detection */
 	int nh_use;		/* use count for selection */
 	int nh_seqcount;	/* heuristic */
 } nfsheur[NUM_HEURISTIC];
 
 
 /*
  * Heuristic to detect sequential operation.
  *
  * Finds (or recycles) the nfsheur slot for vp, updates its nh_seqcount
  * from the offset and size described by uio, and returns the slot.
  * The read and write paths in this file store the offset reached by the
  * I/O back into nh_nextoff afterwards.
  */
 static struct nfsheur *
 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
 {
 	struct nfsheur *nh;
 	int hi, try;
 
 	/* Locate best candidate. */
 	try = 32;
 	/* Starting slot is derived from the vnode's address. */
 	hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
 	nh = &nfsheur[hi];
 	/*
 	 * Probe up to 32 consecutive slots.  A slot already owned by vp
 	 * wins outright; otherwise each slot passed over has its use count
 	 * decayed and the slot with the lowest use count seen so far is
 	 * kept as the one to recycle.
 	 */
 	while (try--) {
 		if (nfsheur[hi].nh_vp == vp) {
 			nh = &nfsheur[hi];
 			break;
 		}
 		if (nfsheur[hi].nh_use > 0)
 			--nfsheur[hi].nh_use;
 		hi = (hi + 1) % NUM_HEURISTIC;
 		if (nfsheur[hi].nh_use < nh->nh_use)
 			nh = &nfsheur[hi];
 	}
 
 	/* Initialize hint if this is a new file. */
 	if (nh->nh_vp != vp) {
 		nh->nh_vp = vp;
 		nh->nh_nextoff = uio->uio_offset;
 		nh->nh_use = NHUSE_INIT;
 		/* An access starting at offset 0 gets a larger initial count. */
 		if (uio->uio_offset == 0)
 			nh->nh_seqcount = 4;
 		else
 			nh->nh_seqcount = 1;
 	}
 
 	/* Calculate heuristic. */
 	if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
 	    uio->uio_offset == nh->nh_nextoff) {
 		/* See comments in vfs_vnops.c:sequential_heuristic(). */
 		nh->nh_seqcount += howmany(uio->uio_resid, 16384);
 		if (nh->nh_seqcount > IO_SEQMAX)
 			nh->nh_seqcount = IO_SEQMAX;
 	} else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
 	    imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
 		/* Probably a reordered RPC, leave seqcount alone. */
 	} else if (nh->nh_seqcount > 1) {
 		/* Not sequential: halve the count, or drop it to zero below. */
 		nh->nh_seqcount /= 2;
 	} else {
 		nh->nh_seqcount = 0;
 	}
 	/* Every hit raises the slot's use count, capped at NHUSE_MAX. */
 	nh->nh_use += NHUSE_INC;
 	if (nh->nh_use > NHUSE_MAX)
 		nh->nh_use = NHUSE_MAX;
 	return (nh);
 }
 
 /*
  * Get attributes into nfsvattr structure.
  *
  * If the caller passes vpislocked == 0 and the vnode is not exclusively
  * locked, a shared lock is taken around VOP_GETATTR() and dropped again
  * before returning.  Returns the VOP_GETATTR() result.
  */
 int
 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
     struct thread *p, int vpislocked)
 {
 	int error, took_lock;
 
 	/*
 	 * With vpislocked == 0 the vnode is either exclusively locked by
 	 * this thread or not locked by it at all; only the latter case
 	 * needs a lock here.
 	 */
 	took_lock = 0;
 	if (vpislocked == 0 && NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
 		took_lock = 1;
 		NFSVOPLOCK(vp, LK_SHARED | LK_RETRY);
 	}
 
 	error = VOP_GETATTR(vp, &nvap->na_vattr, cred);
 
 	if (took_lock)
 		NFSVOPUNLOCK(vp, 0);
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Get a file handle for a vnode.
  *
  * The handle is zeroed, given the fsid of the vnode's mount, and then
  * completed by VOP_VPTOFH(), whose result is returned.
  */
 int
 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
 {
 	int error;
 
 	/* Zero first so bytes not written below are still defined. */
 	NFSBZERO((caddr_t)fhp, sizeof(*fhp));
 	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 
 	error = VOP_VPTOFH(vp, &fhp->fh_fid);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Perform access checking for vnodes obtained from file handles that would
  * refer to files already opened by a Unix client. You cannot just use
  * vn_writechk() and VOP_ACCESSX() for two reasons.
  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
  *     case.
  * 2 - The owner is to be given access irrespective of mode bits for some
  *     operations, so that processes that chmod after opening a file don't
  *     break.
  *
  * When vpislocked is 0 the vnode is share-locked here for the duration of
  * the check.  override is a mask of NFSACCCHK_* flags (or
  * NFSACCCHK_NOOVERRIDE).  supportedtypep, if not NULL, may have
  * NFSACCESS_DELETE cleared in it.
  * Returns 0 when access is allowed, otherwise an errno value: EPERM if
  * the lock cannot be obtained, EROFS, ETXTBSY, or the VOP_ACCESSX()
  * result.
  */
 int
 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
     struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
     u_int32_t *supportedtypep)
 {
 	struct vattr vattr;
 	int error = 0, getret = 0;
 
 	if (vpislocked == 0) {
 		if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
 			error = EPERM;
 			goto out;
 		}
 	}
 	if (accmode & VWRITE) {
 		/* Just vn_writechk() changed to check rdonly */
 		/*
 		 * Disallow write attempts on read-only file systems;
 		 * unless the file is a socket or a block or character
 		 * device resident on the file system.
 		 */
 		if (NFSVNO_EXRDONLY(exp) ||
 		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 			switch (vp->v_type) {
 			case VREG:
 			case VDIR:
 			case VLNK:
 				error = EROFS;
 				/* FALLTHROUGH */
 			default:
 				break;
 			}
 		}
 		/*
 		 * If there's shared text associated with
 		 * the inode, try to free it up once.  If
 		 * we fail, we can't allow writing.
 		 */
 		if (VOP_IS_TEXT(vp) && error == 0)
 			error = ETXTBSY;
 	}
 	if (error != 0) {
 		/* Drop the lock taken above before bailing out. */
 		if (vpislocked == 0)
 			NFSVOPUNLOCK(vp, 0);
 		goto out;
 	}
 
 	/*
 	 * Should the override still be applied when ACLs are enabled?
 	 */
 	error = VOP_ACCESSX(vp, accmode, cred, p);
 	if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
 		/*
 		 * Try again with VEXPLICIT_DENY, to see if the test for
 		 * deletion is supported.
 		 */
 		error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
 		if (error == 0) {
 			/*
 			 * No explicit deny: for a directory, redo the check
 			 * with VWRITE in place of the delete bits; otherwise
 			 * report that the delete test is not supported.
 			 */
 			if (vp->v_type == VDIR) {
 				accmode &= ~(VDELETE | VDELETE_CHILD);
 				accmode |= VWRITE;
 				error = VOP_ACCESSX(vp, accmode, cred, p);
 			} else if (supportedtypep != NULL) {
 				*supportedtypep &= ~NFSACCESS_DELETE;
 			}
 		}
 	}
 
 	/*
 	 * Allow certain operations for the owner (reads and writes
 	 * on files that are already open).
 	 */
 	if (override != NFSACCCHK_NOOVERRIDE &&
 	    (error == EPERM || error == EACCES)) {
 		if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
 			error = 0;
 		else if (override & NFSACCCHK_ALLOWOWNER) {
 			/* Owner override only applies if getattr succeeds. */
 			getret = VOP_GETATTR(vp, &vattr, cred);
 			if (getret == 0 && cred->cr_uid == vattr.va_uid)
 				error = 0;
 		}
 	}
 	if (vpislocked == 0)
 		NFSVOPUNLOCK(vp, 0);
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Set attribute(s) vnop.
  */
 int
 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
     struct thread *p, struct nfsexstuff *exp)
 {
 	int error;
 
 	error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Set up nameidata for a lookup() call and do it.
  *
  * dp is the starting directory and islocked says whether it arrives
  * locked.  If dp is not a directory it is released, the path buffer is
  * freed and ENOTDIR is returned with *retdirp left NULL.  Otherwise
  * *retdirp is set to the original dp after an extra VREF().
  * Symbolic links are only followed for ND_PUBLOOKUP requests; for any
  * other request meeting one yields EINVAL.
  * For errors reported through the "out" label the path buffer is released
  * and ni_vp, ni_dvp and ni_startdir are set to NULL.
  */
 int
 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
     struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
     struct vnode **retdirp)
 {
 	struct componentname *cnp = &ndp->ni_cnd;
 	int i;
 	struct iovec aiov;
 	struct uio auio;
 	/* Remember whether the caller asked for a locked leaf. */
 	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
 	int error = 0, crossmnt;
 	char *cp;
 
 	*retdirp = NULL;
 	cnp->cn_nameptr = cnp->cn_pnbuf;
 	ndp->ni_lcf = 0;
 	/*
 	 * Extract and set starting directory.
 	 */
 	if (dp->v_type != VDIR) {
 		if (islocked)
 			vput(dp);
 		else
 			vrele(dp);
 		nfsvno_relpathbuf(ndp);
 		error = ENOTDIR;
 		goto out1;
 	}
 	if (islocked)
 		NFSVOPUNLOCK(dp, 0);
 	VREF(dp);
 	*retdirp = dp;
 	if (NFSVNO_EXRDONLY(exp))
 		cnp->cn_flags |= RDONLY;
 	ndp->ni_segflg = UIO_SYSSPACE;
 	/* NOTE(review): crossmnt is assigned but not read in this function. */
 	crossmnt = 1;
 
 	if (nd->nd_flag & ND_PUBLOOKUP) {
 		ndp->ni_loopcnt = 0;
 		/* An absolute public path restarts from rootvnode. */
 		if (cnp->cn_pnbuf[0] == '/') {
 			vrele(dp);
 			/*
 			 * Check for degenerate pathnames here, since lookup()
 			 * panics on them.
 			 */
 			for (i = 1; i < ndp->ni_pathlen; i++)
 				if (cnp->cn_pnbuf[i] != '/')
 					break;
 			if (i == ndp->ni_pathlen) {
 				error = NFSERR_ACCES;
 				goto out;
 			}
 			dp = rootvnode;
 			VREF(dp);
 		}
 	} else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
 	    (nd->nd_flag & ND_NFSV4) == 0) {
 		/*
 		 * Only cross mount points for NFSv4 when doing a
 		 * mount while traversing the file system above
 		 * the mount point, unless nfsrv_enable_crossmntpt is set.
 		 */
 		cnp->cn_flags |= NOCROSSMOUNT;
 		crossmnt = 0;
 	}
 
 	/*
 	 * Initialize for scan, set ni_startdir and bump ref on dp again
 	 * because lookup() will dereference ni_startdir.
 	 */
 
 	cnp->cn_thread = p;
 	ndp->ni_startdir = dp;
 	ndp->ni_rootdir = rootvnode;
 	ndp->ni_topdir = NULL;
 
 	/*
 	 * LOCKLEAF is forced on for the duration of the loop; the leaf is
 	 * unlocked again below if the caller did not ask for it.
 	 */
 	if (!lockleaf)
 		cnp->cn_flags |= LOCKLEAF;
 	for (;;) {
 		cnp->cn_nameptr = cnp->cn_pnbuf;
 		/*
 		 * Call lookup() to do the real work.  If an error occurs,
 		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
 		 * we do not have to dereference anything before returning.
 		 * In either case ni_startdir will be dereferenced and NULLed
 		 * out.
 		 */
 		error = lookup(ndp);
 		if (error)
 			break;
 
 		/*
 		 * Check for encountering a symbolic link.  Trivial
 		 * termination occurs if no symlink encountered.
 		 */
 		if ((cnp->cn_flags & ISSYMLINK) == 0) {
 			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
 				nfsvno_relpathbuf(ndp);
 			if (ndp->ni_vp && !lockleaf)
 				NFSVOPUNLOCK(ndp->ni_vp, 0);
 			break;
 		}
 
 		/*
 		 * Validate symlink
 		 */
 		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
 			NFSVOPUNLOCK(ndp->ni_dvp, 0);
 		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
 			error = EINVAL;
 			goto badlink2;
 		}
 
 		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
 			error = ELOOP;
 			goto badlink2;
 		}
 		/*
 		 * If path components remain after the link, read the link
 		 * into a fresh buffer so the remainder can be appended;
 		 * otherwise the existing path buffer is reused.
 		 */
 		if (ndp->ni_pathlen > 1)
 			cp = uma_zalloc(namei_zone, M_WAITOK);
 		else
 			cp = cnp->cn_pnbuf;
 		aiov.iov_base = cp;
 		aiov.iov_len = MAXPATHLEN;
 		auio.uio_iov = &aiov;
 		auio.uio_iovcnt = 1;
 		auio.uio_offset = 0;
 		auio.uio_rw = UIO_READ;
 		auio.uio_segflg = UIO_SYSSPACE;
 		auio.uio_td = NULL;
 		auio.uio_resid = MAXPATHLEN;
 		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
 		if (error) {
 		/* badlink1 also frees the temporary buffer; badlink2 does not. */
 		badlink1:
 			if (ndp->ni_pathlen > 1)
 				uma_zfree(namei_zone, cp);
 		badlink2:
 			vrele(ndp->ni_dvp);
 			vput(ndp->ni_vp);
 			break;
 		}
 		linklen = MAXPATHLEN - auio.uio_resid;
 		if (linklen == 0) {
 			error = ENOENT;
 			goto badlink1;
 		}
 		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
 			error = ENAMETOOLONG;
 			goto badlink1;
 		}
 
 		/*
 		 * Adjust or replace path
 		 */
 		if (ndp->ni_pathlen > 1) {
 			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
 			uma_zfree(namei_zone, cnp->cn_pnbuf);
 			cnp->cn_pnbuf = cp;
 		} else
 			cnp->cn_pnbuf[linklen] = '\0';
 		ndp->ni_pathlen += linklen;
 
 		/*
 		 * Cleanup refs for next loop and check if root directory
 		 * should replace current directory.  Normally ni_dvp
 		 * becomes the new base directory and is cleaned up when
 		 * we loop.  Explicitly null pointers after invalidation
 		 * to clarify operation.
 		 */
 		vput(ndp->ni_vp);
 		ndp->ni_vp = NULL;
 
 		if (cnp->cn_pnbuf[0] == '/') {
 			vrele(ndp->ni_dvp);
 			ndp->ni_dvp = ndp->ni_rootdir;
 			VREF(ndp->ni_dvp);
 		}
 		ndp->ni_startdir = ndp->ni_dvp;
 		ndp->ni_dvp = NULL;
 	}
 	/* Restore the caller's original LOCKLEAF setting. */
 	if (!lockleaf)
 		cnp->cn_flags &= ~LOCKLEAF;
 
 out:
 	if (error) {
 		nfsvno_relpathbuf(ndp);
 		ndp->ni_vp = NULL;
 		ndp->ni_dvp = NULL;
 		ndp->ni_startdir = NULL;
 	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
 		/* Parent not requested by the caller: do not expose it. */
 		ndp->ni_dvp = NULL;
 	}
 
 out1:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Allocate a pathname buffer from namei_zone, attach it to ndp and hand
  * it back through *bufpp.  NOMACCHECK and HASBUF are set in the
  * component name flags.  If hashpp is supplied, *hashpp is set to NULL.
  */
 void
 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
 {
 	struct componentname *cnp;
 	char *pathbuf;
 
 	cnp = &ndp->ni_cnd;
 	cnp->cn_flags |= (NOMACCHECK | HASBUF);
 	pathbuf = uma_zalloc(namei_zone, M_WAITOK);
 	cnp->cn_pnbuf = pathbuf;
 	if (hashpp != NULL)
 		*hashpp = NULL;
 	*bufpp = pathbuf;
 }
 
 /*
  * Release the above path buffer, if not released by nfsvno_namei().
  */
 void
 nfsvno_relpathbuf(struct nameidata *ndp)
 {
 
 	if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
 		panic("nfsrelpath");
 	uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
 	ndp->ni_cnd.cn_flags &= ~HASBUF;
 }
 
 /*
  * Readlink vnode op into an mbuf list.
  *
  * On success *mpp is the head of the mbuf chain holding the link text,
  * *mpendp the last mbuf allocated and *lenp the link length.  On error
  * the chain is freed, *lenp is set to 0 and the VOP_READLINK() error is
  * returned.
  */
 int
 nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p,
     struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
 {
 	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
 	struct iovec *ivp = iv;
 	struct uio io, *uiop = &io;
 	struct mbuf *mp, *mp2 = NULL, *mp3 = NULL;
 	int i, len, tlen, error = 0;
 
 	len = 0;
 	i = 0;
 	/*
 	 * Chain mbuf clusters until NFS_MAXPATHLEN bytes of space exist,
 	 * describing each one with an iovec.  mp3 is the chain head and
 	 * mp2 the current tail.
 	 */
 	while (len < NFS_MAXPATHLEN) {
 		NFSMGET(mp);
 		MCLGET(mp, M_WAITOK);
 		mp->m_len = M_SIZE(mp);
 		if (len == 0) {
 			mp3 = mp2 = mp;
 		} else {
 			mp2->m_next = mp;
 			mp2 = mp;
 		}
 		/* Clip the final mbuf so the total is exactly NFS_MAXPATHLEN. */
 		if ((len + mp->m_len) > NFS_MAXPATHLEN) {
 			mp->m_len = NFS_MAXPATHLEN - len;
 			len = NFS_MAXPATHLEN;
 		} else {
 			len += mp->m_len;
 		}
 		ivp->iov_base = mtod(mp, caddr_t);
 		ivp->iov_len = mp->m_len;
 		i++;
 		ivp++;
 	}
 	uiop->uio_iov = iv;
 	uiop->uio_iovcnt = i;
 	uiop->uio_offset = 0;
 	uiop->uio_resid = len;
 	uiop->uio_rw = UIO_READ;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	uiop->uio_td = NULL;
 	error = VOP_READLINK(vp, uiop, cred);
 	if (error) {
 		m_freem(mp3);
 		*lenp = 0;
 		goto out;
 	}
 	if (uiop->uio_resid > 0) {
 		/*
 		 * The link was shorter than the space provided.
 		 * NOTE(review): nfsrv_adj() presumably trims the unused
 		 * tail and pads to the rounded-up length - confirm.
 		 */
 		len -= uiop->uio_resid;
 		tlen = NFSM_RNDUP(len);
 		nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len);
 	}
 	*lenp = len;
 	*mpp = mp3;
 	*mpendp = mp;
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Read vnode op call into mbuf list.
  *
  * Reads up to cnt bytes starting at off.  On success *mpp is the head of
  * the mbuf chain holding the data (NULL when nothing was read) and
  * *mpendp is set to m2, the last mbuf that was allocated.  On error the
  * chain is freed, *mpp is set to NULL and the VOP_READ() error returned.
  */
 int
 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
     struct thread *p, struct mbuf **mpp, struct mbuf **mpendp)
 {
 	struct mbuf *m;
 	int i;
 	struct iovec *iv;
 	struct iovec *iv2;
 	int error = 0, len, left, siz, tlen, ioflag = 0;
 	struct mbuf *m2 = NULL, *m3;
 	struct uio io, *uiop = &io;
 	struct nfsheur *nh;
 
 	len = left = NFSM_RNDUP(cnt);
 	m3 = NULL;
 	/*
 	 * Generate the mbuf list with the uio_iov ref. to it.
 	 */
 	i = 0;
 	/* First pass: allocate the chain and count the mbufs needed. */
 	while (left > 0) {
 		NFSMGET(m);
 		MCLGET(m, M_WAITOK);
 		m->m_len = 0;
 		siz = min(M_TRAILINGSPACE(m), left);
 		left -= siz;
 		i++;
 		if (m3)
 			m2->m_next = m;
 		else
 			m3 = m;
 		m2 = m;
 	}
 	MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
 	    M_TEMP, M_WAITOK);
 	uiop->uio_iov = iv2 = iv;
 	m = m3;
 	left = len;
 	i = 0;
 	/* Second pass: point one iovec at the free space of each mbuf. */
 	while (left > 0) {
 		if (m == NULL)
 			panic("nfsvno_read iov");
 		siz = min(M_TRAILINGSPACE(m), left);
 		if (siz > 0) {
 			iv->iov_base = mtod(m, caddr_t) + m->m_len;
 			iv->iov_len = siz;
 			m->m_len += siz;
 			left -= siz;
 			iv++;
 			i++;
 		}
 		m = m->m_next;
 	}
 	uiop->uio_iovcnt = i;
 	uiop->uio_offset = off;
 	uiop->uio_resid = len;
 	uiop->uio_rw = UIO_READ;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	uiop->uio_td = NULL;
 	/* Fold the sequential-access estimate into the I/O flags. */
 	nh = nfsrv_sequential_heuristic(uiop, vp);
 	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
 	/* XXX KDM make this more systematic? */
 	nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid;
 	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
 	FREE((caddr_t)iv2, M_TEMP);
 	if (error) {
 		m_freem(m3);
 		*mpp = NULL;
 		goto out;
 	}
 	/* Remember where the next sequential read would start. */
 	nh->nh_nextoff = uiop->uio_offset;
 	/* Bytes actually read, never reported as more than cnt. */
 	tlen = len - uiop->uio_resid;
 	cnt = cnt < tlen ? cnt : tlen;
 	tlen = NFSM_RNDUP(cnt);
 	if (tlen == 0) {
 		/*
 		 * NOTE(review): the chain is freed here but *mpendp below
 		 * still receives m2 - callers presumably ignore it when
 		 * *mpp is NULL; confirm.
 		 */
 		m_freem(m3);
 		m3 = NULL;
 	} else if (len != tlen || tlen != cnt)
 		nfsrv_adj(m3, len - tlen, tlen - cnt);
 	*mpp = m3;
 	*mpendp = m2;
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Write vnode op from an mbuf list.
  *
  * retlen bytes are written at offset off.  The data starts at cp inside
  * mbuf mp and continues through the rest of the chain.  cnt is used as
  * the number of iovec entries to allocate.  stable selects between an
  * unstable write (NFSWRITE_UNSTABLE) and a synchronous one (IO_SYNC).
  * Returns the VOP_WRITE() result.
  */
 int
 nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable,
     struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
 {
 	struct iovec *ivp;
 	int i, len;
 	struct iovec *iv;
 	int ioflags, error;
 	struct uio io, *uiop = &io;
 	struct nfsheur *nh;
 
 	MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
 	    M_WAITOK);
 	uiop->uio_iov = iv = ivp;
 	uiop->uio_iovcnt = cnt;
 	/* Bytes left in the first mbuf from cp to the end of its data. */
 	i = mtod(mp, caddr_t) + mp->m_len - cp;
 	len = retlen;
 	/* Walk the chain, emitting one iovec per mbuf that holds data. */
 	while (len > 0) {
 		if (mp == NULL)
 			panic("nfsvno_write");
 		if (i > 0) {
 			i = min(i, len);
 			ivp->iov_base = cp;
 			ivp->iov_len = i;
 			ivp++;
 			len -= i;
 		}
 		mp = mp->m_next;
 		if (mp) {
 			i = mp->m_len;
 			cp = mtod(mp, caddr_t);
 		}
 	}
 
 	if (stable == NFSWRITE_UNSTABLE)
 		ioflags = IO_NODELOCKED;
 	else
 		ioflags = (IO_SYNC | IO_NODELOCKED);
 	uiop->uio_resid = retlen;
 	uiop->uio_rw = UIO_WRITE;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	NFSUIOPROC(uiop, p);
 	uiop->uio_offset = off;
 	/* Fold the sequential-access estimate into the I/O flags. */
 	nh = nfsrv_sequential_heuristic(uiop, vp);
 	ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
 	/* XXX KDM make this more systematic? */
 	nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid;
 	error = VOP_WRITE(vp, uiop, ioflags, cred);
 	/* Only a successful write advances the expected next offset. */
 	if (error == 0)
 		nh->nh_nextoff = uiop->uio_offset;
 	FREE((caddr_t)iv, M_TEMP);
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Common code for creating a regular file (plus special files for V2).
  *
  * Starts from nd->nd_repstat as the error.  When there is no error and
  * the lookup found nothing (ni_vp == NULL) the object is created
  * according to nvap->na_type; otherwise the lookup is cleaned up and an
  * existing file is truncated if na_size is set.  In every path the
  * start directory, parent directory and path buffer held by ndp are
  * released here.  On success *vpp is the resulting vnode.
  */
 int
 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
     struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
     int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp)
 {
 	u_quad_t tempsize;
 	int error;
 
 	error = nd->nd_repstat;
 	if (!error && ndp->ni_vp == NULL) {
 		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
 			vrele(ndp->ni_startdir);
 			error = VOP_CREATE(ndp->ni_dvp,
 			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
 			vput(ndp->ni_dvp);
 			nfsvno_relpathbuf(ndp);
 			if (!error) {
 				if (*exclusive_flagp) {
 					/*
 					 * Exclusive create: store the two
 					 * verifier words in the new file's
 					 * atime.  A failed setattr is
 					 * reported as NFSERR_NOTSUPP.
 					 */
 					*exclusive_flagp = 0;
 					NFSVNO_ATTRINIT(nvap);
 					nvap->na_atime.tv_sec = cverf[0];
 					nvap->na_atime.tv_nsec = cverf[1];
 					error = VOP_SETATTR(ndp->ni_vp,
 					    &nvap->na_vattr, nd->nd_cred);
 					if (error != 0) {
 						vput(ndp->ni_vp);
 						ndp->ni_vp = NULL;
 						error = NFSERR_NOTSUPP;
 					}
 				}
 			}
 		/*
 		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
 		 * (This implies, just get out on an error.)
 		 */
 		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
 			nvap->na_type == VFIFO) {
 			/* A VCHR with rdev 0xffffffff is treated as a fifo. */
 			if (nvap->na_type == VCHR && rdev == 0xffffffff)
 				nvap->na_type = VFIFO;
                         if (nvap->na_type != VFIFO &&
 			    (error = priv_check_cred(nd->nd_cred,
 			     PRIV_VFS_MKNOD_DEV, 0))) {
 				vrele(ndp->ni_startdir);
 				nfsvno_relpathbuf(ndp);
 				vput(ndp->ni_dvp);
 				goto out;
 			}
 			nvap->na_rdev = rdev;
 			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
 			    &ndp->ni_cnd, &nvap->na_vattr);
 			vput(ndp->ni_dvp);
 			nfsvno_relpathbuf(ndp);
 			vrele(ndp->ni_startdir);
 			if (error)
 				goto out;
 		} else {
 			/* Any other type cannot be created here. */
 			vrele(ndp->ni_startdir);
 			nfsvno_relpathbuf(ndp);
 			vput(ndp->ni_dvp);
 			error = ENXIO;
 			goto out;
 		}
 		*vpp = ndp->ni_vp;
 	} else {
 		/*
 		 * Handle cases where error is already set and/or
 		 * the file exists.
 		 * 1 - clean up the lookup
 		 * 2 - iff !error and na_size set, truncate it
 		 */
 		vrele(ndp->ni_startdir);
 		nfsvno_relpathbuf(ndp);
 		*vpp = ndp->ni_vp;
 		if (ndp->ni_dvp == *vpp)
 			vrele(ndp->ni_dvp);
 		else
 			vput(ndp->ni_dvp);
 		if (!error && nvap->na_size != VNOVAL) {
 			error = nfsvno_accchk(*vpp, VWRITE,
 			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 			    NFSACCCHK_VPISLOCKED, NULL);
 			if (!error) {
 				/* Reset nvap so only the size is applied. */
 				tempsize = nvap->na_size;
 				NFSVNO_ATTRINIT(nvap);
 				nvap->na_size = tempsize;
 				error = VOP_SETATTR(*vpp,
 				    &nvap->na_vattr, nd->nd_cred);
 			}
 		}
 		/*
 		 * NOTE(review): this assumes *vpp is non-NULL whenever error
 		 * is set on this path - confirm against the callers.
 		 */
 		if (error)
 			vput(*vpp);
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Do a mknod vnode op.
  *
  * Creates a VCHR, VBLK, VSOCK or VFIFO object described by nvap.
  * Returns EEXIST if the lookup already found a vnode, NFSERR_BADTYPE
  * for any other type, the priv_check_cred() error when a device node is
  * requested without privilege, or the VOP_CREATE()/VOP_MKNOD() result.
  * The start directory, parent directory and path buffer held by ndp are
  * released on every path.
  */
 int
 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
     struct thread *p)
 {
 	int error = 0;
 	enum vtype vtyp;
 
 	vtyp = nvap->na_type;
 	/*
 	 * Iff doesn't exist, create it.
 	 */
 	if (ndp->ni_vp) {
 		/*
 		 * NOTE(review): unlike nfsvno_mkdir(), ni_dvp is always
 		 * vput() here without checking ni_dvp == ni_vp - confirm
 		 * callers never reach this with the two equal.
 		 */
 		vrele(ndp->ni_startdir);
 		nfsvno_relpathbuf(ndp);
 		vput(ndp->ni_dvp);
 		vrele(ndp->ni_vp);
 		error = EEXIST;
 		goto out;
 	}
 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
 		vrele(ndp->ni_startdir);
 		nfsvno_relpathbuf(ndp);
 		vput(ndp->ni_dvp);
 		error = NFSERR_BADTYPE;
 		goto out;
 	}
 	if (vtyp == VSOCK) {
 		/* Sockets are made with VOP_CREATE() rather than VOP_MKNOD(). */
 		vrele(ndp->ni_startdir);
 		error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
 		    &ndp->ni_cnd, &nvap->na_vattr);
 		vput(ndp->ni_dvp);
 		nfsvno_relpathbuf(ndp);
 	} else {
 		/* Everything except a fifo needs PRIV_VFS_MKNOD_DEV. */
 		if (nvap->na_type != VFIFO &&
 		    (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) {
 			vrele(ndp->ni_startdir);
 			nfsvno_relpathbuf(ndp);
 			vput(ndp->ni_dvp);
 			goto out;
 		}
 		error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
 		    &ndp->ni_cnd, &nvap->na_vattr);
 		vput(ndp->ni_dvp);
 		nfsvno_relpathbuf(ndp);
 		vrele(ndp->ni_startdir);
 		/*
 		 * Since VOP_MKNOD returns the ni_vp, I can't
 		 * see any reason to do the lookup.
 		 */
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Mkdir vnode op.
  *
  * Creates the directory named by ndp with the attributes in nvap, or
  * returns EEXIST if the lookup already found something.  Either way the
  * parent directory and the path buffer held by ndp are released.
  */
 int
 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
     struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
 {
 	int error;
 
 	if (ndp->ni_vp == NULL) {
 		error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
 		    &nvap->na_vattr);
 		vput(ndp->ni_dvp);
 		nfsvno_relpathbuf(ndp);
 	} else {
 		/* Name already exists: just drop the lookup results. */
 		if (ndp->ni_dvp != ndp->ni_vp)
 			vput(ndp->ni_dvp);
 		else
 			vrele(ndp->ni_dvp);
 		vrele(ndp->ni_vp);
 		nfsvno_relpathbuf(ndp);
 		error = EEXIST;
 	}
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * symlink vnode op.
  *
  * Creates a symbolic link to pathcp under the name described by ndp, or
  * returns EEXIST if the lookup already found something.  The start
  * directory, parent directory and path buffer held by ndp are released
  * on both paths.
  */
 int
 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
     int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
     struct nfsexstuff *exp)
 {
 	int error;
 
 	if (ndp->ni_vp == NULL) {
 		error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
 		    &nvap->na_vattr, pathcp);
 		vput(ndp->ni_dvp);
 		vrele(ndp->ni_startdir);
 		nfsvno_relpathbuf(ndp);
 		/*
 		 * VOP_SYMLINK() hands back ni_vp, so no further lookup is
 		 * done (FreeBSD 7/current still carried one).  For v2 the
 		 * new vnode is simply vput().
 		 */
 		if (error == 0 && not_v2 == 0)
 			vput(ndp->ni_vp);
 	} else {
 		/* Name already exists: just drop the lookup results. */
 		vrele(ndp->ni_startdir);
 		nfsvno_relpathbuf(ndp);
 		if (ndp->ni_dvp != ndp->ni_vp)
 			vput(ndp->ni_dvp);
 		else
 			vrele(ndp->ni_dvp);
 		vrele(ndp->ni_vp);
 		error = EEXIST;
 	}
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Parse symbolic link arguments.
  * This function has an ugly side effect. On success it will MALLOC() an
  * area of len + 1 bytes for the symlink target and return it through
  * *pathcpp, with the length in *lenp.
  * So, if it returns with *pathcpp != NULL, that must be FREE'd later.
  * On failure any area allocated here is freed before returning, and
  * *pathcpp and *lenp keep the NULL and 0 they are set to on entry.
  */
 int
 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
     struct thread *p, char **pathcpp, int *lenp)
 {
 	u_int32_t *tl;
 	char *pathcp = NULL;
 	int error = 0, len;
 	struct nfsv2_sattr *sp;
 
 	*pathcpp = NULL;
 	*lenp = 0;
 	/* For NFSv3 the attributes precede the link text. */
 	if ((nd->nd_flag & ND_NFSV3) &&
 	    (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p)))
 		goto nfsmout;
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	len = fxdr_unsigned(int, *tl);
 	/* Reject empty and over-long targets before allocating. */
 	if (len > NFS_MAXPATHLEN || len <= 0) {
 		error = EBADRPC;
 		goto nfsmout;
 	}
 	MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK);
 	error = nfsrv_mtostr(nd, pathcp, len);
 	if (error)
 		goto nfsmout;
 	/* For NFSv2 the attributes follow the link text; only mode is used. */
 	if (nd->nd_flag & ND_NFSV2) {
 		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
 	}
 	*pathcpp = pathcp;
 	*lenp = len;
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	if (pathcp)
 		free(pathcp, M_TEMP);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Remove a non-directory object.
  */
 int
 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
     struct thread *p, struct nfsexstuff *exp)
 {
 	struct vnode *vp;
 	int error = 0;
 
 	vp = ndp->ni_vp;
 	if (vp->v_type == VDIR)
 		error = NFSERR_ISDIR;
 	else if (is_v4)
 		error = nfsrv_checkremove(vp, 1, p);
 	if (!error)
 		error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
 	if (ndp->ni_dvp == vp)
 		vrele(ndp->ni_dvp);
 	else
 		vput(ndp->ni_dvp);
 	vput(vp);
 	if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
 		nfsvno_relpathbuf(ndp);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Remove a directory.
  */
 int
 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
     struct thread *p, struct nfsexstuff *exp)
 {
 	struct vnode *vp;
 	int error = 0;
 
 	vp = ndp->ni_vp;
 	if (vp->v_type != VDIR) {
 		error = ENOTDIR;
 		goto out;
 	}
 	/*
 	 * No rmdir "." please.
 	 */
 	if (ndp->ni_dvp == vp) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * The root of a mounted filesystem cannot be deleted.
 	 */
 	if (vp->v_vflag & VV_ROOT)
 		error = EBUSY;
 out:
 	if (!error)
 		error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
 	if (ndp->ni_dvp == vp)
 		vrele(ndp->ni_dvp);
 	else
 		vput(ndp->ni_dvp);
 	vput(vp);
 	if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
 		nfsvno_relpathbuf(ndp);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Rename vnode op.
  * fromndp describes the source (ni_dvp/ni_vp) and tondp the destination
  * (ni_dvp and, if the target already exists, ni_vp).
  * A non-zero ndstat is treated as an error found by the caller: the
  * source vnodes are released and ndstat is returned without touching
  * the destination vnodes or tondp.
  * ndflag is tested for ND_NFSV2 and ND_NFSV4 to select the error value
  * returned and the delegation handling.
  * Returns 0 on success (including the "source and destination are the
  * same vnode" no-op case) or an error.
  */
 int
 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
     u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
 {
 	struct vnode *fvp, *tvp, *tdvp;
 	int error = 0;
 
 	fvp = fromndp->ni_vp;
 	if (ndstat) {
 		/* Only the "from" side is cleaned up on this path (see out1). */
 		vrele(fromndp->ni_dvp);
 		vrele(fvp);
 		error = ndstat;
 		goto out1;
 	}
 	tdvp = tondp->ni_dvp;
 	tvp = tondp->ni_vp;
 	if (tvp != NULL) {
 		/* Source and existing target must both be, or both not be, directories. */
 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 			error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
 			goto out;
 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 			error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
 			goto out;
 		}
 		/* Refuse to replace a directory that has something mounted on it. */
 		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
 			error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 			goto out;
 		}
 
 		/*
 		 * A rename to '.' or '..' results in a prematurely
 		 * unlocked vnode on FreeBSD5, so I'm just going to fail that
 		 * here.
 		 */
 		if ((tondp->ni_cnd.cn_namelen == 1 &&
 		     tondp->ni_cnd.cn_nameptr[0] == '.') ||
 		    (tondp->ni_cnd.cn_namelen == 2 &&
 		     tondp->ni_cnd.cn_nameptr[0] == '.' &&
 		     tondp->ni_cnd.cn_nameptr[1] == '.')) {
 			error = EINVAL;
 			goto out;
 		}
 	}
 	/* Refuse to move a directory that has something mounted on it. */
 	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 		goto out;
 	}
 	/* Source and destination directory must be on the same mount. */
 	if (fvp->v_mount != tdvp->v_mount) {
 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 		goto out;
 	}
 	/* The source cannot be the destination directory itself. */
 	if (fvp == tdvp) {
 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
 		goto out;
 	}
 	if (fvp == tvp) {
 		/*
 		 * If source and destination are the same, there is nothing to
 		 * do. Set error to -1 to indicate this.
 		 */
 		error = -1;
 		goto out;
 	}
 	if (ndflag & ND_NFSV4) {
 		/* fvp is locked only for the duration of the check. */
 		if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
 			error = nfsrv_checkremove(fvp, 0, p);
 			NFSVOPUNLOCK(fvp, 0);
 		} else
 			error = EPERM;
 		if (tvp && !error)
 			error = nfsrv_checkremove(tvp, 1, p);
 	} else {
 		/*
 		 * For NFSv2 and NFSv3, try to get rid of the delegation, so
 		 * that the NFSv4 client won't be confused by the rename.
 		 * Since nfsd_recalldelegation() can only be called on an
 		 * unlocked vnode at this point and fvp is the file that will
 		 * still exist after the rename, just do fvp.
 		 */
 		nfsd_recalldelegation(fvp, p);
 	}
 out:
 	if (!error) {
 		/*
 		 * No vnode releases are done here on this path.
 		 * NOTE(review): presumably VOP_RENAME() releases the four
 		 * vnodes itself; confirm against VOP_RENAME(9).
 		 */
 		error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
 		    &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
 		    &tondp->ni_cnd);
 	} else {
 		/* Error (or no-op) path: release all four vnodes here. */
 		if (tdvp == tvp)
 			vrele(tdvp);
 		else
 			vput(tdvp);
 		if (tvp)
 			vput(tvp);
 		vrele(fromndp->ni_dvp);
 		vrele(fvp);
 		/* -1 was the internal "nothing to do" marker, not a failure. */
 		if (error == -1)
 			error = 0;
 	}
 	vrele(tondp->ni_startdir);
 	nfsvno_relpathbuf(tondp);
 out1:
 	vrele(fromndp->ni_startdir);
 	nfsvno_relpathbuf(fromndp);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Link vnode op.
  */
 int
 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
     struct thread *p, struct nfsexstuff *exp)
 {
 	struct vnode *xp;
 	int error = 0;
 
 	xp = ndp->ni_vp;
 	if (xp != NULL) {
 		error = EEXIST;
 	} else {
 		xp = ndp->ni_dvp;
 		if (vp->v_mount != xp->v_mount)
 			error = EXDEV;
 	}
 	if (!error) {
 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 		if ((vp->v_iflag & VI_DOOMED) == 0)
 			error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
 		else
 			error = EPERM;
 		if (ndp->ni_dvp == vp)
 			vrele(ndp->ni_dvp);
 		else
 			vput(ndp->ni_dvp);
 		NFSVOPUNLOCK(vp, 0);
 	} else {
 		if (ndp->ni_dvp == ndp->ni_vp)
 			vrele(ndp->ni_dvp);
 		else
 			vput(ndp->ni_dvp);
 		if (ndp->ni_vp)
 			vrele(ndp->ni_vp);
 	}
 	nfsvno_relpathbuf(ndp);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Do the fsync() appropriate for the commit.
  * off and cnt give the byte range requested by the client.  Either the
  * whole file is flushed via VOP_FSYNC() or only the buffers covering
  * the requested range are written synchronously (see below for which).
  * Returns the VOP_FSYNC() result for the whole-file case; the ranged
  * case always leaves error at 0.
  */
 int
 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
     struct thread *td)
 {
 	int error = 0;
 
 	/*
 	 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of
 	 * file is done.  At this time VOP_FSYNC does not accept offset and
 	 * byte count parameters so call VOP_FSYNC the whole file for now.
 	 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3.
 	 * File systems that do not use the buffer cache (as indicated
 	 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC().
 	 */
 	if (cnt == 0 || cnt > MAX_COMMIT_COUNT ||
 	    (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) {
 		/*
 		 * Give up and do the whole thing
 		 */
 		if (vp->v_object &&
 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 			VM_OBJECT_WLOCK(vp->v_object);
 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
 			VM_OBJECT_WUNLOCK(vp->v_object);
 		}
 		error = VOP_FSYNC(vp, MNT_WAIT, td);
 	} else {
 		/*
 		 * Locate and synchronously write any buffers that fall
 		 * into the requested range.  Note:  we are assuming that
 		 * f_iosize is a power of 2.
 		 */
 		int iosize = vp->v_mount->mnt_stat.f_iosize;
 		int iomask = iosize - 1;
 		struct bufobj *bo;
 		daddr_t lblkno;
 
 		/*
 		 * Align to iosize boundary, super-align to page boundary.
 		 * cnt grows by whatever off is rounded down by, so the end
 		 * of the range stays where it was.
 		 */
 		if (off & iomask) {
 			cnt += off & iomask;
 			off &= ~(u_quad_t)iomask;
 		}
 		if (off & PAGE_MASK) {
 			cnt += off & PAGE_MASK;
 			off &= ~(u_quad_t)PAGE_MASK;
 		}
 		lblkno = off / iosize;
 
 		if (vp->v_object &&
 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 			VM_OBJECT_WLOCK(vp->v_object);
 			vm_object_page_clean(vp->v_object, off, off + cnt,
 			    OBJPC_SYNC);
 			VM_OBJECT_WUNLOCK(vp->v_object);
 		}
 
 		bo = &vp->v_bufobj;
 		BO_LOCK(bo);
 		/* One iteration per iosize block; BO_LOCK is held at loop top. */
 		while (cnt > 0) {
 			struct buf *bp;
 
 			/*
 			 * If we have a buffer and it is marked B_DELWRI we
 			 * have to lock and write it.  Otherwise the prior
 			 * write is assumed to have already been committed.
 			 *
 			 * gbincore() can return invalid buffers now so we
 			 * have to check that bit as well (though B_DELWRI
 			 * should not be set if B_INVAL is set there could be
 			 * a race here since we haven't locked the buffer).
 			 */
 			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
 				/*
 				 * LK_INTERLOCK hands the bufobj lock to
 				 * BUF_LOCK(), so it is re-taken below on
 				 * both the retry and the normal path.
 				 */
 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
 				    LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) {
 					BO_LOCK(bo);
 					continue; /* retry */
 				}
 			    	if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
 				    B_DELWRI) {
 					bremfree(bp);
 					bp->b_flags &= ~B_ASYNC;
 					bwrite(bp);
 					++nfs_commit_miss;
 				} else
 					BUF_UNLOCK(bp);
 				BO_LOCK(bo);
 			}
 			++nfs_commit_blks;
 			/* Last (possibly partial) block of the range is done. */
 			if (cnt < iosize)
 				break;
 			cnt -= iosize;
 			++lblkno;
 		}
 		BO_UNLOCK(bo);
 	}
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Statfs vnode op.
  * Fills in *sf via VFS_STATFS() on vp's mount point and, on success,
  * clamps negative f_bavail and f_ffree values to 0.
  * Returns the VFS_STATFS() result.
  */
 int
 nfsvno_statfs(struct vnode *vp, struct statfs *sf)
 {
 	int error;
 
 	error = VFS_STATFS(vp->v_mount, sf);
 	if (error != 0)
 		goto out;
 
 	/*
 	 * NFS carries these counts as unsigned quantities on the wire, so
 	 * a negative value cannot be represented and would show up as a
 	 * very large positive one on clients such as Solaris10.  Report 0
 	 * instead.
 	 */
 	if (sf->f_bavail < 0)
 		sf->f_bavail = 0;
 	if (sf->f_ffree < 0)
 		sf->f_ffree = 0;
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
  * must handle nfsrv_opencheck() calls after any other access checks.
  * ndp->ni_vp == NULL means the file does not exist yet and is created
  * here; otherwise the existing vnode is used (and truncated when a size
  * is set in nvap and it is a regular file).
  * The result status is left in nd->nd_repstat and the vnode (or NULL)
  * is returned via *vpp.
  */
 void
 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
     nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
     int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
     NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p,
     struct nfsexstuff *exp, struct vnode **vpp)
 {
 	struct vnode *vp = NULL;
 	u_quad_t tempsize;
 	struct nfsexstuff nes;
 
 	/* No vnode to check against yet, so do the open check with NULL. */
 	if (ndp->ni_vp == NULL)
 		nd->nd_repstat = nfsrv_opencheck(clientid,
 		    stateidp, stp, NULL, nd, p, nd->nd_repstat);
 	if (!nd->nd_repstat) {
 		if (ndp->ni_vp == NULL) {
 			/* Create the file. */
 			vrele(ndp->ni_startdir);
 			nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
 			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
 			vput(ndp->ni_dvp);
 			nfsvno_relpathbuf(ndp);
 			if (!nd->nd_repstat) {
 				if (*exclusive_flagp) {
 					/*
 					 * Exclusive create: store the two
 					 * words of the create verifier in
 					 * the access time.
 					 */
 					*exclusive_flagp = 0;
 					NFSVNO_ATTRINIT(nvap);
 					nvap->na_atime.tv_sec = cverf[0];
 					nvap->na_atime.tv_nsec = cverf[1];
 					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
 					    &nvap->na_vattr, cred);
 					if (nd->nd_repstat != 0) {
 						vput(ndp->ni_vp);
 						ndp->ni_vp = NULL;
 						nd->nd_repstat = NFSERR_NOTSUPP;
 					} else
 						NFSSETBIT_ATTRBIT(attrbitp,
 						    NFSATTRBIT_TIMEACCESS);
 				} else {
 					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
 					    aclp, p, attrbitp, exp);
 				}
 			}
 			vp = ndp->ni_vp;
 		} else {
 			/* The file already exists. */
 			if (ndp->ni_startdir)
 				vrele(ndp->ni_startdir);
 			nfsvno_relpathbuf(ndp);
 			vp = ndp->ni_vp;
 			/* The directory vnode is only released here for a create. */
 			if (create == NFSV4OPEN_CREATE) {
 				if (ndp->ni_dvp == vp)
 					vrele(ndp->ni_dvp);
 				else
 					vput(ndp->ni_dvp);
 			}
 			if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
 				/* Truncation requested: needs write access. */
 				if (ndp->ni_cnd.cn_flags & RDONLY)
 					NFSVNO_SETEXRDONLY(&nes);
 				else
 					NFSVNO_EXINIT(&nes);
 				nd->nd_repstat = nfsvno_accchk(vp, 
 				    VWRITE, cred, &nes, p,
 				    NFSACCCHK_NOOVERRIDE,
 				    NFSACCCHK_VPISLOCKED, NULL);
 				/* The access check result is passed in to the open check. */
 				nd->nd_repstat = nfsrv_opencheck(clientid,
 				    stateidp, stp, vp, nd, p, nd->nd_repstat);
 				if (!nd->nd_repstat) {
 					/* Reset nvap so that only the size is set. */
 					tempsize = nvap->na_size;
 					NFSVNO_ATTRINIT(nvap);
 					nvap->na_size = tempsize;
 					nd->nd_repstat = VOP_SETATTR(vp,
 					    &nvap->na_vattr, cred);
 				}
 			} else if (vp->v_type == VREG) {
 				nd->nd_repstat = nfsrv_opencheck(clientid,
 				    stateidp, stp, vp, nd, p, nd->nd_repstat);
 			}
 		}
 	} else {
 		/* Failed before doing anything: release what ndp holds. */
 		if (ndp->ni_cnd.cn_flags & HASBUF)
 			nfsvno_relpathbuf(ndp);
 		if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
 			vrele(ndp->ni_startdir);
 			if (ndp->ni_dvp == ndp->ni_vp)
 				vrele(ndp->ni_dvp);
 			else
 				vput(ndp->ni_dvp);
 			if (ndp->ni_vp)
 				vput(ndp->ni_vp);
 		}
 	}
 	*vpp = vp;
 
 	NFSEXITCODE2(0, nd);
 }
 
 /*
  * Updates the file rev and sets the mtime and ctime
  * to the current clock time, returning the va_filerev and va_Xtime
  * values.
  * Return ESTALE to indicate the vnode is VI_DOOMED.
  */
 int
 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
     struct ucred *cred, struct thread *p)
 {
 	struct vattr va;
 
 	VATTR_NULL(&va);
 	vfs_timestamp(&va.va_mtime);
 	if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
 		NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
 		if ((vp->v_iflag & VI_DOOMED) != 0)
 			return (ESTALE);
 	}
 	(void) VOP_SETATTR(vp, &va, cred);
 	(void) nfsvno_getattr(vp, nvap, cred, p, 1);
 	return (0);
 }
 
 /*
  * Glue routine to nfsv4_fillattr().
  * Passes its arguments straight through, supplying NULL for the fourth
  * argument and &nvap->na_vattr for the attributes, and hands back
  * whatever nfsv4_fillattr() returns.  Callers in this file add that
  * return value to a running reply length, so it is not an errno;
  * hence the fixed 0 given to NFSEXITCODE2().
  */
 int
 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
     struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
     struct ucred *cred, struct thread *p, int isdgram, int reterr,
     int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
 {
 	int retval;
 
 	retval = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp,
 	    rderror, attrbitp, cred, p, isdgram, reterr,
 	    supports_nfsv4acls, at_root, mounted_on_fileno);
 	NFSEXITCODE2(0, nd);
 	return (retval);
 }
 
 /* Since the Readdir vnode ops vary, put the entire functions in here. */
 /*
  * nfs readdir service
  * - mallocs what it thinks is enough to read
  *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
  * - calls VOP_READDIR()
  * - loops around building the reply
  *	if the output generated exceeds count break out of loop
  *	The NFSM_CLGET macro is used here so that the reply will be packed
  *	tightly in mbuf clusters.
  * - it trims out records with d_fileno == 0
  *	this doesn't matter for Unix clients, but they might confuse clients
  *	for other os'.
  * - it trims out records with d_type == DT_WHT
  *	these cannot be seen through NFS (unless we extend the protocol)
  *     The alternate call nfsrvd_readdirplus() does lookups as well.
  * PS: The NFS protocol spec. does not clarify what the "count" byte
  *	argument is a count of.. just name strings and file id's or the
  *	entire reply rpc or ...
  *	I tried just file name and id sizes and it confused the Sun client,
  *	so I am using the full rpc size now. The "paranoia.." comment refers
  *	to including the status longwords that are not a part of the dir.
  *	"entry" structures, but are in the rpc.
  *
  * Failures detected after the arguments have been parsed are reported
  * to the client via nd->nd_repstat and the function itself returns 0;
  * a non-zero return only happens via the nfsmout label.
  */
 int
 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
     struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
 {
 	struct dirent *dp;
 	u_int32_t *tl;
 	int dirlen;
 	char *cpos, *cend, *rbuf;
 	struct nfsvattr at;
 	int nlen, error = 0, getret = 1;
 	int siz, cnt, fullsiz, eofflag, ncookies;
 	u_int64_t off, toff, verf;
 	u_long *cookies = NULL, *cookiep;
 	struct uio io;
 	struct iovec iv;
 	int is_ufs;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	/* NFSv2 sends a 32 bit offset; otherwise a 64 bit offset and verifier. */
 	if (nd->nd_flag & ND_NFSV2) {
 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		off = fxdr_unsigned(u_quad_t, *tl++);
 	} else {
 		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 		off = fxdr_hyper(tl);
 		tl += 2;
 		/* verf is only consumed by the disabled (#if 0) check below. */
 		verf = fxdr_hyper(tl);
 		tl += 2;
 	}
 	toff = off;
 	cnt = fxdr_unsigned(int, *tl);
 	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
 		cnt = NFS_SRVMAXDATA(nd);
 	/* Round the read size up to a multiple of DIRBLKSIZ. */
 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 	fullsiz = siz;
 	if (nd->nd_flag & ND_NFSV3) {
 		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred,
 		    p, 1);
 #if 0
 		/*
 		 * va_filerev is not sufficient as a cookie verifier,
 		 * since it is not supposed to change when entries are
 		 * removed/added unless that offset cookies returned to
 		 * the client are no longer valid.
 		 */
 		if (!nd->nd_repstat && toff && verf != at.na_filerev)
 			nd->nd_repstat = NFSERR_BAD_COOKIE;
 #endif
 	}
 	if (!nd->nd_repstat && vp->v_type != VDIR)
 		nd->nd_repstat = NFSERR_NOTDIR;
 	if (nd->nd_repstat == 0 && cnt == 0) {
 		if (nd->nd_flag & ND_NFSV2)
 			/* NFSv2 does not have NFSERR_TOOSMALL */
 			nd->nd_repstat = EPERM;
 		else
 			nd->nd_repstat = NFSERR_TOOSMALL;
 	}
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 		    NFSACCCHK_VPISLOCKED, NULL);
 	if (nd->nd_repstat) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 again:
 	/* Each pass reads up to siz bytes of entries starting at off. */
 	eofflag = 0;
 	if (cookies) {
 		free((caddr_t)cookies, M_TEMP);
 		cookies = NULL;
 	}
 
 	iv.iov_base = rbuf;
 	iv.iov_len = siz;
 	io.uio_iov = &iv;
 	io.uio_iovcnt = 1;
 	io.uio_offset = (off_t)off;
 	io.uio_resid = siz;
 	io.uio_segflg = UIO_SYSSPACE;
 	io.uio_rw = UIO_READ;
 	io.uio_td = NULL;
 	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
 	    &cookies);
 	off = (u_int64_t)io.uio_offset;
 	/* siz becomes the number of bytes actually read. */
 	if (io.uio_resid)
 		siz -= io.uio_resid;
 
 	/* A successful read that returned no cookie array is treated as an error. */
 	if (!cookies && !nd->nd_repstat)
 		nd->nd_repstat = NFSERR_PERM;
 	if (nd->nd_flag & ND_NFSV3) {
 		getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
 		if (!nd->nd_repstat)
 			nd->nd_repstat = getret;
 	}
 
 	/*
 	 * Handles the failed cases. nd->nd_repstat == 0 past here.
 	 */
 	if (nd->nd_repstat) {
 		vput(vp);
 		free((caddr_t)rbuf, M_TEMP);
 		if (cookies)
 			free((caddr_t)cookies, M_TEMP);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	/*
 	 * If nothing read, return eof
 	 * rpc reply
 	 */
 	if (siz == 0) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV2) {
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		} else {
 			nfsrv_postopattr(nd, getret, &at);
 			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 			txdr_hyper(at.na_filerev, tl);
 			tl += 2;
 		}
 		/* "No entry follows", then "eof". */
 		*tl++ = newnfs_false;
 		*tl = newnfs_true;
 		FREE((caddr_t)rbuf, M_TEMP);
 		FREE((caddr_t)cookies, M_TEMP);
 		goto out;
 	}
 
 	/*
 	 * Check for degenerate cases of nothing useful read.
 	 * If so go try again
 	 */
 	cpos = rbuf;
 	cend = rbuf + siz;
 	dp = (struct dirent *)cpos;
 	cookiep = cookies;
 
 	/*
 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
 	 * directory offset up to a block boundary, so it is necessary to
 	 * skip over the records that precede the requested offset. This
 	 * requires the assumption that file offset cookies monotonically
 	 * increase.
 	 */
 	while (cpos < cend && ncookies > 0 &&
 	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 	     (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) {
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
 		ncookies--;
 	}
 	/* Everything read was skipped: read the next chunk at full size. */
 	if (cpos >= cend || ncookies == 0) {
 		siz = fullsiz;
 		toff = off;
 		goto again;
 	}
 	vput(vp);
 
 	/*
 	 * dirlen is the size of the reply, including all XDR and must
 	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
 	 * if the XDR should be included in "count", but to be safe, we do.
 	 * (Include the two booleans at the end of the reply in dirlen now.)
 	 */
 	if (nd->nd_flag & ND_NFSV3) {
 		nfsrv_postopattr(nd, getret, &at);
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		txdr_hyper(at.na_filerev, tl);
 		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
 	} else {
 		dirlen = 2 * NFSX_UNSIGNED;
 	}
 
 	/* Loop through the records and build reply */
 	while (cpos < cend && ncookies > 0) {
 		nlen = dp->d_namlen;
 		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
 			nlen <= NFS_MAXNAMLEN) {
 			if (nd->nd_flag & ND_NFSV3)
 				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
 			else
 				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
 			/* This entry would not fit, so stop; more remain. */
 			if (dirlen > cnt) {
 				eofflag = 0;
 				break;
 			}
 
 			/*
 			 * Build the directory record xdr from
 			 * the dirent entry.
 			 */
 			if (nd->nd_flag & ND_NFSV3) {
 				/* 64 bit fileid: high word written as 0. */
 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 				*tl++ = newnfs_true;
 				*tl++ = 0;
 			} else {
 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 				*tl++ = newnfs_true;
 			}
 			*tl = txdr_unsigned(dp->d_fileno);
 			(void) nfsm_strtom(nd, dp->d_name, nlen);
 			if (nd->nd_flag & ND_NFSV3) {
 				/* 64 bit cookie: high word written as 0. */
 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 				*tl++ = 0;
 			} else
 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(*cookiep);
 		}
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
 		ncookies--;
 	}
 	/* Unprocessed records remain in the buffer, so this is not eof. */
 	if (cpos < cend)
 		eofflag = 0;
 	/* Trailer: "no entry follows", then the eof flag. */
 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	*tl++ = newnfs_false;
 	if (eofflag)
 		*tl = newnfs_true;
 	else
 		*tl = newnfs_false;
 	FREE((caddr_t)rbuf, M_TEMP);
 	FREE((caddr_t)cookies, M_TEMP);
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 	/*
 	 * NOTE(review): no explicit goto targets this label in this
 	 * function; presumably the NFSM_DISSECT() macro jumps here with
 	 * error set when the request is too short.  Both NFSM_DISSECT()
 	 * uses come before rbuf and cookies are allocated.  Confirm
 	 * against the macro definition.
 	 */
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Readdirplus for V3 and Readdir for V4.
  */
 int
 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
     struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
 {
 	struct dirent *dp;
 	u_int32_t *tl;
 	int dirlen;
 	char *cpos, *cend, *rbuf;
 	struct vnode *nvp;
 	fhandle_t nfh;
 	struct nfsvattr nva, at, *nvap = &nva;
 	struct mbuf *mb0, *mb1;
 	struct nfsreferral *refp;
 	int nlen, r, error = 0, getret = 1, usevget = 1;
 	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
 	caddr_t bpos0, bpos1;
 	u_int64_t off, toff, verf;
 	u_long *cookies = NULL, *cookiep;
 	nfsattrbit_t attrbits, rderrbits, savbits;
 	struct uio io;
 	struct iovec iv;
 	struct componentname cn;
 	int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls;
 	struct mount *mp, *new_mp;
 	uint64_t mounted_on_fileno;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
 	off = fxdr_hyper(tl);
 	toff = off;
 	tl += 2;
 	verf = fxdr_hyper(tl);
 	tl += 2;
 	siz = fxdr_unsigned(int, *tl++);
 	cnt = fxdr_unsigned(int, *tl);
 
 	/*
 	 * Use the server's maximum data transfer size as the upper bound
 	 * on reply datalen.
 	 */
 	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
 		cnt = NFS_SRVMAXDATA(nd);
 
 	/*
 	 * siz is a "hint" of how much directory information (name, fileid,
 	 * cookie) should be in the reply. At least one client "hints" 0,
 	 * so I set it to cnt for that case. I also round it up to the
 	 * next multiple of DIRBLKSIZ.
 	 */
 	if (siz <= 0)
 		siz = cnt;
 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 
 	if (nd->nd_flag & ND_NFSV4) {
 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 		if (error)
 			goto nfsmout;
 		NFSSET_ATTRBIT(&savbits, &attrbits);
 		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits);
 		NFSZERO_ATTRBIT(&rderrbits);
 		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
 	} else {
 		NFSZERO_ATTRBIT(&attrbits);
 	}
 	fullsiz = siz;
 	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
 	if (!nd->nd_repstat) {
 	    if (off && verf != at.na_filerev) {
 		/*
 		 * va_filerev is not sufficient as a cookie verifier,
 		 * since it is not supposed to change when entries are
 		 * removed/added unless that offset cookies returned to
 		 * the client are no longer valid.
 		 */
 #if 0
 		if (nd->nd_flag & ND_NFSV4) {
 			nd->nd_repstat = NFSERR_NOTSAME;
 		} else {
 			nd->nd_repstat = NFSERR_BAD_COOKIE;
 		}
 #endif
 	    } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) {
 		nd->nd_repstat = NFSERR_BAD_COOKIE;
 	    }
 	}
 	if (!nd->nd_repstat && vp->v_type != VDIR)
 		nd->nd_repstat = NFSERR_NOTDIR;
 	if (!nd->nd_repstat && cnt == 0)
 		nd->nd_repstat = NFSERR_TOOSMALL;
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 		    NFSACCCHK_VPISLOCKED, NULL);
 	if (nd->nd_repstat) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
 	is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0;
 
 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 again:
 	eofflag = 0;
 	if (cookies) {
 		free((caddr_t)cookies, M_TEMP);
 		cookies = NULL;
 	}
 
 	iv.iov_base = rbuf;
 	iv.iov_len = siz;
 	io.uio_iov = &iv;
 	io.uio_iovcnt = 1;
 	io.uio_offset = (off_t)off;
 	io.uio_resid = siz;
 	io.uio_segflg = UIO_SYSSPACE;
 	io.uio_rw = UIO_READ;
 	io.uio_td = NULL;
 	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
 	    &cookies);
 	off = (u_int64_t)io.uio_offset;
 	if (io.uio_resid)
 		siz -= io.uio_resid;
 
 	getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
 
 	if (!cookies && !nd->nd_repstat)
 		nd->nd_repstat = NFSERR_PERM;
 	if (!nd->nd_repstat)
 		nd->nd_repstat = getret;
 	if (nd->nd_repstat) {
 		vput(vp);
 		if (cookies)
 			free((caddr_t)cookies, M_TEMP);
 		free((caddr_t)rbuf, M_TEMP);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	/*
 	 * If nothing read, return eof
 	 * rpc reply
 	 */
 	if (siz == 0) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 		txdr_hyper(at.na_filerev, tl);
 		tl += 2;
 		*tl++ = newnfs_false;
 		*tl = newnfs_true;
 		free((caddr_t)cookies, M_TEMP);
 		free((caddr_t)rbuf, M_TEMP);
 		goto out;
 	}
 
 	/*
 	 * Check for degenerate cases of nothing useful read.
 	 * If so go try again
 	 */
 	cpos = rbuf;
 	cend = rbuf + siz;
 	dp = (struct dirent *)cpos;
 	cookiep = cookies;
 
 	/*
 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
 	 * directory offset up to a block boundary, so it is necessary to
 	 * skip over the records that precede the requested offset. This
 	 * requires the assumption that file offset cookies monotonically
 	 * increase.
 	 */
 	while (cpos < cend && ncookies > 0 &&
 	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 	   (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) ||
 	   ((nd->nd_flag & ND_NFSV4) &&
 	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
 	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
 		ncookies--;
 	}
 	if (cpos >= cend || ncookies == 0) {
 		siz = fullsiz;
 		toff = off;
 		goto again;
 	}
 
 	/*
 	 * Busy the file system so that the mount point won't go away
 	 * and, as such, VFS_VGET() can be used safely.
 	 */
 	mp = vp->v_mount;
 	vfs_ref(mp);
 	NFSVOPUNLOCK(vp, 0);
 	nd->nd_repstat = vfs_busy(mp, 0);
 	vfs_rel(mp);
 	if (nd->nd_repstat != 0) {
 		vrele(vp);
 		free(cookies, M_TEMP);
 		free(rbuf, M_TEMP);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 
 	/*
-	 * For now ZFS requires VOP_LOOKUP as a workaround.  Until ino_t is changed
-	 * to 64 bit type a ZFS filesystem with over 1 billion files in it
-	 * will suffer from 64bit -> 32bit truncation.
+	 * Check to see if entries in this directory can be safely acquired
+	 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required.
+	 * ZFS snapshot directories need VOP_LOOKUP(), so that any
+	 * automount of the snapshot directory that is required will
+	 * be done.
+	 * This needs to be done here for NFSv4, since NFSv4 never does
+	 * a VFS_VGET() for "." or "..".
 	 */
-	if (is_zfs == 1)
-		usevget = 0;
+	if (is_zfs == 1) {
+		r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp);
+		if (r == EOPNOTSUPP) {
+			usevget = 0;
+			cn.cn_nameiop = LOOKUP;
+			cn.cn_lkflags = LK_SHARED | LK_RETRY;
+			cn.cn_cred = nd->nd_cred;
+			cn.cn_thread = p;
+		} else if (r == 0)
+			vput(nvp);
+	}
 
-	cn.cn_nameiop = LOOKUP;
-	cn.cn_lkflags = LK_SHARED | LK_RETRY;
-	cn.cn_cred = nd->nd_cred;
-	cn.cn_thread = p;
-
 	/*
 	 * Save this position, in case there is an error before one entry
 	 * is created.
 	 */
 	mb0 = nd->nd_mb;
 	bpos0 = nd->nd_bpos;
 
 	/*
 	 * Fill in the first part of the reply.
 	 * dirlen is the reply length in bytes and cannot exceed cnt.
 	 * (Include the two booleans at the end of the reply in dirlen now,
 	 *  so we recognize when we have exceeded cnt.)
 	 */
 	if (nd->nd_flag & ND_NFSV3) {
 		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
 		nfsrv_postopattr(nd, getret, &at);
 	} else {
 		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
 	}
 	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
 	txdr_hyper(at.na_filerev, tl);
 
 	/*
 	 * Save this position, in case there is an empty reply needed.
 	 */
 	mb1 = nd->nd_mb;
 	bpos1 = nd->nd_bpos;
 
 	/* Loop through the records and build reply */
 	entrycnt = 0;
 	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
 		nlen = dp->d_namlen;
 		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
 		    nlen <= NFS_MAXNAMLEN &&
 		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
 		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
 		      || (nlen == 1 && dp->d_name[0] != '.'))) {
 			/*
 			 * Save the current position in the reply, in case
 			 * this entry exceeds cnt.
 			 */
 			mb1 = nd->nd_mb;
 			bpos1 = nd->nd_bpos;
 	
 			/*
 			 * For readdir_and_lookup get the vnode using
 			 * the file number.
 			 */
 			nvp = NULL;
 			refp = NULL;
 			r = 0;
 			at_root = 0;
 			needs_unbusy = 0;
 			new_mp = mp;
 			mounted_on_fileno = (uint64_t)dp->d_fileno;
 			if ((nd->nd_flag & ND_NFSV3) ||
 			    NFSNONZERO_ATTRBIT(&savbits)) {
 				if (nd->nd_flag & ND_NFSV4)
 					refp = nfsv4root_getreferral(NULL,
 					    vp, dp->d_fileno);
 				if (refp == NULL) {
 					if (usevget)
 						r = VFS_VGET(mp, dp->d_fileno,
 						    LK_SHARED, &nvp);
 					else
 						r = EOPNOTSUPP;
 					if (r == EOPNOTSUPP) {
-						usevget = 0;
+						if (usevget) {
+							usevget = 0;
+							cn.cn_nameiop = LOOKUP;
+							cn.cn_lkflags =
+							    LK_SHARED |
+							    LK_RETRY;
+							cn.cn_cred =
+							    nd->nd_cred;
+							cn.cn_thread = p;
+						}
 						cn.cn_nameptr = dp->d_name;
 						cn.cn_namelen = nlen;
 						cn.cn_flags = ISLASTCN |
 						    NOFOLLOW | LOCKLEAF;
 						if (nlen == 2 &&
 						    dp->d_name[0] == '.' &&
 						    dp->d_name[1] == '.')
 							cn.cn_flags |=
 							    ISDOTDOT;
 						if (NFSVOPLOCK(vp, LK_SHARED)
 						    != 0) {
 							nd->nd_repstat = EPERM;
 							break;
 						}
 						if ((vp->v_vflag & VV_ROOT) != 0
 						    && (cn.cn_flags & ISDOTDOT)
 						    != 0) {
 							vref(vp);
 							nvp = vp;
 							r = 0;
 						} else {
 							r = VOP_LOOKUP(vp, &nvp,
 							    &cn);
 							if (vp != nvp)
 								NFSVOPUNLOCK(vp,
 								    0);
 						}
 					}
 
 					/*
 					 * For NFSv4, check to see if nvp is
 					 * a mount point and get the mount
 					 * point vnode, as required.
 					 */
 					if (r == 0 &&
 					    nfsrv_enable_crossmntpt != 0 &&
 					    (nd->nd_flag & ND_NFSV4) != 0 &&
 					    nvp->v_type == VDIR &&
 					    nvp->v_mountedhere != NULL) {
 						new_mp = nvp->v_mountedhere;
 						r = vfs_busy(new_mp, 0);
 						vput(nvp);
 						nvp = NULL;
 						if (r == 0) {
 							r = VFS_ROOT(new_mp,
 							    LK_SHARED, &nvp);
 							needs_unbusy = 1;
 							if (r == 0)
 								at_root = 1;
 						}
 					}
 				}
 				if (!r) {
 				    if (refp == NULL &&
 					((nd->nd_flag & ND_NFSV3) ||
 					 NFSNONZERO_ATTRBIT(&attrbits))) {
 					r = nfsvno_getfh(nvp, &nfh, p);
 					if (!r)
 					    r = nfsvno_getattr(nvp, nvap,
 						nd->nd_cred, p, 1);
 					if (r == 0 && is_zfs == 1 &&
 					    nfsrv_enable_crossmntpt != 0 &&
 					    (nd->nd_flag & ND_NFSV4) != 0 &&
 					    nvp->v_type == VDIR &&
 					    vp->v_mount != nvp->v_mount) {
 					    /*
 					     * For a ZFS snapshot, there is a
 					     * pseudo mount that does not set
 					     * v_mountedhere, so it needs to
 					     * be detected via a different
 					     * mount structure.
 					     */
 					    at_root = 1;
 					    if (new_mp == mp)
 						new_mp = nvp->v_mount;
 					}
 				    }
 				} else {
 				    nvp = NULL;
 				}
 				if (r) {
 					if (!NFSISSET_ATTRBIT(&attrbits,
 					    NFSATTRBIT_RDATTRERROR)) {
 						if (nvp != NULL)
 							vput(nvp);
 						if (needs_unbusy != 0)
 							vfs_unbusy(new_mp);
 						nd->nd_repstat = r;
 						break;
 					}
 				}
 			}
 
 			/*
 			 * Build the directory record xdr
 			 */
 			if (nd->nd_flag & ND_NFSV3) {
 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 				*tl++ = newnfs_true;
 				*tl++ = 0;
 				*tl = txdr_unsigned(dp->d_fileno);
 				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 				*tl++ = 0;
 				*tl = txdr_unsigned(*cookiep);
 				nfsrv_postopattr(nd, 0, nvap);
 				dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
 				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
 				if (nvp != NULL)
 					vput(nvp);
 			} else {
 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 				*tl++ = newnfs_true;
 				*tl++ = 0;
 				*tl = txdr_unsigned(*cookiep);
 				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
 				if (nvp != NULL) {
 					supports_nfsv4acls =
 					    nfs_supportsnfsv4acls(nvp);
 					NFSVOPUNLOCK(nvp, 0);
 				} else
 					supports_nfsv4acls = 0;
 				if (refp != NULL) {
 					dirlen += nfsrv_putreferralattr(nd,
 					    &savbits, refp, 0,
 					    &nd->nd_repstat);
 					if (nd->nd_repstat) {
 						if (nvp != NULL)
 							vrele(nvp);
 						if (needs_unbusy != 0)
 							vfs_unbusy(new_mp);
 						break;
 					}
 				} else if (r) {
 					dirlen += nfsvno_fillattr(nd, new_mp,
 					    nvp, nvap, &nfh, r, &rderrbits,
 					    nd->nd_cred, p, isdgram, 0,
 					    supports_nfsv4acls, at_root,
 					    mounted_on_fileno);
 				} else {
 					dirlen += nfsvno_fillattr(nd, new_mp,
 					    nvp, nvap, &nfh, r, &attrbits,
 					    nd->nd_cred, p, isdgram, 0,
 					    supports_nfsv4acls, at_root,
 					    mounted_on_fileno);
 				}
 				if (nvp != NULL)
 					vrele(nvp);
 				dirlen += (3 * NFSX_UNSIGNED);
 			}
 			if (needs_unbusy != 0)
 				vfs_unbusy(new_mp);
 			if (dirlen <= cnt)
 				entrycnt++;
 		}
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
 		ncookies--;
 	}
 	vrele(vp);
 	vfs_unbusy(mp);
 
 	/*
 	 * If dirlen > cnt, we must strip off the last entry. If that
 	 * results in an empty reply, report NFSERR_TOOSMALL.
 	 */
 	if (dirlen > cnt || nd->nd_repstat) {
 		if (!nd->nd_repstat && entrycnt == 0)
 			nd->nd_repstat = NFSERR_TOOSMALL;
 		if (nd->nd_repstat) {
 			newnfs_trimtrailing(nd, mb0, bpos0);
 			if (nd->nd_flag & ND_NFSV3)
 				nfsrv_postopattr(nd, getret, &at);
 		} else
 			newnfs_trimtrailing(nd, mb1, bpos1);
 		eofflag = 0;
 	} else if (cpos < cend)
 		eofflag = 0;
 	if (!nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		*tl++ = newnfs_false;
 		if (eofflag)
 			*tl = newnfs_true;
 		else
 			*tl = newnfs_false;
 	}
 	FREE((caddr_t)cookies, M_TEMP);
 	FREE((caddr_t)rbuf, M_TEMP);
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Get the settable attributes out of the mbuf list.
  * (Return 0 or EBADRPC)
  *
  * The wire format that is parsed depends on which NFS version flag is set
  * in nd->nd_flag:
  *  - NFSv2: a fixed struct nfsv2_sattr; a field equal to newnfs_xdrneg1 is
  *    not copied into *nvap.
  *  - NFSv3: each attribute is preceded by a discriminator word and is only
  *    read (and stored in *nvap) when that word selects it.
  *  - NFSv4: parsing is handed to nfsv4_sattr(); this is the only case that
  *    uses vp, attrbitp, aclp and p.
  *
  * NOTE(review): NFSM_DISSECT is defined elsewhere; it presumably sets
  * "error" and jumps to nfsmout when the request is too short -- confirm.
  */
 int
 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
 {
 	u_int32_t *tl;
 	struct nfsv2_sattr *sp;
 	/* toclient records that the atime was supplied by the client. */
 	int error = 0, toclient = 0;
 
 	switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
 	case ND_NFSV2:
 		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		/*
 		 * Some old clients didn't fill in the high order 16bits.
 		 * --> check the low order 2 bytes for 0xffff
 		 */
 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
 			nvap->na_mode = nfstov_mode(sp->sa_mode);
 		if (sp->sa_uid != newnfs_xdrneg1)
 			nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
 		if (sp->sa_gid != newnfs_xdrneg1)
 			nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
 		if (sp->sa_size != newnfs_xdrneg1)
 			nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
 		if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
 #ifdef notyet
 			fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
 #else
 			/* Only the seconds field of the atime is used here. */
 			nvap->na_atime.tv_sec =
 				fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
 			nvap->na_atime.tv_nsec = 0;
 #endif
 		}
 		if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
 			fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
 		break;
 	case ND_NFSV3:
 		/* mode: boolean discriminator, then the value if true. */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (*tl == newnfs_true) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			nvap->na_mode = nfstov_mode(*tl);
 		}
 		/* uid */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (*tl == newnfs_true) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			nvap->na_uid = fxdr_unsigned(uid_t, *tl);
 		}
 		/* gid */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (*tl == newnfs_true) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			nvap->na_gid = fxdr_unsigned(gid_t, *tl);
 		}
 		/* size: a 64-bit value occupying two XDR words. */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (*tl == newnfs_true) {
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			nvap->na_size = fxdr_hyper(tl);
 		}
 		/*
 		 * atime: either taken from the request (TOCLIENT) or set to
 		 * the server's current time (TOSERVER).  Any other
 		 * discriminator value leaves na_atime untouched.
 		 */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		switch (fxdr_unsigned(int, *tl)) {
 		case NFSV3SATTRTIME_TOCLIENT:
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			fxdr_nfsv3time(tl, &nvap->na_atime);
 			toclient = 1;
 			break;
 		case NFSV3SATTRTIME_TOSERVER:
 			vfs_timestamp(&nvap->na_atime);
 			nvap->na_vaflags |= VA_UTIMES_NULL;
 			break;
 		}
 		/*
 		 * mtime: same encoding as atime.  VA_UTIMES_NULL is cleared
 		 * when the mtime comes from the client, and is only set for
 		 * a server-time mtime when the atime was not client-supplied.
 		 */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		switch (fxdr_unsigned(int, *tl)) {
 		case NFSV3SATTRTIME_TOCLIENT:
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			fxdr_nfsv3time(tl, &nvap->na_mtime);
 			nvap->na_vaflags &= ~VA_UTIMES_NULL;
 			break;
 		case NFSV3SATTRTIME_TOSERVER:
 			vfs_timestamp(&nvap->na_mtime);
 			if (!toclient)
 				nvap->na_vaflags |= VA_UTIMES_NULL;
 			break;
 		}
 		break;
 	case ND_NFSV4:
 		/* The attribute-bitmap driven V4 format has its own parser. */
 		error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p);
 	}
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Handle the settable attributes for V4.
  *
  * Reads the requested attribute bitmap into *attrbitp, then the attribute
  * list length, and then walks the bitmap in bit order, decoding each set
  * attribute from the request into *nvap (or *aclp for the ACL).
  *
  * Attributes that are recognized but not settable here are skipped over in
  * the request and reported by setting nd->nd_repstat to
  * NFSERR_ATTRNOTSUPP (the first nd_repstat value set is kept).
  *
  * attrsum counts the bytes consumed from the attribute list so that it can
  * be checked against the client supplied length (attrsize).
  *
  * Returns 0 when the request could be parsed, NFSERR_BADXDR when the
  * attribute list is malformed (including negative string lengths),
  * NFSERR_ISDIR/NFSERR_INVAL when a size is supplied for a vnode that is
  * not a regular file, or the error returned by a helper that failed.
  */
 int
 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
 {
 	u_int32_t *tl;
 	int attrsum = 0;
 	int i, j;
 	int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
 	int toclient = 0;
 	u_char *cp, namestr[NFSV4_SMALLSTR + 1];
 	uid_t uid;
 	gid_t gid;
 
 	error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
 	if (error)
 		goto nfsmout;
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	attrsize = fxdr_unsigned(int, *tl);
 
 	/*
 	 * Loop around getting the settable attributes. If an unsupported
 	 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
 	 */
 	if (retnotsup) {
 		nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 		/* Skip the loop entirely; only the padding is consumed. */
 		bitpos = NFSATTRBIT_MAX;
 	} else {
 		bitpos = 0;
 	}
 	for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
 	    /* More bytes consumed than the client said were present. */
 	    if (attrsum > attrsize) {
 		error = NFSERR_BADXDR;
 		goto nfsmout;
 	    }
 	    if (NFSISSET_ATTRBIT(attrbitp, bitpos)) {
 		switch (bitpos) {
 		case NFSATTRBIT_SIZE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
 			/* A size may only be set on a regular file. */
 			if (vp != NULL && vp->v_type != VREG) {
 				error = (vp->v_type == VDIR) ? NFSERR_ISDIR :
 				    NFSERR_INVAL;
 				goto nfsmout;
 			}
 			nvap->na_size = fxdr_hyper(tl);
 			attrsum += NFSX_HYPER;
 			break;
 		case NFSATTRBIT_ACL:
 			error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize,
 			    p);
 			if (error)
 				goto nfsmout;
 			if (aceerr && !nd->nd_repstat)
 				nd->nd_repstat = aceerr;
 			attrsum += aclsize;
 			break;
 		case NFSATTRBIT_ARCHIVE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_HIDDEN:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_MIMETYPE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			i = fxdr_unsigned(int, *tl);
 			/*
 			 * Reject a negative length, just as is done for the
 			 * owner and owner_group strings, so that it is never
 			 * used as an offset or subtracted from attrsum.
 			 */
 			if (i < 0) {
 				error = NFSERR_BADXDR;
 				goto nfsmout;
 			}
 			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
 			if (error)
 				goto nfsmout;
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
 			break;
 		case NFSATTRBIT_MODE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			nvap->na_mode = nfstov_mode(*tl);
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_OWNER:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			j = fxdr_unsigned(int, *tl);
 			if (j < 0) {
 				error = NFSERR_BADXDR;
 				goto nfsmout;
 			}
 			/* Use the on-stack buffer unless the name is long. */
 			if (j > NFSV4_SMALLSTR)
 				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
 			else
 				cp = namestr;
 			error = nfsrv_mtostr(nd, cp, j);
 			if (error) {
 				if (j > NFSV4_SMALLSTR)
 					free(cp, M_NFSSTRING);
 				goto nfsmout;
 			}
 			if (!nd->nd_repstat) {
 				nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid,
 				    p);
 				if (!nd->nd_repstat)
 					nvap->na_uid = uid;
 			}
 			if (j > NFSV4_SMALLSTR)
 				free(cp, M_NFSSTRING);
 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
 			break;
 		case NFSATTRBIT_OWNERGROUP:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			j = fxdr_unsigned(int, *tl);
 			if (j < 0) {
 				error = NFSERR_BADXDR;
 				goto nfsmout;
 			}
 			/* Use the on-stack buffer unless the name is long. */
 			if (j > NFSV4_SMALLSTR)
 				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
 			else
 				cp = namestr;
 			error = nfsrv_mtostr(nd, cp, j);
 			if (error) {
 				if (j > NFSV4_SMALLSTR)
 					free(cp, M_NFSSTRING);
 				goto nfsmout;
 			}
 			if (!nd->nd_repstat) {
 				nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid,
 				    p);
 				if (!nd->nd_repstat)
 					nvap->na_gid = gid;
 			}
 			if (j > NFSV4_SMALLSTR)
 				free(cp, M_NFSSTRING);
 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
 			break;
 		case NFSATTRBIT_SYSTEM:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_TIMEACCESSSET:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			attrsum += NFSX_UNSIGNED;
 			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
 			    /* Client supplied time follows. */
 			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 			    fxdr_nfsv4time(tl, &nvap->na_atime);
 			    toclient = 1;
 			    attrsum += NFSX_V4TIME;
 			} else {
 			    /* Use the server's current time. */
 			    vfs_timestamp(&nvap->na_atime);
 			    nvap->na_vaflags |= VA_UTIMES_NULL;
 			}
 			break;
 		case NFSATTRBIT_TIMEBACKUP:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += NFSX_V4TIME;
 			break;
 		case NFSATTRBIT_TIMECREATE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += NFSX_V4TIME;
 			break;
 		case NFSATTRBIT_TIMEMODIFYSET:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			attrsum += NFSX_UNSIGNED;
 			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
 			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 			    fxdr_nfsv4time(tl, &nvap->na_mtime);
 			    nvap->na_vaflags &= ~VA_UTIMES_NULL;
 			    attrsum += NFSX_V4TIME;
 			} else {
 			    vfs_timestamp(&nvap->na_mtime);
 			    /*
 			     * Only flag "both times are now" when the atime
 			     * was not supplied by the client.
 			     */
 			    if (!toclient)
 				nvap->na_vaflags |= VA_UTIMES_NULL;
 			}
 			break;
 		default:
 			nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			/*
 			 * set bitpos so we drop out of the loop.
 			 */
 			bitpos = NFSATTRBIT_MAX;
 			break;
 		}
 	    }
 	}
 
 	/*
 	 * some clients pad the attrlist, so we need to skip over the
 	 * padding.
 	 */
 	if (attrsum > attrsize) {
 		error = NFSERR_BADXDR;
 	} else {
 		attrsize = NFSM_RNDUP(attrsize);
 		if (attrsum < attrsize)
 			error = nfsm_advance(nd, attrsize - attrsum, -1);
 	}
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Check that the request's security flavor is acceptable for the export
  * and then set up the credentials the request will run with.
  *
  * Returns 0 on success, NFSERR_WRONGSEC (NFSv4) or
  * NFSERR_AUTHERR | AUTH_TOOWEAK (other versions) when the flavor is not
  * allowed, and NFSERR_PROGNOTV4 when a V4-only export is used by a
  * request that is not NFSv4.
  */
 int
 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
     struct ucred *credanon)
 {
 	int error, is_gss, map_to_anon;
 
 	is_gss = (nd->nd_flag & ND_GSS) != 0;
 
 	/* When ND_GSS is set, MNT_EXPORTANON is dropped from the flags. */
 	if (is_gss)
 		exp->nes_exflag &= ~MNT_EXPORTANON;
 
 	if (nfsvno_testexp(nd, exp) &&
 	    nd->nd_procnum != NFSV4OP_SECINFO &&
 	    nd->nd_procnum != NFSPROC_FSINFO) {
 		/*
 		 * The flavor is not permitted for this export.  RFC2623
 		 * suggests that the NFSv3 Fsinfo RPC be allowed with
 		 * AUTH_NONE or AUTH_SYS even when RPCSEC_GSS is required,
 		 * and Secinfo must work so that a client can learn the
 		 * correct flavor(s); both are exempted above.
 		 */
 		if ((nd->nd_flag & ND_NFSV4) != 0)
 			error = NFSERR_WRONGSEC;
 		else
 			error = (NFSERR_AUTHERR | AUTH_TOOWEAK);
 	} else if (NFSVNO_EXV4ONLY(exp) && (nd->nd_flag & ND_NFSV4) == 0) {
 		/* The file system is exported to NFSv4 only. */
 		error = NFSERR_PROGNOTV4;
 	} else {
 		error = 0;
 		if (NFSVNO_EXPORTED(exp)) {
 			/*
 			 * Map to the anonymous credentials for a non-GSS
 			 * uid 0, an anonymous export or ND_AUTHNONE.
 			 * (ND_AUTHNONE will only be set for an NFSv3 Fsinfo
 			 * RPC.  If set for anything else, this code might
 			 * need to change.)
 			 */
 			map_to_anon =
 			    (!is_gss && nd->nd_cred->cr_uid == 0) ||
 			    NFSVNO_EXPORTANON(exp) ||
 			    (nd->nd_flag & ND_AUTHNONE) != 0;
 			if (map_to_anon) {
 				nd->nd_cred->cr_uid = credanon->cr_uid;
 				nd->nd_cred->cr_gid = credanon->cr_gid;
 				crsetgroups(nd->nd_cred, credanon->cr_ngroups,
 				    credanon->cr_groups);
 			} else if (!is_gss) {
 				/*
 				 * For AUTH_SYS, nfsrv_getgrpscred() may hand
 				 * back a replacement credential whose group
 				 * list was set up by
 				 * "nfsuserd -manage-gids"; when there is no
 				 * replacement it simply returns its argument.
 				 */
 				nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred);
 			}
 		}
 	}
 
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Look up the export information for the client address "nam" on mount
  * "mp" and store it in *exp.
  *
  * When the lookup fails but a V4 root file handle has been set, the mount
  * is reported as not exported (no flags, no security flavors) and 0 is
  * returned instead of the error.
  */
 int
 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
     struct ucred **credp)
 {
 	int *flavors, error, idx;
 
 	error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
 	    &exp->nes_numsecflavor, &flavors);
 	if (error == 0) {
 		/* Keep a private copy of the security flavor list. */
 		for (idx = 0; idx < exp->nes_numsecflavor; idx++)
 			exp->nes_secflavors[idx] = flavors[idx];
 	} else if (nfs_rootfhset) {
 		exp->nes_exflag = 0;
 		exp->nes_numsecflavor = 0;
 		error = 0;
 	}
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Translate a file handle into a vnode (returned via *vpp, locked as per
  * lktype) and, when a client address is given, fetch the export
  * information for that client into *exp and *credp.
  *
  * Any failure to translate the file handle is reported as ESTALE.  If the
  * export check fails and no V4 root file handle is set, the vnode is
  * released with vput() and the error is returned; with a V4 root file
  * handle set, the mount is instead reported as not exported.
  */
 int
 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
     int lktype, struct vnode **vpp, struct nfsexstuff *exp,
     struct ucred **credp)
 {
 	int *flavors, error, idx;
 
 	*credp = NULL;
 	exp->nes_numsecflavor = 0;
 
 	if (VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp) != 0) {
 		/* Make sure the server replies ESTALE to the client. */
 		error = ESTALE;
 	} else if (nam == NULL) {
 		/* No client address, so there is nothing to check. */
 		error = 0;
 	} else {
 		error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
 		    &exp->nes_numsecflavor, &flavors);
 		if (error == 0) {
 			/* Keep a private copy of the security flavors. */
 			for (idx = 0; idx < exp->nes_numsecflavor; idx++)
 				exp->nes_secflavors[idx] = flavors[idx];
 		} else if (nfs_rootfhset) {
 			exp->nes_exflag = 0;
 			exp->nes_numsecflavor = 0;
 			error = 0;
 		} else {
 			vput(*vpp);
 		}
 	}
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * nfsd_fhtovp() - convert a fh to a vnode ptr
  * 	- look up fsid in mount list (if not found ret error)
  *	- get vp and export rights by calling nfsvno_fhtovp()
  *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
  *	  for AUTH_SYS
  *	- if mpp != NULL, return the mount point so that it can
  *	  be used for vn_finished_write() by the caller
  *
  * The result is reported through nd->nd_repstat rather than a return
  * value.  On any failure *vpp is set to NULL and, when mpp != NULL, *mpp
  * is set to NULL as well.
  *
  * NOTE(review): when startwrite is non-zero, mpp is handed straight to
  * vn_start_write(), so callers presumably always pass a non-NULL mpp in
  * that case -- confirm against the callers.
  */
 void
 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
     struct vnode **vpp, struct nfsexstuff *exp,
     struct mount **mpp, int startwrite, struct thread *p)
 {
 	struct mount *mp;
 	struct ucred *credanon;
 	fhandle_t *fhp;
 
 	fhp = (fhandle_t *)nfp->nfsrvfh_data;
 	/*
 	 * Find the mount point for the fsid in the file handle.  A file
 	 * handle whose file system cannot be found is stale.
 	 */
 	mp = vfs_busyfs(&fhp->fh_fsid);
 	if (mpp != NULL)
 		*mpp = mp;
 	if (mp == NULL) {
 		*vpp = NULL;
 		nd->nd_repstat = ESTALE;
 		goto out;
 	}
 
 	if (startwrite) {
 		vn_start_write(NULL, mpp, V_WAIT);
 		/*
 		 * Upgrade a shared lock request to exclusive unless
 		 * MNT_SHARED_WRITES() is true for the mount.
 		 */
 		if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp)))
 			lktype = LK_EXCLUSIVE;
 	}
 	/* nfsvno_fhtovp() sets credanon (to NULL when there is none). */
 	nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
 	    &credanon);
 	vfs_unbusy(mp);
 
 	/*
 	 * For NFSv4 without a pseudo root fs, unexported file handles
 	 * can be returned, so that Lookup works everywhere.
 	 * For the other versions, no export flags means no access.
 	 */
 	if (!nd->nd_repstat && exp->nes_exflag == 0 &&
 	    !(nd->nd_flag & ND_NFSV4)) {
 		vput(*vpp);
 		nd->nd_repstat = EACCES;
 	}
 
 	/*
 	 * Personally, I've never seen any point in requiring a
 	 * reserved port#, since only in the rare case where the
 	 * clients are all boxes with secure system privileges,
 	 * does it provide any enhanced security, but... some people
 	 * believe it to be useful and keep putting this code back in.
 	 * (There is also some "security checker" out there that
 	 *  complains if the nfs server doesn't enforce this.)
 	 * However, note the following:
 	 * RFC3530 (NFSv4) specifies that a reserved port# not be
 	 *	required.
 	 * RFC2623 recommends that, if a reserved port# is checked for,
 	 *	that there be a way to turn that off--> ifdef'd.
 	 */
 #ifdef NFS_REQRSVPORT
 	if (!nd->nd_repstat) {
 		struct sockaddr_in *saddr;
 		struct sockaddr_in6 *saddr6;
 
 		/* The same address is viewed as both IPv4 and IPv6. */
 		saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
 		saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
 		if (!(nd->nd_flag & ND_NFSV4) &&
 		    ((saddr->sin_family == AF_INET &&
 		      ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
 		     (saddr6->sin6_family == AF_INET6 &&
 		      ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
 			vput(*vpp);
 			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
 		}
 	}
 #endif	/* NFS_REQRSVPORT */
 
 	/*
 	 * Check/setup credentials.
 	 * The original uid is saved in nd_saveduid before nfsd_excred()
 	 * gets a chance to map it.
 	 */
 	if (!nd->nd_repstat) {
 		nd->nd_saveduid = nd->nd_cred->cr_uid;
 		nd->nd_repstat = nfsd_excred(nd, exp, credanon);
 		if (nd->nd_repstat)
 			vput(*vpp);
 	}
 	if (credanon != NULL)
 		crfree(credanon);
 	/*
 	 * Common failure cleanup: every failure path above has already
 	 * released the vnode, so only the write start and the returned
 	 * pointers are left to undo.
 	 */
 	if (nd->nd_repstat) {
 		if (startwrite)
 			vn_finished_write(mp);
 		*vpp = NULL;
 		if (mpp != NULL)
 			*mpp = NULL;
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 }
 
 /*
  * Look up the open file for descriptor "fd" in the file descriptor table
  * of the process that thread "p" belongs to.
  *
  * On success, 0 is returned and the file is stored in *fpp.  EBADF is
  * returned when fd is out of range or has no file attached.
  * The vpp argument is not used by this function.
  */
 static int
 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
 {
 	struct filedesc *table;
 	struct file *entry;
 	int error;
 
 	table = p->td_proc->p_fd;
 	entry = NULL;
 	if (fd >= 0 && fd < table->fd_nfiles)
 		entry = table->fd_ofiles[fd].fde_file;
 
 	if (entry != NULL) {
 		*fpp = entry;
 		error = 0;
 	} else {
 		error = EBADF;
 	}
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Called from nfssvc() to update the exports list. Just call
  * vfs_export(). This has to be done, since the v4 root fake fs isn't
  * in the mount list.
  */
 int
 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
 {
 	struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
 	int error = 0;
 	struct nameidata nd;
 	fhandle_t fh;
 
 	error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
 	if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
 		nfs_rootfhset = 0;
 	else if (error == 0) {
 		if (nfsexargp->fspec == NULL) {
 			error = EPERM;
 			goto out;
 		}
 		/*
 		 * If fspec != NULL, this is the v4root path.
 		 */
 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
 		    nfsexargp->fspec, p);
 		if ((error = namei(&nd)) != 0)
 			goto out;
 		error = nfsvno_getfh(nd.ni_vp, &fh, p);
 		vrele(nd.ni_vp);
 		if (!error) {
 			nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
 			NFSBCOPY((caddr_t)&fh,
 			    nfs_rootfh.nfsrvfh_data,
 			    sizeof (fhandle_t));
 			nfs_rootfhset = 1;
 		}
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * This function is meant to report whether the system is near its limit
  * for memory allocation via malloc() or mget(), returning True iff
  * either of these resources are near their limit.
  * XXX (For now, this is just a stub: while nfsrv_testmalloclimit is
  * non-zero, every 1000th call returns 1 and every 100th of those also
  * logs a message.  Calls made while the knob is zero are not counted.)
  */
 int nfsrv_testmalloclimit = 0;
 int
 nfsrv_mallocmget_limit(void)
 {
 	static int printmesg = 0;
 	static int testval = 1;
 	int tick;
 
 	/* The test knob is off: never report a limit. */
 	if (nfsrv_testmalloclimit == 0)
 		return (0);
 
 	tick = testval++;
 	if ((tick % 1000) != 0)
 		return (0);
 
 	if ((printmesg++ % 100) == 0)
 		printf("nfsd: malloc/mget near limit\n");
 	return (1);
 }
 
 /*
  * BSD specific initialization of the mount structure used for the NFSv4
  * root (nfsv4root_mnt).  Only the first call does any work.
  */
 void
 nfsd_mntinit(void)
 {
 	static int inited = 0;
 
 	if (inited == 0) {
 		inited = 1;
 
 		/* Flags and export list. */
 		nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
 		nfsv4root_mnt.mnt_export = NULL;
 
 		/* Empty vnode lists. */
 		TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
 		nfsv4root_mnt.mnt_nvnodelistsize = 0;
 		TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist);
 		nfsv4root_mnt.mnt_activevnodelistsize = 0;
 
 		/* Empty mount option lists. */
 		TAILQ_INIT(&nfsv4root_opt);
 		nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
 		TAILQ_INIT(&nfsv4root_newopt);
 		nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
 	}
 }
 
 /*
  * Get a vnode for a file handle, without checking exports, etc.
  * The vnode is requested with LK_EXCLUSIVE.  NULL is returned when the
  * file system cannot be found or the file handle cannot be translated.
  */
 struct vnode *
 nfsvno_getvp(fhandle_t *fhp)
 {
 	struct mount *fsmp;
 	struct vnode *found;
 	int rv;
 
 	fsmp = vfs_busyfs(&fhp->fh_fsid);
 	if (fsmp == NULL)
 		return (NULL);
 
 	rv = VFS_FHTOVP(fsmp, &fhp->fh_fid, LK_EXCLUSIVE, &found);
 	vfs_unbusy(fsmp);
 
 	return (rv != 0 ? NULL : found);
 }
 
 /*
  * Do a local VOP_ADVLOCK() for the byte range [first, end) on vp.
  * An "end" of NFS64BITSSET means the range extends to the end of file.
  * F_UNLCK releases the range; any other ftype sets a lock of that type.
  * Nothing is done (and 0 is returned) when nfsrv_dolocallocks is zero.
  */
 int
 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
     u_int64_t end, struct thread *td)
 {
 	struct flock fl;
 	int error = 0, op;
 
 	if (nfsrv_dolocallocks != 0) {
 		ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked");
 
 		fl.l_type = ftype;
 		fl.l_whence = SEEK_SET;
 		fl.l_start = (off_t)first;
 		/* A zero length covers everything up to end of file. */
 		if (end != NFS64BITSSET)
 			fl.l_len = (off_t)(end - first);
 		else
 			fl.l_len = 0;
 
 		/*
 		 * For FreeBSD8, the l_pid and l_sysid must be set to the
 		 * same values for all calls, so that all locks will be held
 		 * by the nfsd server.  (The nfsd server handles conflicts
 		 * between the various clients.)
 		 * An NFSv4 lockowner is a ClientID plus an array of up to
 		 * 1024 bytes, so it can't be put in l_sysid.
 		 */
 		if (nfsv4_sysid == 0)
 			nfsv4_sysid = nlm_acquire_next_sysid();
 		fl.l_sysid = (int)nfsv4_sysid;
 		fl.l_pid = (pid_t)0;
 
 		op = (ftype == F_UNLCK) ? F_UNLCK : F_SETLK;
 		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, op, &fl,
 		    (F_POSIX | F_REMOTE));
 	}
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Check the nfsv4 root exports for the client address in nd->nd_nam and
  * record, as ND_EX* flags in nd->nd_flag, which security flavors the
  * export allows.  NFSERR_PROGUNAVAIL is returned when the export check
  * fails.
  */
 int
 nfsvno_v4rootexport(struct nfsrv_descript *nd)
 {
 	struct ucred *anoncred;
 	int *flavors, error, exflags, idx, nflavors;
 
 	if (vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
 	    &anoncred, &nflavors, &flavors) != 0) {
 		error = NFSERR_PROGUNAVAIL;
 	} else {
 		error = 0;
 		/* The anonymous credential is not needed here. */
 		if (anoncred != NULL)
 			crfree(anoncred);
 		for (idx = 0; idx < nflavors; idx++) {
 			switch (flavors[idx]) {
 			case AUTH_SYS:
 				nd->nd_flag |= ND_EXAUTHSYS;
 				break;
 			case RPCSEC_GSS_KRB5:
 				nd->nd_flag |= ND_EXGSS;
 				break;
 			case RPCSEC_GSS_KRB5I:
 				nd->nd_flag |= ND_EXGSSINTEGRITY;
 				break;
 			case RPCSEC_GSS_KRB5P:
 				nd->nd_flag |= ND_EXGSSPRIVACY;
 				break;
 			default:
 				/* Other flavors are ignored. */
 				break;
 			}
 		}
 	}
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Nfs server pseudo system call for the nfsd's
  *
  * Dispatches on uap->flag:
  *  - NFSSVC_NFSDADDSOCK: copy in the socket argument and hand the socket
  *    to nfsrvd_addsock(); a descriptor that is not a socket gets EPERM.
  *  - NFSSVC_NFSDNFSD: copy in the nfsd arguments and call nfsrvd_nfsd();
  *    a NULL argument pointer gets EINVAL.
  *  - anything else is passed on to nfssvc_srvcall().
  */
 /*
  * MPSAFE
  */
 static int
 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
 {
 	struct nfsd_nfsd_args nfsdarg;
 	struct nfsd_addsock_args sockarg;
 	struct file *fp;
 	cap_rights_t rights;
 	int error;
 
 	if ((uap->flag & NFSSVC_NFSDADDSOCK) != 0) {
 		error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
 		/* The descriptor is fetched with CAP_SOCK_SERVER rights. */
 		if (error == 0)
 			error = fget(td, sockarg.sock,
 			    cap_rights_init(&rights, CAP_SOCK_SERVER), &fp);
 		if (error == 0) {
 			if (fp->f_type == DTYPE_SOCKET)
 				error = nfsrvd_addsock(fp);
 			else
 				error = EPERM;
 			/* Drop the reference from fget() either way. */
 			fdrop(fp, td);
 		}
 	} else if ((uap->flag & NFSSVC_NFSDNFSD) != 0) {
 		if (uap->argp == NULL) {
 			error = EINVAL;
 		} else {
 			error = copyin(uap->argp, (caddr_t)&nfsdarg,
 			    sizeof (nfsdarg));
 			if (error == 0)
 				error = nfsrvd_nfsd(td, &nfsdarg);
 		}
 	} else {
 		error = nfssvc_srvcall(td, uap, td->td_ucred);
 	}
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Handle the nfssvc() server administration requests, selected by the
  * first matching bit in uap->flag:
  *  - NFSSVC_PUBLICFH/NFSSVC_NOPUBLICFH: set or clear the public fh.
  *  - NFSSVC_V4ROOTEXPORT: update the NFSv4 root exports.
  *  - NFSSVC_STABLERESTART: record the stable restart file; it must be
  *    open for both reading and writing and no nfsd may be running.
  *  - NFSSVC_ADMINREVOKE: administratively revoke a client.
  *  - NFSSVC_DUMPCLIENTS/NFSSVC_DUMPLOCKS: copy a client or lock list out
  *    to the caller's buffer.
  *  - NFSSVC_BACKUPSTABLE: remember the calling process as the master nfsd.
  *  - NFSSVC_SUSPENDNFSD/NFSSVC_RESUMENFSD: lock out or release the nfsd
  *    threads.
  *
  * Returns 0 or an errno.  EINVAL is returned when no flag matches.
  * NOTE(review): the NFSSVC_BACKUPSTABLE case never assigns "error", so it
  * also returns the initial EINVAL -- confirm that callers ignore it.
  */
 static int
 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
 {
 	struct nfsex_args export;
 	struct file *fp = NULL;
 	int stablefd, len;
 	struct nfsd_clid adminrevoke;
 	struct nfsd_dumplist dumplist;
 	struct nfsd_dumpclients *dumpclients;
 	struct nfsd_dumplocklist dumplocklist;
 	struct nfsd_dumplocks *dumplocks;
 	struct nameidata nd;
 	vnode_t vp;
 	int error = EINVAL, igotlock;
 	struct proc *procp;
 	static int suspend_nfsd = 0;
 
 	if (uap->flag & NFSSVC_PUBLICFH) {
 		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
 		    sizeof (fhandle_t));
 		error = copyin(uap->argp,
 		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
 		if (!error)
 			nfs_pubfhset = 1;
 	} else if (uap->flag & NFSSVC_V4ROOTEXPORT) {
 		error = copyin(uap->argp,(caddr_t)&export,
 		    sizeof (struct nfsex_args));
 		if (!error)
 			error = nfsrv_v4rootexport(&export, cred, p);
 	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
 		nfs_pubfhset = 0;
 		error = 0;
 	} else if (uap->flag & NFSSVC_STABLERESTART) {
 		error = copyin(uap->argp, (caddr_t)&stablefd,
 		    sizeof (int));
 		if (!error)
 			error = fp_getfvp(p, stablefd, &fp, &vp);
 		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
 			error = EBADF;
 		if (!error && newnfs_numnfsd != 0)
 			error = EPERM;
 		if (!error) {
 			nfsrv_stablefirst.nsf_fp = fp;
 			nfsrv_setupstable(p);
 		}
 	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
 		error = copyin(uap->argp, (caddr_t)&adminrevoke,
 		    sizeof (struct nfsd_clid));
 		if (!error)
 			error = nfsrv_adminrevoke(&adminrevoke, p);
 	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
 		error = copyin(uap->argp, (caddr_t)&dumplist,
 		    sizeof (struct nfsd_dumplist));
 		if (!error && (dumplist.ndl_size < 1 ||
 			dumplist.ndl_size > NFSRV_MAXDUMPLIST))
 			error = EPERM;
 		if (!error) {
 		    len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
 		    /*
 		     * The whole buffer is copied out below, so zero it to
 		     * make sure that entries and padding which are not
 		     * filled in do not expose stale kernel memory.
 		     */
 		    dumpclients = (struct nfsd_dumpclients *)malloc(len,
 			M_TEMP, M_WAITOK | M_ZERO);
 		    nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
 		    error = copyout(dumpclients,
 			CAST_USER_ADDR_T(dumplist.ndl_list), len);
 		    free((caddr_t)dumpclients, M_TEMP);
 		}
 	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
 		error = copyin(uap->argp, (caddr_t)&dumplocklist,
 		    sizeof (struct nfsd_dumplocklist));
 		if (!error && (dumplocklist.ndllck_size < 1 ||
 			dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
 			error = EPERM;
 		if (!error)
 			error = nfsrv_lookupfilename(&nd,
 				dumplocklist.ndllck_fname, p);
 		if (!error) {
 			len = sizeof (struct nfsd_dumplocks) *
 				dumplocklist.ndllck_size;
 			/* Zeroed for the same reason as the client dump. */
 			dumplocks = (struct nfsd_dumplocks *)malloc(len,
 				M_TEMP, M_WAITOK | M_ZERO);
 			nfsrv_dumplocks(nd.ni_vp, dumplocks,
 			    dumplocklist.ndllck_size, p);
 			vput(nd.ni_vp);
 			error = copyout(dumplocks,
 			    CAST_USER_ADDR_T(dumplocklist.ndllck_list), len);
 			free((caddr_t)dumplocks, M_TEMP);
 		}
 	} else if (uap->flag & NFSSVC_BACKUPSTABLE) {
 		/*
 		 * Record the pid, command name and start time of the caller,
 		 * so that nfsrv_backupstable() can identify it later.
 		 */
 		procp = p->td_proc;
 		PROC_LOCK(procp);
 		nfsd_master_pid = procp->p_pid;
 		bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
 		nfsd_master_start = procp->p_stats->p_start;
 		nfsd_master_proc = procp;
 		PROC_UNLOCK(procp);
 	} else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) {
 		NFSLOCKV4ROOTMUTEX();
 		if (suspend_nfsd == 0) {
 			/* Lock out all nfsd threads */
 			do {
 				igotlock = nfsv4_lock(&nfsd_suspend_lock, 1,
 				    NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
 			} while (igotlock == 0 && suspend_nfsd == 0);
 			suspend_nfsd = 1;
 		}
 		NFSUNLOCKV4ROOTMUTEX();
 		error = 0;
 	} else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) {
 		NFSLOCKV4ROOTMUTEX();
 		if (suspend_nfsd != 0) {
 			nfsv4_unlock(&nfsd_suspend_lock, 0);
 			suspend_nfsd = 0;
 		}
 		NFSUNLOCKV4ROOTMUTEX();
 		error = 0;
 	}
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Check whether the security flavor used by the request is one of the
  * flavors allowed by the export.
  * Returns 0 if ok, 1 otherwise.
  */
 int
 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
 {
 	int allowed, idx;
 
 	/*
 	 * An empty security flavor list is accepted.  This seems odd, but
 	 * it happens when NFSv4 is traversing non-exported file systems.
 	 * Exported file systems should always have a non-empty list.
 	 */
 	if (exp->nes_numsecflavor == 0)
 		return (0);
 
 	for (idx = 0; idx < exp->nes_numsecflavor; idx++) {
 		/*
 		 * Privacy and integrity are matched against their own
 		 * flags, since ND_GSS is set for everything but AUTH_SYS.
 		 */
 		if (exp->nes_secflavors[idx] == RPCSEC_GSS_KRB5P)
 			allowed = (nd->nd_flag & ND_GSSPRIVACY) != 0;
 		else if (exp->nes_secflavors[idx] == RPCSEC_GSS_KRB5I)
 			allowed = (nd->nd_flag & ND_GSSINTEGRITY) != 0;
 		else if (exp->nes_secflavors[idx] == RPCSEC_GSS_KRB5)
 			allowed = (nd->nd_flag & ND_GSS) != 0;
 		else if (exp->nes_secflavors[idx] == AUTH_SYS)
 			allowed = (nd->nd_flag & ND_GSS) == 0;
 		else
 			allowed = 0;
 
 		if (allowed)
 			return (0);
 	}
 	return (1);
 }
 
 /*
  * Hash the fid part (fh_fid) of a file handle with hash32_buf(),
  * using an initial hash value of 0.
  */
 uint32_t
 nfsrv_hashfh(fhandle_t *fhp)
 {
 
 	return (hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0));
 }
 
 /*
  * Hash the NFSX_V4SESSIONID bytes of a sessionid with hash32_buf(),
  * using an initial hash value of 0.
  */
 uint32_t
 nfsrv_hashsessionid(uint8_t *sessionid)
 {
 
 	return (hash32_buf(sessionid, NFSX_V4SESSIONID, 0));
 }
 
 /*
  * Signal the userland master nfsd (with SIGUSR2) to backup the stable
  * restart file.  If the recorded master process can no longer be
  * identified, the record of it is cleared instead.
  */
 void
 nfsrv_backupstable(void)
 {
 	struct proc *master;
 	int is_master;
 
 	/* No master nfsd has been recorded. */
 	if (nfsd_master_proc == NULL)
 		return;
 
 	master = pfind(nfsd_master_pid);
 
 	/*
 	 * Try to make sure it is the correct process: same proc pointer,
 	 * same start time and same command name as were recorded.
 	 */
 	is_master = (master == nfsd_master_proc &&
 	    master->p_stats->p_start.tv_sec == nfsd_master_start.tv_sec &&
 	    master->p_stats->p_start.tv_usec == nfsd_master_start.tv_usec &&
 	    strcmp(master->p_comm, nfsd_master_comm) == 0);
 
 	if (is_master)
 		kern_psignal(master, SIGUSR2);
 	else
 		nfsd_master_proc = NULL;
 
 	/* A process that was found is unlocked again. */
 	if (master != NULL)
 		PROC_UNLOCK(master);
 }
 
 /* Hook that is pointed at nfssvc_nfsd() while this module is loaded. */
 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
 
 /*
  * Module event handler for the nfsd module.
  *
  * MOD_LOAD initializes the locks, caches and hooks (once only; a repeated
  * load is a no-op).  MOD_UNLOAD fails with EBUSY while any nfsd is
  * running, and otherwise removes the hooks, throws away the server state
  * and destroys the locks.  Any other event type gets EOPNOTSUPP.
  */
 static int
 nfsd_modevent(module_t mod, int type, void *data)
 {
 	int error = 0, i;
 	static int loaded = 0;
 
 	switch (type) {
 	case MOD_LOAD:
 		if (loaded)
 			goto out;
 		newnfs_portinit();
 		/* Create the per hash bucket mutexes for the reply caches. */
 		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
 			mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL,
 			    MTX_DEF);
 			mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL,
 			    MTX_DEF);
 		}
 		mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF);
 		mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF);
 		mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF);
 		lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
 		nfsrvd_initcache();
 		nfsd_init();
 		NFSD_LOCK();
 		nfsrvd_init(0);
 		NFSD_UNLOCK();
 		nfsd_mntinit();
 #ifdef VV_DISABLEDELEG
 		vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
 		vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
 #endif
 		/* Install the entry points last, once everything is set up. */
 		nfsd_call_servertimer = nfsrv_servertimer;
 		nfsd_call_nfsd = nfssvc_nfsd;
 		loaded = 1;
 		break;
 
 	case MOD_UNLOAD:
 		/* Refuse to unload while any nfsd is still running. */
 		if (newnfs_numnfsd != 0) {
 			error = EBUSY;
 			break;
 		}
 
 		/* Remove the hooks first, so that no new calls come in. */
 #ifdef VV_DISABLEDELEG
 		vn_deleg_ops.vndeleg_recall = NULL;
 		vn_deleg_ops.vndeleg_disable = NULL;
 #endif
 		nfsd_call_servertimer = NULL;
 		nfsd_call_nfsd = NULL;
 
 		/* Clean out all NFSv4 state. */
 		nfsrv_throwawayallstate(curthread);
 
 		/* Clean the NFS server reply cache */
 		nfsrvd_cleancache();
 
 		/* Free up the krpc server pool. */
 		if (nfsrvd_pool != NULL)
 			svcpool_destroy(nfsrvd_pool);
 
 		/* and get rid of the locks */
 		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
 			mtx_destroy(&nfsrchash_table[i].mtx);
 			mtx_destroy(&nfsrcahash_table[i].mtx);
 		}
 		mtx_destroy(&nfsrc_udpmtx);
 		mtx_destroy(&nfs_v4root_mutex);
 		mtx_destroy(&nfsv4root_mnt.mnt_mtx);
 		/*
 		 * NOTE(review): the session hash mutexes and the three hash
 		 * tables freed below are not set up in this function;
 		 * presumably nfsd_init() or nfsrvd_init() does it -- confirm.
 		 */
 		for (i = 0; i < nfsrv_sessionhashsize; i++)
 			mtx_destroy(&nfssessionhash[i].mtx);
 		lockdestroy(&nfsv4root_mnt.mnt_explock);
 		free(nfsclienthash, M_NFSDCLIENT);
 		free(nfslockhash, M_NFSDLOCKFILE);
 		free(nfssessionhash, M_NFSDSESSION);
 		loaded = 0;
 		break;
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 /* Module description: name, event handler and (unused) handler argument. */
 static moduledata_t nfsd_mod = {
 	"nfsd",
 	nfsd_modevent,
 	NULL,
 };
 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);
 
 /* So that loader and kldload(2) can find us, wherever we are.. */
 MODULE_VERSION(nfsd, 1);
 /* Modules that nfsd depends on, each at version 1. */
 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
 MODULE_DEPEND(nfsd, nfslock, 1, 1, 1);
 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
 MODULE_DEPEND(nfsd, krpc, 1, 1, 1);
 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1);
 
Index: head/sys/kern/capabilities.conf
===================================================================
--- head/sys/kern/capabilities.conf	(revision 318735)
+++ head/sys/kern/capabilities.conf	(revision 318736)
@@ -1,738 +1,743 @@
 ##
 ## Copyright (c) 2008-2010 Robert N. M. Watson
 ## All rights reserved.
 ##
 ## This software was developed at the University of Cambridge Computer
 ## Laboratory with support from a grant from Google, Inc.
 ##
 ## Redistribution and use in source and binary forms, with or without
 ## modification, are permitted provided that the following conditions
 ## are met:
 ## 1. Redistributions of source code must retain the above copyright
 ##    notice, this list of conditions and the following disclaimer.
 ## 2. Redistributions in binary form must reproduce the above copyright
 ##    notice, this list of conditions and the following disclaimer in the
 ##    documentation and/or other materials provided with the distribution.
 ##
 ## THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 ## ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 ## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ## ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 ## FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 ## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 ## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 ## LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 ## OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 ## SUCH DAMAGE.
 ##
 ## List of system calls enabled in capability mode, one name per line.
 ##
 ## Notes:
 ## - sys_exit(2), abort2(2) and close(2) are very important.
 ## - Sorted alphabetically, please keep it that way.
 ##
 ## $FreeBSD$
 ##
 
 ##
 ## Allow ACL and MAC label operations by file descriptor, subject to
 ## capability rights.  Allow MAC label operations on the current process but
 ## we will need to scope __mac_get_pid(2).
 ##
 __acl_aclcheck_fd
 __acl_delete_fd
 __acl_get_fd
 __acl_set_fd
 __mac_get_fd
 #__mac_get_pid
 __mac_get_proc
 __mac_set_fd
 __mac_set_proc
 
 ##
 ## Allow sysctl(2) as we scope internal to the call; this is a global
 ## namespace, but there are several critical sysctls required for almost
 ## anything to run, such as hw.pagesize.  For now that policy lives in the
 ## kernel for performance and simplicity, but perhaps it could move to a
 ## proxying daemon in userspace.
 ##
 __sysctl
 
 ##
 ## Allow umtx operations as these are scoped by address space.
 ##
 ## XXXRW: Need to check this very carefully.
 ##
 _umtx_op
 
 ##
 ## Allow process termination using abort2(2).
 ##
 abort2
 
 ##
 ## Allow accept(2) since it doesn't manipulate namespaces directly, rather
 ## relies on existing bindings on a socket, subject to capability rights.
 ##
 accept
 accept4
 
 ##
 ## Allow AIO operations by file descriptor, subject to capability rights.
 ##
 aio_cancel
 aio_error
 aio_fsync
 aio_read
 aio_return
 aio_suspend
 aio_waitcomplete
 aio_write
 
 ##
 ## audit(2) is a global operation, submitting to the global trail, but it is
 ## controlled by privilege, and it might be useful to be able to submit
 ## records from sandboxes.  For now, disallow, but we may want to think about
 ## providing some sort of proxy service for this.
 ##
 #audit
 
 ##
 ## Allow bindat(2).
 ##
 bindat
 
 ##
 ## Allow capability mode and capability system calls.
 ##
 cap_enter
 cap_fcntls_get
 cap_fcntls_limit
 cap_getmode
 cap_ioctls_get
 cap_ioctls_limit
 __cap_rights_get
 cap_rights_limit
 
 ##
 ## Allow read-only clock operations.
 ##
 clock_getres
 clock_gettime
 
 ##
 ## Always allow file descriptor close(2).
 ##
 close
 closefrom
 
 ##
 ## Allow connectat(2).
 ##
 connectat
 
 ##
 ## cpuset(2) and related calls require scoping by process, but should
 ## eventually be allowed, at least in the current process case.
 ##
 #cpuset
 #cpuset_getaffinity
 #cpuset_getid
 #cpuset_setaffinity
 #cpuset_setid
 
 ##
 ## Always allow dup(2) and dup2(2) manipulation of the file descriptor table.
 ##
 dup
 dup2
 
 ##
 ## Allow extended attribute operations by file descriptor, subject to
 ## capability rights.
 ##
 extattr_delete_fd
 extattr_get_fd
 extattr_list_fd
 extattr_set_fd
 
 ##
 ## Allow changing file flags, mode, and owner by file descriptor, subject to
 ## capability rights.
 ##
 fchflags
 fchmod
 fchown
 
 ##
 ## For now, allow fcntl(2), subject to capability rights, but this probably
 ## needs additional scoping.
 ##
 fcntl
 
 ##
 ## Allow fexecve(2), subject to capability rights.  We perform some scoping,
 ## such as disallowing privilege escalation.
 ##
 fexecve
 
 ##
 ## Allow flock(2), subject to capability rights.
 ##
 flock
 
 ##
 ## Allow fork(2), even though it returns pids -- some applications seem to
 ## prefer this interface.
 ##
 fork
 
 ##
 ## Allow fpathconf(2), subject to capability rights.
 ##
 fpathconf
 
 ##
 ## Allow various file descriptor-based I/O operations, subject to capability
 ## rights.
 ##
+freebsd11_fstat
+freebsd11_fstatat
+freebsd11_getdirentries
+freebsd11_fstatfs
+freebsd11_mknodat
 freebsd6_ftruncate
 freebsd6_lseek
 freebsd6_mmap
 freebsd6_pread
 freebsd6_pwrite
 
 ##
 ## Allow querying file and file system state with fstat(2) and fstatfs(2),
 ## subject to capability rights.
 ##
 fstat
 fstatfs
 
 ##
 ## Allow further file descriptor-based I/O operations, subject to capability
 ## rights.
 ##
 fsync
 ftruncate
 
 ##
 ## Allow futimens(2) and futimes(2), subject to capability rights.
 ##
 futimens
 futimes
 
 ##
 ## Allow querying process audit state, subject to normal access control.
 ##
 getaudit
 getaudit_addr
 getauid
 
 ##
 ## Allow thread context management with getcontext(2).
 ##
 getcontext
 
 ##
 ## Allow directory I/O on a file descriptor, subject to capability rights.
 ## Originally we had separate capabilities for directory-specific read
 ## operations, but on BSD we allow reading the raw directory data, so we just
 ## rely on CAP_READ now.
 ##
 getdents
 getdirentries
 
 ##
 ## Allow querying certain trivial global state.
 ##
 getdomainname
 
 ##
 ## Allow querying certain per-process resource limit state.
 ##
 getdtablesize
 
 ##
 ## Allow querying current process credential state.
 ##
 getegid
 geteuid
 
 ##
 ## Allow querying certain trivial global state.
 ##
 gethostid
 gethostname
 
 ##
 ## Allow querying per-process timer.
 ##
 getitimer
 
 ##
 ## Allow querying current process credential state.
 ##
 getgid
 getgroups
 getlogin
 
 ##
 ## Allow querying certain trivial global state.
 ##
 getpagesize
 getpeername
 
 ##
 ## Allow querying certain per-process scheduling, resource limit, and
 ## credential state.
 ##
 ## XXXRW: getpgid(2) needs scoping.  It's not clear if it's worth scoping
 ## getppid(2).  getpriority(2) needs scoping.  getrusage(2) needs scoping.
 ## getsid(2) needs scoping.
 ##
 getpgid
 getpgrp
 getpid
 getppid
 getpriority
 getresgid
 getresuid
 getrlimit
 getrusage
 getsid
 
 ##
 ## Allow querying socket state, subject to capability rights.
 ##
 ## XXXRW: getsockopt(2) may need more attention.
 ##
 getsockname
 getsockopt
 
 ##
 ## Allow querying the global clock.
 ##
 gettimeofday
 
 ##
 ## Allow querying current process credential state.
 ##
 getuid
 
 ##
 ## Allow ioctl(2), which hopefully will be limited by applications only to
 ## required commands with cap_ioctls_limit(2) syscall.
 ##
 ioctl
 
 ##
 ## Allow querying current process credential state.
 ##
 issetugid
 
 ##
 ## Allow kevent(2), as we will authorize based on capability rights on the
 ## target descriptor.
 ##
 kevent
 
 ##
 ## Allow kill(2), as we allow the process to send signals only to itself.
 ##
 kill
 
 ##
 ## Allow message queue operations on file descriptors, subject to capability
 ## rights.
 ##
 kmq_notify
 kmq_setattr
 kmq_timedreceive
 kmq_timedsend
 
 ##
 ## Allow kqueue(2), we will control use.
 ##
 kqueue
 
 ##
 ## Allow managing per-process timers.
 ##
 ktimer_create
 ktimer_delete
 ktimer_getoverrun
 ktimer_gettime
 ktimer_settime
 
 ##
 ## We can't allow ktrace(2) because it relies on a global namespace, but we
 ## might want to introduce an fktrace(2) of some sort.
 ##
 #ktrace
 
 ##
 ## Allow AIO operations by file descriptor, subject to capability rights.
 ##
 lio_listio
 
 ##
 ## Allow listen(2), subject to capability rights.
 ##
 ## XXXRW: One might argue this manipulates a global namespace.
 ##
 listen
 
 ##
 ## Allow I/O-related file descriptors, subject to capability rights.
 ##
 lseek
 
 ##
 ## Allow simple VM operations on the current process.
 ##
 madvise
 mincore
 minherit
 mlock
 mlockall
 
 ##
 ## Allow memory mapping a file descriptor, and updating protections, subject
 ## to capability rights.
 ##
 mmap
 mprotect
 
 ##
 ## Allow simple VM operations on the current process.
 ##
 msync
 munlock
 munlockall
 munmap
 
 ##
 ## Allow the current process to sleep.
 ##
 nanosleep
 
 ##
 ## Allow querying the global clock.
 ##
 ntp_gettime
 
 ##
 ## Allow AIO operations by file descriptor, subject to capability rights.
 ##
 oaio_read
 oaio_write
 
 ##
 ## Allow simple VM operations on the current process.
 ##
 obreak
 
 ##
 ## Allow AIO operations by file descriptor, subject to capability rights.
 ##
 olio_listio
 
 ##
 ## Operations relative to directory capabilities.
 ##
 chflagsat
 faccessat
 fchmodat
 fchownat
 fstatat
 futimesat
 linkat
 mkdirat
 mkfifoat
 mknodat
 openat
 readlinkat
 renameat
 symlinkat
 unlinkat
 utimensat
 
 ##
 ## Process descriptor-related system calls are allowed.
 ##
 pdfork
 pdgetpid
 pdkill
 #pdwait4	# not yet implemented
 
 ##
 ## Allow pipe(2).
 ##
 pipe
 pipe2
 
 ##
 ## Allow poll(2), which will be scoped by capability rights.
 ## XXXRW: We don't yet do that scoping.
 ##
 poll
 
 ##
 ## Allow I/O-related file descriptors, subject to capability rights.
 ##
 pread
 preadv
 
 ##
 ## Allow access to profiling state on the current process.
 ##
 profil
 
 ##
 ## Disallow ptrace(2) for now, but we do need debugging facilities in
 ## capability mode, so we will want to revisit this, possibly by scoping its
 ## operation.
 ##
 #ptrace
 
 ##
 ## Allow I/O-related file descriptors, subject to capability rights.
 ##
 pwrite
 pwritev
 read
 readv
 recv
 recvfrom
 recvmsg
 
 ##
 ## Allow real-time scheduling primitives to be used.
 ##
 ## XXXRW: These require scoping.
 ##
 rtprio
 rtprio_thread
 
 ##
 ## Allow simple VM operations on the current process.
 ##
 sbrk
 
 ##
 ## Allow querying trivial global scheduler state.
 ##
 sched_get_priority_max
 sched_get_priority_min
 
 ##
 ## Allow various thread/process scheduler operations.
 ##
 ## XXXRW: Some of these require further scoping.
 ##
 sched_getparam
 sched_getscheduler
 sched_rr_get_interval
 sched_setparam
 sched_setscheduler
 sched_yield
 
 ##
 ## Allow I/O-related file descriptors, subject to capability rights.
 ##
 sctp_generic_recvmsg
 sctp_generic_sendmsg
 sctp_generic_sendmsg_iov
 sctp_peeloff
 
 ##
 ## Allow pselect(2) and select(2), which will be scoped by capability rights.
 ##
 ## XXXRW: But is it?
 ##
 pselect
 select
 
 ##
 ## Allow I/O-related file descriptors, subject to capability rights.  Use of
 ## explicit addresses here is restricted by the system calls themselves.
 ##
 send
 sendfile
 sendmsg
 sendto
 
 ##
 ## Allow setting per-process audit state, which is controlled separately by
 ## privileges.
 ##
 setaudit
 setaudit_addr
 setauid
 
 ##
 ## Allow setting thread context.
 ##
 setcontext
 
 ##
 ## Allow setting current process credential state, which is controlled
 ## separately by privilege.
 ##
 setegid
 seteuid
 setgid
 
 ##
 ## Allow use of the process interval timer.
 ##
 setitimer
 
 ##
 ## Allow setpriority(2).
 ##
 ## XXXRW: Requires scoping.
 ##
 setpriority
 
 ##
 ## Allow setting current process credential state, which is controlled
 ## separately by privilege.
 ##
 setregid
 setresgid
 setresuid
 setreuid
 
 ##
 ## Allow setting process resource limits with setrlimit(2).
 ##
 setrlimit
 
 ##
 ## Allow creating a new session with setsid(2).
 ##
 setsid
 
 ##
 ## Allow setting socket options with setsockopt(2), subject to capability
 ## rights.
 ##
 ## XXXRW: Might require scoping.
 ##
 setsockopt
 
 ##
 ## Allow setting current process credential state, which is controlled
 ## separately by privilege.
 ##
 setuid
 
 ##
 ## shm_open(2) is scoped so as to allow only access to new anonymous objects.
 ##
 shm_open
 
 ##
 ## Allow I/O-related file descriptors, subject to capability rights.
 ##
 shutdown
 
 ##
 ## Allow signal control on current process.
 ##
 sigaction
 sigaltstack
 sigblock
 sigpending
 sigprocmask
 sigqueue
 sigreturn
 sigsetmask
 sigstack
 sigsuspend
 sigtimedwait
 sigvec
 sigwaitinfo
 sigwait
 
 ##
 ## Allow creating new sockets and socket pairs with socket(2) and
 ## socketpair(2).
 ##
 socket
 socketpair
 
 ##
 ## Allow simple VM operations on the current process.
 ##
 ## XXXRW: Kernel doesn't implement this, so drop?
 ##
 sstk
 
 ##
 ## Do allow sync(2) for now, but possibly shouldn't.
 ##
 sync
 
 ##
 ## Always allow process termination with sys_exit(2).
 ##
 sys_exit
 
 ##
 ## sysarch(2) does rather diverse things, but is required on at least i386
 ## in order to configure per-thread data.  As such, it's scoped on each
 ## architecture.
 ##
 sysarch
 
 ##
 ## Allow thread operations operating only on current process.
 ##
 thr_create
 thr_exit
 thr_kill
 
 ##
 ## Disallow thr_kill2(2), as it may operate beyond the current process.
 ##
 ## XXXRW: Requires scoping.
 ##
 #thr_kill2
 
 ##
 ## Allow thread operations operating only on current process.
 ##
 thr_new
 thr_self
 thr_set_name
 thr_suspend
 thr_wake
 
 ##
 ## Allow manipulation of the current process umask with umask(2).
 ##
 umask
 
 ##
 ## Allow submitting of process trace entries with utrace(2).
 ##
 utrace
 
 ##
 ## Allow generating UUIDs with uuidgen(2).
 ##
 uuidgen
 
 ##
 ## Allow I/O-related file descriptors, subject to capability rights.
 ##
 write
 writev
 
 ##
 ## Allow processes to yield(2).
 ##
 yield
Index: head/sys/kern/kern_acct.c
===================================================================
--- head/sys/kern/kern_acct.c	(revision 318735)
+++ head/sys/kern/kern_acct.c	(revision 318736)
@@ -1,652 +1,657 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * Copyright (c) 2005 Robert N. M. Watson
  * All rights reserved.
  *
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Copyright (c) 1994 Christopher G. Demetriou
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_acct.c	8.1 (Berkeley) 6/14/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/acct.h>
 #include <sys/fcntl.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/syslog.h>
 #include <sys/sysproto.h>
 #include <sys/tty.h>
 #include <sys/vnode.h>
 
 #include <security/mac/mac_framework.h>
 
+_Static_assert(sizeof(struct acctv3) - offsetof(struct acctv3, ac_trailer) ==
+    sizeof(struct acctv2) - offsetof(struct acctv2, ac_trailer), "trailer");
+_Static_assert(sizeof(struct acctv3) - offsetof(struct acctv3, ac_len2) ==
+    sizeof(struct acctv2) - offsetof(struct acctv2, ac_len2), "len2");
+
 /*
  * The routines implemented in this file are described in:
  *      Leffler, et al.: The Design and Implementation of the 4.3BSD
  *	    UNIX Operating System (Addison-Wesley, 1989)
  * on pages 62-63.
  * In May 2007 the historic 3-bit base-8 exponent, 13-bit fraction
  * comp_t representation described in the above reference was replaced
  * with that of IEEE-754 floats.
  *
  * Arguably, to simplify accounting operations, this mechanism should
  * be replaced by one in which an accounting log file (similar to /dev/klog)
  * is read by a user process, etc.  However, that has its own problems.
  */
 
 /* Floating point definitions from <float.h>. */
 #define FLT_MANT_DIG    24              /* p */
 #define FLT_MAX_EXP     128             /* emax */
 
 /*
  * Internal accounting functions.
  * The former's operation is described in Leffler, et al., and the latter
  * was provided by UCB with the 4.4BSD-Lite release
  */
 static uint32_t	encode_timeval(struct timeval);
 static uint32_t	encode_long(long);
 static void	acctwatch(void);
 static void	acct_thread(void *);
 static int	acct_disable(struct thread *, int);
 
 /*
  * Accounting vnode pointer, saved vnode pointer, and flags for each.
  * acct_sx protects against changes to the active vnode and credentials
  * while accounting records are being committed to disk.
  */
 static int		 acct_configured;
 static int		 acct_suspended;
 static struct vnode	*acct_vp;
 static struct ucred	*acct_cred;
 static struct plimit	*acct_limit;
 static int		 acct_flags;
 static struct sx	 acct_sx;
 
 SX_SYSINIT(acct, &acct_sx, "acct_sx");
 
 /*
  * State of the accounting kthread.
  */
 static int		 acct_state;
 
 #define	ACCT_RUNNING	1	/* Accounting kthread is running. */
 #define	ACCT_EXITREQ	2	/* Accounting kthread should exit. */
 
 /*
  * Values associated with enabling and disabling accounting
  */
 static int acctsuspend = 2;	/* stop accounting when < 2% free space left */
 SYSCTL_INT(_kern, OID_AUTO, acct_suspend, CTLFLAG_RW,
 	&acctsuspend, 0, "percentage of free disk space below which accounting stops");
 
 static int acctresume = 4;	/* resume when free space risen to > 4% */
 SYSCTL_INT(_kern, OID_AUTO, acct_resume, CTLFLAG_RW,
 	&acctresume, 0, "percentage of free disk space above which accounting resumes");
 
 static int acctchkfreq = 15;	/* frequency (in seconds) to check space */
 
 /*
  * Sysctl handler for kern.acct_chkfreq.  Reports the current check
  * interval and, when a new value is supplied, accepts it only if it is
  * strictly positive (EINVAL otherwise).
  */
 static int
 sysctl_acct_chkfreq(SYSCTL_HANDLER_ARGS)
 {
 	int newfreq, rv;
 
 	/* Hand the current setting back to the caller first. */
 	rv = SYSCTL_OUT(req, &acctchkfreq, sizeof(acctchkfreq));
 	if (rv != 0 || req->newptr == NULL)
 		return (rv);
 
 	/* A new value was supplied: fetch it and range-check it. */
 	rv = SYSCTL_IN(req, &newfreq, sizeof(newfreq));
 	if (rv != 0)
 		return (rv);
 	if (newfreq < 1)
 		return (EINVAL);
 
 	acctchkfreq = newfreq;
 	return (0);
 }
 SYSCTL_PROC(_kern, OID_AUTO, acct_chkfreq, CTLTYPE_INT|CTLFLAG_RW,
     &acctchkfreq, 0, sysctl_acct_chkfreq, "I",
     "frequency for checking the free space");
 
 SYSCTL_INT(_kern, OID_AUTO, acct_configured, CTLFLAG_RD, &acct_configured, 0,
 	"Accounting configured or not");
 
 SYSCTL_INT(_kern, OID_AUTO, acct_suspended, CTLFLAG_RD, &acct_suspended, 0,
 	"Accounting suspended or not");
 
 /*
  * Accounting system call.  Written based on the specification and previous
  * implementation done by Mark Tinguely.
  *
  * With a non-NULL uap->path the named file is opened for appending and
  * becomes the accounting file, replacing any file already in use.  With a
  * NULL uap->path any active accounting is shut down.  Returns 0 or an
  * errno value.
  */
 int
 sys_acct(struct thread *td, struct acct_args *uap)
 {
 	struct nameidata nd;
 	int error, flags, i, replacing;
 
 	/* Changing the accounting state requires PRIV_ACCT. */
 	error = priv_check(td, PRIV_ACCT);
 	if (error)
 		return (error);
 
 	/*
 	 * If accounting is to be started to a file, open that file for
 	 * appending and make sure it is a 'normal' (regular) file.
 	 */
 	if (uap->path != NULL) {
 		NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1,
 		    UIO_USERSPACE, uap->path, td);
 		flags = FWRITE | O_APPEND;
 		error = vn_open(&nd, &flags, 0, NULL);
 		if (error)
 			return (error);
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 #ifdef MAC
 		error = mac_system_check_acct(td->td_ucred, nd.ni_vp);
 		if (error) {
 			VOP_UNLOCK(nd.ni_vp, 0);
 			vn_close(nd.ni_vp, flags, td->td_ucred, td);
 			return (error);
 		}
 #endif
 		VOP_UNLOCK(nd.ni_vp, 0);
 		/* Anything other than a regular file is refused. */
 		if (nd.ni_vp->v_type != VREG) {
 			vn_close(nd.ni_vp, flags, td->td_ucred, td);
 			return (EACCES);
 		}
 #ifdef MAC
 	} else {
 		/* Disabling accounting is also subject to the MAC check. */
 		error = mac_system_check_acct(td->td_ucred, NULL);
 		if (error)
 			return (error);
 #endif
 	}
 
 	/*
 	 * Disallow concurrent access to the accounting vnode while we swap
 	 * it out, in order to prevent access after close.
 	 */
 	sx_xlock(&acct_sx);
 
 	/*
 	 * Don't log spurious disable/enable messages if we are
 	 * switching from one accounting file to another due to log
 	 * rotation.
 	 */
 	replacing = (acct_vp != NULL && uap->path != NULL);
 
 	/*
 	 * If accounting was previously enabled, kill the old space-watcher,
 	 * close the file, and (if no new file was specified) leave.  Reset
 	 * the suspended state regardless of whether accounting remains
 	 * enabled.
 	 */
 	acct_suspended = 0;
 	if (acct_vp != NULL)
 		error = acct_disable(td, !replacing);
 	if (uap->path == NULL) {
 		/* Ask a running watcher thread to exit and wake it up. */
 		if (acct_state & ACCT_RUNNING) {
 			acct_state |= ACCT_EXITREQ;
 			wakeup(&acct_state);
 		}
 		sx_xunlock(&acct_sx);
 		return (error);
 	}
 
 	/*
 	 * Create our own plimit object without limits. It will be assigned
 	 * to exiting processes.
 	 */
 	acct_limit = lim_alloc();
 	for (i = 0; i < RLIM_NLIMITS; i++)
 		acct_limit->pl_rlimit[i].rlim_cur =
 		    acct_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY;
 
 	/*
 	 * Save the new accounting file vnode, and schedule the new
 	 * free space watcher.
 	 */
 	acct_vp = nd.ni_vp;
 	acct_cred = crhold(td->td_ucred);
 	acct_flags = flags;
 	/* Reuse a watcher that is still running by cancelling its exit. */
 	if (acct_state & ACCT_RUNNING)
 		acct_state &= ~ACCT_EXITREQ;
 	else {
 		/*
 		 * Try to start up an accounting kthread.  We may start more
 		 * than one, but if so the extras will commit suicide as
 		 * soon as they start up.
 		 */
 		error = kproc_create(acct_thread, NULL, NULL, 0, 0,
 		    "accounting");
 		if (error) {
 			/* Undo the setup above; the new file is closed. */
 			(void) acct_disable(td, 0);
 			sx_xunlock(&acct_sx);
 			log(LOG_NOTICE, "Unable to start accounting thread\n");
 			return (error);
 		}
 	}
 	acct_configured = 1;
 	sx_xunlock(&acct_sx);
 	if (!replacing)
 		log(LOG_NOTICE, "Accounting enabled\n");
 	/*
 	 * NOTE(review): when a running watcher is reused, error still holds
 	 * the result of closing the previous file in acct_disable() --
 	 * confirm that returning it here is intended.
 	 */
 	return (error);
 }
 
 /*
  * Tear down the active accounting configuration: close the accounting
  * vnode, release the saved credential and the plimit object, and reset
  * the bookkeeping variables.  The caller must hold acct_sx exclusively.
  * Optionally logs a notice.  Returns the result of closing the vnode.
  */
 static int
 acct_disable(struct thread *td, int logging)
 {
 	int rv;
 
 	sx_assert(&acct_sx, SX_XLOCKED);
 
 	/* Release the resources taken when accounting was enabled. */
 	rv = vn_close(acct_vp, acct_flags, acct_cred, td);
 	crfree(acct_cred);
 	lim_free(acct_limit);
 
 	/* Mark accounting as off. */
 	acct_vp = NULL;
 	acct_cred = NULL;
 	acct_flags = 0;
 	acct_configured = 0;
 
 	if (logging != 0)
 		log(LOG_NOTICE, "Accounting disabled\n");
 	return (rv);
 }
 
 /*
  * Write out process accounting information, on process exit.
  * Data to be written out is specified in Leffler, et al.
  * and are enumerated below.  (They're also noted in the system
  * "acct.h" header file.)
  *
  * Returns 0 without writing anything when accounting is not enabled or is
  * suspended; otherwise returns the result of the vn_rdwr() that appends
  * the record to the accounting file.
  */
 int
 acct_process(struct thread *td)
 {
-	struct acctv2 acct;
+	struct acctv3 acct;
 	struct timeval ut, st, tmp;
 	struct plimit *oldlim;
 	struct proc *p;
 	struct rusage ru;
 	int t, ret;
 
 	/*
 	 * Lockless check of accounting condition before doing the hard
 	 * work.
 	 */
 	if (acct_vp == NULL || acct_suspended)
 		return (0);
 
 	/* A shared lock is enough: the accounting state is only read here. */
 	sx_slock(&acct_sx);
 
 	/*
 	 * If accounting isn't enabled, don't bother.  Have to check again
 	 * once we own the lock in case we raced with disabling of accounting
 	 * by another thread.
 	 */
 	if (acct_vp == NULL || acct_suspended) {
 		sx_sunlock(&acct_sx);
 		return (0);
 	}
 
 	p = td->td_proc;
 
 	/*
 	 * Get process accounting information.
 	 *
 	 * NOTE(review): acct is filled field by field and never zeroed, yet
 	 * all sizeof(acct) bytes are written out below -- confirm the
 	 * structure has no padding or fields left unset.
 	 */
 
 	sx_slock(&proctree_lock);
 	PROC_LOCK(p);
 
 	/* (1) The terminal from which the process was started */
 	if ((p->p_flag & P_CONTROLT) && p->p_pgrp->pg_session->s_ttyp)
 		acct.ac_tty = tty_udev(p->p_pgrp->pg_session->s_ttyp);
 	else
 		acct.ac_tty = NODEV;
 	/* proctree_lock is dropped here; the process lock is still held. */
 	sx_sunlock(&proctree_lock);
 
 	/* (2) The name of the command that ran */
 	bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm);
 
 	/* (3) The amount of user and system time that was used */
 	rufetchcalc(p, &ru, &ut, &st);
 	acct.ac_utime = encode_timeval(ut);
 	acct.ac_stime = encode_timeval(st);
 
 	/* (4) The elapsed time the command ran (and its starting time) */
 	getboottime(&tmp);
 	timevaladd(&tmp, &p->p_stats->p_start);
 	acct.ac_btime = tmp.tv_sec;
 	microuptime(&tmp);
 	timevalsub(&tmp, &p->p_stats->p_start);
 	acct.ac_etime = encode_timeval(tmp);
 
 	/* (5) The average amount of memory used */
 	tmp = ut;
 	timevaladd(&tmp, &st);
 	/* Convert tmp (i.e. u + s) into hz units to match ru_i*. */
 	t = tmp.tv_sec * hz + tmp.tv_usec / tick;
 	/*
 	 * Guard against dividing by zero.  The doubled '+' before
 	 * ru.ru_isrss is a unary plus and has no effect.
 	 */
 	if (t)
 		acct.ac_mem = encode_long((ru.ru_ixrss + ru.ru_idrss +
 		    + ru.ru_isrss) / t);
 	else
 		acct.ac_mem = 0;
 
 	/* (6) The number of disk I/O operations done */
 	acct.ac_io = encode_long(ru.ru_inblock + ru.ru_oublock);
 
 	/* (7) The UID and GID of the process */
 	acct.ac_uid = p->p_ucred->cr_ruid;
 	acct.ac_gid = p->p_ucred->cr_rgid;
 
 	/* (8) The boolean flags that tell how the process terminated, etc. */
 	acct.ac_flagx = p->p_acflag;
 
 	/*
 	 * Setup ancillary structure fields.  The record length is stored in
 	 * both ac_len and ac_len2.
 	 */
 	acct.ac_flagx |= ANVER;
 	acct.ac_zero = 0;
-	acct.ac_version = 2;
+	acct.ac_version = 3;
 	acct.ac_len = acct.ac_len2 = sizeof(acct);
 
 	/*
 	 * Eliminate rlimits (file size limit in particular).
 	 */
 	oldlim = p->p_limit;
 	p->p_limit = lim_hold(acct_limit);
 	PROC_UNLOCK(p);
 	/* The old limits are released only after the process lock is dropped. */
 	lim_free(oldlim);
 
 	/*
 	 * Write the accounting information to the file.
 	 */
 	ret = vn_rdwr(UIO_WRITE, acct_vp, (caddr_t)&acct, sizeof (acct),
 	    (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, acct_cred, NOCRED,
 	    NULL, td);
 	sx_sunlock(&acct_sx);
 	return (ret);
 }
 
 /* FLOAT_CONVERSION_START (Regression testing; don't remove this line.) */
 
 /* Convert timevals and longs into IEEE-754 bit patterns. */
 
 /* Mantissa mask (MSB is implied, so subtract 1). */
 #define MANT_MASK ((1 << (FLT_MANT_DIG - 1)) - 1)
 
 /*
  * We calculate integer values to a precision of approximately
  * 28 bits.
  * This is high-enough precision to fill the 24 float bits
  * and low-enough to avoid overflowing the 32 int bits.
  */
 #define CALC_BITS 28
 
 /* log_2(1000000). */
 #define LOG2_1M 20
 
 /*
  * Encode a timeval as a 32-bit word holding the bits of an IEEE-754
  * float.  The float value represents the timeval in microsecond units.
  * A timeval of zero encodes as 0.
  */
 static uint32_t
 encode_timeval(struct timeval tv)
 {
 	int sec_log2;	/* fls() of the seconds, minus one */
 	int usecs;	/* Unnormalized value */
 	int scale;	/* Binary exponent usecs was scaled down by */
 	int top_bit;	/* Normalized exponent */
 	int shift;
 	int biased_exp, mantissa;
 
 	/*
 	 * First calculate value and exponent to about CALC_BITS precision.
 	 */
 	scale = 0;
 	if (tv.tv_sec != 0) {
 		sec_log2 = fls(tv.tv_sec) - 1;
 		if (sec_log2 + LOG2_1M >= CALC_BITS) {
 			/*
 			 * Too wide for an int: do the arithmetic in 64
 			 * bits and shift the excess bits out.
 			 */
 			scale = sec_log2 + LOG2_1M - CALC_BITS;
 			usecs = (unsigned int)(((uint64_t)1000000 * tv.tv_sec +
 			    tv.tv_usec) >> scale);
 		} else {
 			usecs = 1000000 * tv.tv_sec + tv.tv_usec;
 		}
 	} else if (tv.tv_usec != 0) {
 		usecs = tv.tv_usec;
 	} else {
 		return (0);
 	}
 
 	/* Now normalize and pack the value into an IEEE-754 float. */
 	top_bit = fls(usecs) - 1;
 	shift = FLT_MANT_DIG - top_bit - 1;
 	biased_exp = FLT_MAX_EXP - 1 + scale + top_bit;
 	if (shift > 0)
 		mantissa = (usecs << shift) & MANT_MASK;
 	else
 		mantissa = (usecs >> -shift) & MANT_MASK;
 #ifdef ACCT_DEBUG
 	printf("val=%d exp=%d shift=%d log2(val)=%d\n",
 	    usecs, scale, shift, top_bit);
 	printf("exp=%x mant=%x\n", biased_exp, mantissa);
 #endif
 	return ((biased_exp << (FLT_MANT_DIG - 1)) | mantissa);
 }
 
 /*
  * Convert a non-negative long value into the bit pattern of
  * an IEEE-754 float value.
  *
  * Zero encodes as 0.  A negative value is logged and then encoded as
  * LONG_MAX.
  */
 static uint32_t
 encode_long(long val)
 {
 	int norm_exp;	/* Normalized exponent */
 	int shift;
 
 	if (val == 0)
 		return (0);
 	if (val < 0) {
 		log(LOG_NOTICE,
 		    "encode_long: negative value %ld in accounting record\n",
 		    val);
 		val = LONG_MAX;
 	}
 	/*
 	 * Use flsl(), not fls(): fls() takes an int, so on platforms where
 	 * long is wider than int it would only look at the low bits of val
 	 * and mis-encode anything above INT_MAX (including LONG_MAX above).
 	 */
 	norm_exp = flsl(val) - 1;
 	shift = FLT_MANT_DIG - norm_exp - 1;
 #ifdef ACCT_DEBUG
 	printf("val=%ld shift=%d log2(val)=%d\n",
 	    val, shift, norm_exp);
 	printf("exp=%x mant=%lx\n", FLT_MAX_EXP - 1 + norm_exp,
 	    ((shift > 0 ? (val << shift) : (val >> -shift)) & MANT_MASK));
 #endif
 	return (((FLT_MAX_EXP - 1 + norm_exp) << (FLT_MANT_DIG - 1)) |
 	    ((shift > 0 ? val << shift : val >> -shift) & MANT_MASK));
 }
 
 /* FLOAT_CONVERSION_END (Regression testing; don't remove this line.) */
 
 /*
  * Periodically check the filesystem to see if accounting
  * should be turned on or off.  Beware the case where the vnode
  * has been vgone()'d out from underneath us, e.g. when the file
  * system containing the accounting file has been forcibly unmounted.
  *
  * Must be called with acct_sx held exclusively.  Accounting is suspended
  * once the available space drops to acctsuspend percent of the file
  * system or less, and resumed once it rises above acctresume percent.
  */
 static void
 acctwatch(void)
 {
 	struct statfs *sp;
 
 	sx_assert(&acct_sx, SX_XLOCKED);
 
 	/*
 	 * If accounting was disabled before our kthread was scheduled,
 	 * then acct_vp might be NULL.  If so, just ask our kthread to
 	 * exit and return.
 	 */
 	if (acct_vp == NULL) {
 		acct_state |= ACCT_EXITREQ;
 		return;
 	}
 
 	/*
 	 * If our vnode is no longer valid, tear it down and signal the
 	 * accounting thread to die.
 	 */
 	if (acct_vp->v_type == VBAD) {
 		(void) acct_disable(NULL, 1);
 		acct_state |= ACCT_EXITREQ;
 		return;
 	}
 
 	/*
 	 * Stopping here is better than continuing, maybe it will be VBAD
 	 * next time around.
 	 *
 	 * VFS_STATFS() reports failure with a non-zero (positive) errno
 	 * value, so test for != 0; a "< 0" test never fires and would let
 	 * the uninitialized buffer be used below.
 	 */
 	sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	if (VFS_STATFS(acct_vp->v_mount, sp) != 0) {
 		free(sp, M_STATFS);
 		return;
 	}
 	if (acct_suspended) {
 		if (sp->f_bavail > (int64_t)(acctresume * sp->f_blocks /
 		    100)) {
 			acct_suspended = 0;
 			log(LOG_NOTICE, "Accounting resumed\n");
 		}
 	} else {
 		if (sp->f_bavail <= (int64_t)(acctsuspend * sp->f_blocks /
 		    100)) {
 			acct_suspended = 1;
 			log(LOG_NOTICE, "Accounting suspended\n");
 		}
 	}
 	free(sp, M_STATFS);
 }
 
 /*
  * Body of the dedicated accounting kernel thread: call acctwatch() every
  * acctchkfreq seconds (or sooner when woken on acct_state) until an exit
  * is requested.
  */
 static void
 acct_thread(void *dummy)
 {
 	u_char prio = PRI_MAX_KERN;
 
 	/* This is a low-priority kernel thread. */
 	thread_lock(curthread);
 	sched_prio(curthread, prio);
 	thread_unlock(curthread);
 
 	sx_xlock(&acct_sx);
 
 	/* If another accounting kthread is already running, just die. */
 	if ((acct_state & ACCT_RUNNING) != 0) {
 		sx_xunlock(&acct_sx);
 		kproc_exit(0);
 	}
 	acct_state |= ACCT_RUNNING;
 
 	/* Loop until we are asked to exit. */
 	while ((acct_state & ACCT_EXITREQ) == 0) {
 		/* Perform our periodic checks. */
 		acctwatch();
 
 		/*
 		 * acctwatch() may itself have shut accounting down and
 		 * requested our exit, so look again before sleeping.
 		 */
 		if ((acct_state & ACCT_EXITREQ) != 0)
 			break;
 		sx_sleep(&acct_state, &acct_sx, 0, "-", acctchkfreq * hz);
 	}
 
 	/*
 	 * Acknowledge the exit request and shutdown.  We clear both the
 	 * exit request and running flags.
 	 */
 	acct_state = 0;
 	sx_xunlock(&acct_sx);
 	kproc_exit(0);
 }
Index: head/sys/kern/kern_descrip.c
===================================================================
--- head/sys/kern/kern_descrip.c	(revision 318735)
+++ head/sys/kern/kern_descrip.c	(revision 318736)
@@ -1,4129 +1,4164 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 #include "opt_compat.h"
 #include "opt_ddb.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 
 #include <sys/capsicum.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/filio.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/selinfo.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/sbuf.h>
 #include <sys/signalvar.h>
 #include <sys/kdb.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/unistd.h>
 #include <sys/user.h>
 #include <sys/vnode.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #include <net/vnet.h>
 
 #include <security/audit/audit.h>
 
 #include <vm/uma.h>
 #include <vm/vm.h>
 
 #include <ddb/ddb.h>
 
 static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
     "file desc to leader structures");
 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
 MALLOC_DEFINE(M_FILECAPS, "filecaps", "descriptor capabilities");
 
 MALLOC_DECLARE(M_FADVISE);
 
 static __read_mostly uma_zone_t file_zone;
 static __read_mostly uma_zone_t filedesc0_zone;
 
 static int	closefp(struct filedesc *fdp, int fd, struct file *fp,
 		    struct thread *td, int holdleaders);
 static int	fd_first_free(struct filedesc *fdp, int low, int size);
 static int	fd_last_used(struct filedesc *fdp, int size);
 static void	fdgrowtable(struct filedesc *fdp, int nfd);
 static void	fdgrowtable_exp(struct filedesc *fdp, int nfd);
 static void	fdunused(struct filedesc *fdp, int fd);
 static void	fdused(struct filedesc *fdp, int fd);
 static int	getmaxfd(struct thread *td);
 
 /*
  * Each process has:
  *
  * - An array of open file descriptors (fd_ofiles)
  * - An array of file flags (fd_ofileflags)
  * - A bitmap recording which descriptors are in use (fd_map)
  *
  * A process starts out with NDFILE descriptors.  The value of NDFILE has
  * been selected based on the historical limit of 20 open files, and an
  * assumption that the majority of processes, especially short-lived
  * processes like shells, will never need more.
  *
  * If this initial allocation is exhausted, a larger descriptor table and
  * map are allocated dynamically, and the pointers in the process's struct
  * filedesc are updated to point to those.  This is repeated every time
  * the process runs out of file descriptors (provided it hasn't hit its
  * resource limit).
  *
  * Since threads may hold references to individual descriptor table
  * entries, the tables are never freed.  Instead, they are placed on a
  * linked list and freed only when the struct filedesc is released.
  */
 #define NDFILE		20	/* descriptors in the initial table */
 #define NDSLOTSIZE	sizeof(NDSLOTTYPE)	/* bytes per bitmap slot */
 #define	NDENTRIES	(NDSLOTSIZE * __CHAR_BIT)	/* bits per bitmap slot */
 #define NDSLOT(x)	((x) / NDENTRIES)	/* index of the slot holding bit x */
 #define NDBIT(x)	((NDSLOTTYPE)1 << ((x) % NDENTRIES))	/* mask of bit x within its slot */
 #define	NDSLOTS(x)	(((x) + NDENTRIES - 1) / NDENTRIES)	/* slots needed for x bits, rounded up */
 
 /*
  * SLIST entry used to keep track of ofiles which must be reclaimed when
  * the process exits.
  */
 struct freetable {
 	struct fdescenttbl *ft_table;	/* table this entry keeps track of */
 	SLIST_ENTRY(freetable) ft_next;	/* SLIST linkage to the next entry */
 };
 
 /*
  * Initial allocation: a filedesc structure + the head of SLIST used to
  * keep track of old ofiles + enough space for NDFILE descriptors.
  */
 
 /* Descriptor table with room for exactly NDFILE entries. */
 struct fdescenttbl0 {
 	int	fdt_nfiles;
 	struct	filedescent fdt_ofiles[NDFILE];
 };
 
 /* The initial per-process allocation described above. */
 struct filedesc0 {
 	struct filedesc fd_fd;		/* the filedesc structure itself */
 	SLIST_HEAD(, freetable) fd_free;	/* head of the list of old ofiles */
 	struct	fdescenttbl0 fd_dfiles;	/* space for NDFILE descriptors */
 	NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];	/* NOTE(review): presumably the initial in-use bitmap - confirm */
 };
 
 /*
  * Descriptor management.
  */
 volatile int __exclusive_cache_line openfiles; /* actual number of open files */
 struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
 void __read_mostly (*mq_fdclose)(struct thread *td, int fd, struct file *fp);
 
 /*
  * If low >= size, just return low. Otherwise find the first zero bit in the
  * given bitmap, starting at low and not exceeding size - 1. Return size if
  * not found.
  */
 static int
 fd_first_free(struct filedesc *fdp, int low, int size)
 {
 	NDSLOTTYPE *map = fdp->fd_map;
 	NDSLOTTYPE mask;
 	int off, maxoff;
 
 	/* Nothing to search when the starting point is at or past size. */
 	if (low >= size)
 		return (low);
 
 	off = NDSLOT(low);
 	if (low % NDENTRIES) {
 		/*
 		 * low is not slot-aligned: build a mask selecting only the
 		 * bits at or above low's position in this slot, then keep
 		 * those that are clear (free) in the map.
 		 */
 		mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES)));
 		if ((mask &= ~map[off]) != 0UL)
 			return (off * NDENTRIES + ffsl(mask) - 1);
 		++off;
 	}
 	/* Scan the remaining slots whole; a slot of all ones has no free bit. */
 	for (maxoff = NDSLOTS(size); off < maxoff; ++off)
 		if (map[off] != ~0UL)
 			return (off * NDENTRIES + ffsl(~map[off]) - 1);
 	return (size);
 }
 
 /*
  * Find the highest non-zero bit in the given bitmap, starting at 0 and
  * not exceeding size - 1. Return -1 if not found.
  */
 static int
 fd_last_used(struct filedesc *fdp, int size)
 {
 	NDSLOTTYPE *map = fdp->fd_map;
 	NDSLOTTYPE mask;
 	int off, minoff;
 
 	off = NDSLOT(size);
 	if (size % NDENTRIES) {
 		/*
 		 * size is not slot-aligned: look only at the bits below
 		 * size's position in this slot before moving to lower slots.
 		 */
 		mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES));
 		if ((mask &= map[off]) != 0)
 			return (off * NDENTRIES + flsl(mask) - 1);
 		--off;
 	}
 	/* Walk the remaining slots downwards looking for any set bit. */
 	for (minoff = NDSLOT(0); off >= minoff; --off)
 		if (map[off] != 0)
 			return (off * NDENTRIES + flsl(map[off]) - 1);
 	return (-1);
 }
 
 static int
 fdisused(struct filedesc *fdp, int fd)
 {
 
 	KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
 	    ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));
 
 	return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
 }
 
 /*
  * Mark a file descriptor as used.
  */
 static void
 fdused_init(struct filedesc *fdp, int fd)
 {
 
 	KASSERT(!fdisused(fdp, fd), ("fd=%d is already used", fd));
 
 	fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
 }
 
 /*
  * Mark a descriptor as used and keep fd_lastfile and fd_freefile current.
  * The filedesc lock must be held exclusively.
  */
 static void
 fdused(struct filedesc *fdp, int fd)
 {
 
 	FILEDESC_XLOCK_ASSERT(fdp);
 
 	fdused_init(fdp, fd);
 	if (fdp->fd_lastfile < fd)
 		fdp->fd_lastfile = fd;
 	/* The recorded first-free descriptor was just taken; find the next. */
 	if (fdp->fd_freefile == fd)
 		fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles);
 }
 
 /*
  * Mark a file descriptor as unused.
  */
 static void
 fdunused(struct filedesc *fdp, int fd)
 {
 
 	FILEDESC_XLOCK_ASSERT(fdp);
 
 	KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd));
 	KASSERT(fdp->fd_ofiles[fd].fde_file == NULL,
 	    ("fd=%d is still in use", fd));
 
 	fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
 	if (fd < fdp->fd_freefile)
 		fdp->fd_freefile = fd;
 	if (fd == fdp->fd_lastfile)
 		fdp->fd_lastfile = fd_last_used(fdp, fd);
 }
 
 /*
  * Release a descriptor table entry.
  *
  * fdefree_last() is the reduced variant that avoids some work when fdp is
  * about to be destroyed: it only frees the entry's capability data.
  */
 static inline void
 fdefree_last(struct filedescent *fde)
 {
 
 	filecaps_free(&fde->fde_caps);
 }
 
 /*
  * Full variant: free the capability data, clear the file pointer and mark
  * the descriptor unused.  With CAPABILITIES the update is bracketed by the
  * entry's sequence counter.
  */
 static inline void
 fdfree(struct filedesc *fdp, int fd)
 {
 	struct filedescent *entry = &fdp->fd_ofiles[fd];
 
 #ifdef CAPABILITIES
 	seq_write_begin(&entry->fde_seq);
 #endif
 	filecaps_free(&entry->fde_caps);
 	entry->fde_file = NULL;
 	fdunused(fdp, fd);
 #ifdef CAPABILITIES
 	seq_write_end(&entry->fde_seq);
 #endif
 }
 
 /*
  * Make sure the current process has both a current and a root directory:
  * each one that is still NULL is set to rootvnode, with vrefact() called
  * on the vnode for every pointer installed.
  */
 void
 pwd_ensure_dirs(void)
 {
 	struct filedesc *fdp = curproc->p_fd;
 
 	FILEDESC_XLOCK(fdp);
 	if (fdp->fd_cdir == NULL) {
 		vrefact(rootvnode);
 		fdp->fd_cdir = rootvnode;
 	}
 	if (fdp->fd_rdir == NULL) {
 		vrefact(rootvnode);
 		fdp->fd_rdir = rootvnode;
 	}
 	FILEDESC_XUNLOCK(fdp);
 }
 
 /*
  * System calls on descriptors.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct getdtablesize_args {
 	int	dummy;
 };
 #endif
 /* ARGSUSED */
 /*
  * Report the descriptor table size limit for the calling thread in
  * td->td_retval[0].  The value is the getmaxfd() bound (RLIMIT_NOFILE
  * clamped by maxfilesperproc), further lowered to the RACCT_NOFILE limit
  * when RACCT is compiled in.  Always returns 0.
  */
 int
 sys_getdtablesize(struct thread *td, struct getdtablesize_args *uap)
 {
 #ifdef	RACCT
 	uint64_t lim;
 #endif
 
 	/* Use the shared helper rather than open-coding the same clamp. */
 	td->td_retval[0] = getmaxfd(td);
 #ifdef	RACCT
 	PROC_LOCK(td->td_proc);
 	lim = racct_get_limit(td->td_proc, RACCT_NOFILE);
 	PROC_UNLOCK(td->td_proc);
 	if (lim < td->td_retval[0])
 		td->td_retval[0] = lim;
 #endif
 	return (0);
 }
 
 /*
  * Duplicate a file descriptor to a particular value.
  *
  * Note: keep in mind that a potential race condition exists when closing
  * descriptors from a shared descriptor table (via rfork).
  */
 #ifndef _SYS_SYSPROTO_H_
 struct dup2_args {
 	u_int	from;
 	u_int	to;
 };
 #endif
 /* ARGSUSED */
 int
 sys_dup2(struct thread *td, struct dup2_args *uap)
 {
 	int oldfd, newfd;
 
 	/* Duplicate 'from' onto the exact descriptor number 'to'. */
 	oldfd = (int)uap->from;
 	newfd = (int)uap->to;
 	return (kern_dup(td, FDDUP_FIXED, 0, oldfd, newfd));
 }
 
 /*
  * Duplicate a file descriptor.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct dup_args {
 	u_int	fd;
 };
 #endif
 /* ARGSUSED */
 int
 sys_dup(struct thread *td, struct dup_args *uap)
 {
 	int oldfd;
 
 	/* Let kern_dup() pick the new descriptor number (search from 0). */
 	oldfd = (int)uap->fd;
 	return (kern_dup(td, FDDUP_NORMAL, 0, oldfd, 0));
 }
 
 /*
  * The file control system call.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fcntl_args {
 	int	fd;
 	int	cmd;
 	long	arg;
 };
 #endif
 /* ARGSUSED */
 int
 sys_fcntl(struct thread *td, struct fcntl_args *uap)
 {
 
 	return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, uap->arg));
 }
 
 /*
  * Front end for fcntl(2) that handles user-space argument marshalling.
  *
  * For the locking commands the struct flock (or the old struct __oflock,
  * which is converted) is copied in from the user address in arg and a
  * pointer to the kernel copy is handed to kern_fcntl(); for every other
  * command arg is passed through unchanged.  After a successful F_GETLK or
  * F_OGETLK the result is copied back out to the same user address.
  *
  * Returns 0 on success or an errno value from copyin(), kern_fcntl() or
  * copyout().
  */
 int
 kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg)
 {
 	struct flock fl;
 	struct __oflock ofl;
 	intptr_t arg1;
 	int error, newcmd;
 
 	error = 0;
 	newcmd = cmd;
 	switch (cmd) {
 	case F_OGETLK:
 	case F_OSETLK:
 	case F_OSETLKW:
 		/*
 		 * Convert old flock structure to new.  Fail before the
 		 * conversion if the copyin did not succeed, so that ofl is
 		 * never read while still uninitialized.
 		 */
 		error = copyin((void *)(intptr_t)arg, &ofl, sizeof(ofl));
 		if (error != 0)
 			return (error);
 		fl.l_start = ofl.l_start;
 		fl.l_len = ofl.l_len;
 		fl.l_pid = ofl.l_pid;
 		fl.l_type = ofl.l_type;
 		fl.l_whence = ofl.l_whence;
 		fl.l_sysid = 0;
 
 		switch (cmd) {
 		case F_OGETLK:
 			newcmd = F_GETLK;
 			break;
 		case F_OSETLK:
 			newcmd = F_SETLK;
 			break;
 		case F_OSETLKW:
 			newcmd = F_SETLKW;
 			break;
 		}
 		arg1 = (intptr_t)&fl;
 		break;
 	case F_GETLK:
 	case F_SETLK:
 	case F_SETLKW:
 	case F_SETLK_REMOTE:
 		error = copyin((void *)(intptr_t)arg, &fl, sizeof(fl));
 		arg1 = (intptr_t)&fl;
 		break;
 	default:
 		arg1 = arg;
 		break;
 	}
 	if (error)
 		return (error);
 	error = kern_fcntl(td, fd, newcmd, arg1);
 	if (error)
 		return (error);
 	if (cmd == F_OGETLK) {
 		/* Convert the result back to the old layout for the caller. */
 		ofl.l_start = fl.l_start;
 		ofl.l_len = fl.l_len;
 		ofl.l_pid = fl.l_pid;
 		ofl.l_type = fl.l_type;
 		ofl.l_whence = fl.l_whence;
 		error = copyout(&ofl, (void *)(intptr_t)arg, sizeof(ofl));
 	} else if (cmd == F_GETLK) {
 		error = copyout(&fl, (void *)(intptr_t)arg, sizeof(fl));
 	}
 	return (error);
 }
 
 /*
  * Kernel implementation of fcntl(2) operating on kernel-space arguments.
  *
  * td	calling thread; integer results are returned in td->td_retval[0]
  * fd	descriptor the command applies to
  * cmd	one of the F_* commands handled in the switch below
  * arg	command argument; for F_GETLK, F_SETLK, F_SETLKW and
  *	F_SETLK_REMOTE it is a pointer to a struct flock in kernel memory
  *
  * Returns 0 on success or an errno value; unknown commands yield EINVAL.
  */
 int
 kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
 {
 	struct filedesc *fdp;
 	struct flock *flp;
 	struct file *fp, *fp2;
 	struct filedescent *fde;
 	struct proc *p;
 	struct vnode *vp;
 	cap_rights_t rights;
 	int error, flg, tmp;
 	uint64_t bsize;
 	off_t foffset;
 
 	error = 0;
 	flg = F_POSIX;
 	p = td->td_proc;
 	fdp = p->p_fd;
 
 	/* Audit the descriptor itself, not the command, as the fd argument. */
 	AUDIT_ARG_FD(fd);
 	AUDIT_ARG_CMD(cmd);
 	switch (cmd) {
 	case F_DUPFD:
 		tmp = arg;
 		error = kern_dup(td, FDDUP_FCNTL, 0, fd, tmp);
 		break;
 
 	case F_DUPFD_CLOEXEC:
 		tmp = arg;
 		error = kern_dup(td, FDDUP_FCNTL, FDDUP_FLAG_CLOEXEC, fd, tmp);
 		break;
 
 	case F_DUP2FD:
 		tmp = arg;
 		error = kern_dup(td, FDDUP_FIXED, 0, fd, tmp);
 		break;
 
 	case F_DUP2FD_CLOEXEC:
 		tmp = arg;
 		error = kern_dup(td, FDDUP_FIXED, FDDUP_FLAG_CLOEXEC, fd, tmp);
 		break;
 
 	case F_GETFD:
 		error = EBADF;
 		FILEDESC_SLOCK(fdp);
 		fde = fdeget_locked(fdp, fd);
 		if (fde != NULL) {
 			td->td_retval[0] =
 			    (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0;
 			error = 0;
 		}
 		FILEDESC_SUNLOCK(fdp);
 		break;
 
 	case F_SETFD:
 		error = EBADF;
 		FILEDESC_XLOCK(fdp);
 		fde = fdeget_locked(fdp, fd);
 		if (fde != NULL) {
 			fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) |
 			    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
 			error = 0;
 		}
 		FILEDESC_XUNLOCK(fdp);
 		break;
 
 	case F_GETFL:
 		error = fget_fcntl(td, fd,
 		    cap_rights_init(&rights, CAP_FCNTL), F_GETFL, &fp);
 		if (error != 0)
 			break;
 		td->td_retval[0] = OFLAGS(fp->f_flag);
 		fdrop(fp, td);
 		break;
 
 	case F_SETFL:
 		error = fget_fcntl(td, fd,
 		    cap_rights_init(&rights, CAP_FCNTL), F_SETFL, &fp);
 		if (error != 0)
 			break;
 		/* Replace the FCNTLFLAGS bits of f_flag, retrying on a race. */
 		do {
 			tmp = flg = fp->f_flag;
 			tmp &= ~FCNTLFLAGS;
 			tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
 		} while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
 		tmp = fp->f_flag & FNONBLOCK;
 		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
 		if (error != 0) {
 			fdrop(fp, td);
 			break;
 		}
 		tmp = fp->f_flag & FASYNC;
 		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
 		if (error == 0) {
 			fdrop(fp, td);
 			break;
 		}
 		/* FIOASYNC failed: clear FNONBLOCK again before returning. */
 		atomic_clear_int(&fp->f_flag, FNONBLOCK);
 		tmp = 0;
 		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
 		fdrop(fp, td);
 		break;
 
 	case F_GETOWN:
 		error = fget_fcntl(td, fd,
 		    cap_rights_init(&rights, CAP_FCNTL), F_GETOWN, &fp);
 		if (error != 0)
 			break;
 		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
 		if (error == 0)
 			td->td_retval[0] = tmp;
 		fdrop(fp, td);
 		break;
 
 	case F_SETOWN:
 		error = fget_fcntl(td, fd,
 		    cap_rights_init(&rights, CAP_FCNTL), F_SETOWN, &fp);
 		if (error != 0)
 			break;
 		tmp = arg;
 		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
 		fdrop(fp, td);
 		break;
 
 	case F_SETLK_REMOTE:
 		error = priv_check(td, PRIV_NFS_LOCKD);
 		if (error)
 			return (error);
 		flg = F_REMOTE;
 		goto do_setlk;
 
 	case F_SETLKW:
 		flg |= F_WAIT;
 		/* FALLTHROUGH F_SETLK */
 
 	case F_SETLK:
 	do_setlk:
 		cap_rights_init(&rights, CAP_FLOCK);
 		error = fget_unlocked(fdp, fd, &rights, &fp, NULL);
 		if (error != 0)
 			break;
 		if (fp->f_type != DTYPE_VNODE) {
 			error = EBADF;
 			fdrop(fp, td);
 			break;
 		}
 
 		flp = (struct flock *)arg;
 		if (flp->l_whence == SEEK_CUR) {
 			/* Make l_start absolute, rejecting offset overflow. */
 			foffset = foffset_get(fp);
 			if (foffset < 0 ||
 			    (flp->l_start > 0 &&
 			     foffset > OFF_MAX - flp->l_start)) {
 				error = EOVERFLOW;
 				fdrop(fp, td);
 				break;
 			}
 			flp->l_start += foffset;
 		}
 
 		vp = fp->f_vnode;
 		switch (flp->l_type) {
 		case F_RDLCK:
 			if ((fp->f_flag & FREAD) == 0) {
 				error = EBADF;
 				break;
 			}
 			PROC_LOCK(p->p_leader);
 			p->p_leader->p_flag |= P_ADVLOCK;
 			PROC_UNLOCK(p->p_leader);
 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
 			    flp, flg);
 			break;
 		case F_WRLCK:
 			if ((fp->f_flag & FWRITE) == 0) {
 				error = EBADF;
 				break;
 			}
 			PROC_LOCK(p->p_leader);
 			p->p_leader->p_flag |= P_ADVLOCK;
 			PROC_UNLOCK(p->p_leader);
 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
 			    flp, flg);
 			break;
 		case F_UNLCK:
 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
 			    flp, flg);
 			break;
 		case F_UNLCKSYS:
 			/*
 			 * Temporary api for testing remote lock
 			 * infrastructure.
 			 */
 			if (flg != F_REMOTE) {
 				error = EINVAL;
 				break;
 			}
 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
 			    F_UNLCKSYS, flp, flg);
 			break;
 		default:
 			error = EINVAL;
 			break;
 		}
 		if (error != 0 || flp->l_type == F_UNLCK ||
 		    flp->l_type == F_UNLCKSYS) {
 			fdrop(fp, td);
 			break;
 		}
 
 		/*
 		 * Check for a race with close.
 		 *
 		 * The vnode is now advisory locked (or unlocked, but this case
 		 * is not really important) as the caller requested.
 		 * We had to drop the filedesc lock, so we need to recheck if
 		 * the descriptor is still valid, because if it was closed
 		 * in the meantime we need to remove advisory lock from the
 		 * vnode - close on any descriptor leading to an advisory
 		 * locked vnode, removes that lock.
 		 * We will return 0 on purpose in that case, as the result of
 		 * successful advisory lock might have been externally visible
 		 * already. This is fine - effectively we pretend to the caller
 		 * that the closing thread was a bit slower and that the
 		 * advisory lock succeeded before the close.
 		 */
 		error = fget_unlocked(fdp, fd, &rights, &fp2, NULL);
 		if (error != 0) {
 			fdrop(fp, td);
 			break;
 		}
 		if (fp != fp2) {
 			flp->l_whence = SEEK_SET;
 			flp->l_start = 0;
 			flp->l_len = 0;
 			flp->l_type = F_UNLCK;
 			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
 			    F_UNLCK, flp, F_POSIX);
 		}
 		fdrop(fp, td);
 		fdrop(fp2, td);
 		break;
 
 	case F_GETLK:
 		error = fget_unlocked(fdp, fd,
 		    cap_rights_init(&rights, CAP_FLOCK), &fp, NULL);
 		if (error != 0)
 			break;
 		if (fp->f_type != DTYPE_VNODE) {
 			error = EBADF;
 			fdrop(fp, td);
 			break;
 		}
 		flp = (struct flock *)arg;
 		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
 		    flp->l_type != F_UNLCK) {
 			error = EINVAL;
 			fdrop(fp, td);
 			break;
 		}
 		if (flp->l_whence == SEEK_CUR) {
 			foffset = foffset_get(fp);
 			if ((flp->l_start > 0 &&
 			    foffset > OFF_MAX - flp->l_start) ||
 			    (flp->l_start < 0 &&
 			    foffset < OFF_MIN - flp->l_start)) {
 				error = EOVERFLOW;
 				fdrop(fp, td);
 				break;
 			}
 			flp->l_start += foffset;
 		}
 		vp = fp->f_vnode;
 		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
 		    F_POSIX);
 		fdrop(fp, td);
 		break;
 
 	case F_RDAHEAD:
 		/* Boolean form: non-zero selects a fixed 128KB read-ahead. */
 		arg = arg ? 128 * 1024: 0;
 		/* FALLTHROUGH */
 	case F_READAHEAD:
 		error = fget_unlocked(fdp, fd,
 		    cap_rights_init(&rights), &fp, NULL);
 		if (error != 0)
 			break;
 		if (fp->f_type != DTYPE_VNODE) {
 			fdrop(fp, td);
 			error = EBADF;
 			break;
 		}
 		vp = fp->f_vnode;
 		/*
 		 * Exclusive lock synchronizes against f_seqcount reads and
 		 * writes in sequential_heuristic().
 		 */
 		error = vn_lock(vp, LK_EXCLUSIVE);
 		if (error != 0) {
 			fdrop(fp, td);
 			break;
 		}
 		if (arg >= 0) {
 			/* Convert the byte count to I/O-sized blocks, rounded up. */
 			bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize;
 			fp->f_seqcount = (arg + bsize - 1) / bsize;
 			atomic_set_int(&fp->f_flag, FRDAHEAD);
 		} else {
 			atomic_clear_int(&fp->f_flag, FRDAHEAD);
 		}
 		VOP_UNLOCK(vp, 0);
 		fdrop(fp, td);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	return (error);
 }
 
 /*
  * Return the smaller of the thread's current RLIMIT_NOFILE limit and
  * maxfilesperproc.
  */
 static int
 getmaxfd(struct thread *td)
 {
 	int nofile;
 
 	nofile = (int)lim_cur(td, RLIMIT_NOFILE);
 	return (min(nofile, maxfilesperproc));
 }
 
 /*
  * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD).
  *
  * td	calling thread; the resulting descriptor is stored in
  *	td->td_retval[0]
  * mode	one of the FDDUP_* modes handled in the switch below
  * flags	may contain only FDDUP_FLAG_CLOEXEC (asserted)
  * old	descriptor to duplicate
  * new	target descriptor (FDDUP_FIXED, FDDUP_MUSTREPLACE) or the lowest
  *	acceptable descriptor number handed to fdalloc() (other modes)
  *
  * Returns 0 on success, otherwise EBADF, EINVAL, EMFILE or the error from
  * fdalloc().  The filedesc lock is taken and released internally.
  */
 int
 kern_dup(struct thread *td, u_int mode, int flags, int old, int new)
 {
 	struct filedesc *fdp;
 	struct filedescent *oldfde, *newfde;
 	struct proc *p;
 	struct file *delfp;
 	int error, maxfd;
 
 	p = td->td_proc;
 	fdp = p->p_fd;
 
 	MPASS((flags & ~(FDDUP_FLAG_CLOEXEC)) == 0);
 	MPASS(mode < FDDUP_LASTMODE);
 
 	AUDIT_ARG_FD(old);
 	/* XXXRW: if (flags & FDDUP_FIXED) AUDIT_ARG_FD2(new); */
 
 	/*
 	 * Verify we have a valid descriptor to dup from and possibly to
 	 * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should
 	 * return EINVAL when the new descriptor is out of bounds.
 	 */
 	if (old < 0)
 		return (EBADF);
 	if (new < 0)
 		return (mode == FDDUP_FCNTL ? EINVAL : EBADF);
 	maxfd = getmaxfd(td);
 	if (new >= maxfd)
 		return (mode == FDDUP_FCNTL ? EINVAL : EBADF);
 
 	error = EBADF;
 	FILEDESC_XLOCK(fdp);
 	if (fget_locked(fdp, old) == NULL)
 		goto unlock;
 	/* Duplicating a descriptor onto itself: only the flag may change. */
 	if ((mode == FDDUP_FIXED || mode == FDDUP_MUSTREPLACE) && old == new) {
 		td->td_retval[0] = new;
 		if (flags & FDDUP_FLAG_CLOEXEC)
 			fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE;
 		error = 0;
 		goto unlock;
 	}
 
 	/*
 	 * If the caller specified a file descriptor, make sure the file
 	 * table is large enough to hold it, and grab it.  Otherwise, just
 	 * allocate a new descriptor the usual way.
 	 */
 	switch (mode) {
 	case FDDUP_NORMAL:
 	case FDDUP_FCNTL:
 		if ((error = fdalloc(td, new, &new)) != 0)
 			goto unlock;
 		break;
 	case FDDUP_MUSTREPLACE:
 		/* Target file descriptor must exist. */
 		if (fget_locked(fdp, new) == NULL)
 			goto unlock;
 		break;
 	case FDDUP_FIXED:
 		if (new >= fdp->fd_nfiles) {
 			/*
 			 * The resource limits are here instead of e.g.
 			 * fdalloc(), because the file descriptor table may be
 			 * shared between processes, so we can't really use
 			 * racct_add()/racct_sub().  Instead of counting the
 			 * number of actually allocated descriptors, just put
 			 * the limit on the size of the file descriptor table.
 			 */
 #ifdef RACCT
 			if (racct_enable) {
 				PROC_LOCK(p);
 				error = racct_set(p, RACCT_NOFILE, new + 1);
 				PROC_UNLOCK(p);
 				if (error != 0) {
 					error = EMFILE;
 					goto unlock;
 				}
 			}
 #endif
 			fdgrowtable_exp(fdp, new + 1);
 		}
 		/* Claim the slot unless it is already occupied. */
 		if (!fdisused(fdp, new))
 			fdused(fdp, new);
 		break;
 	default:
 		KASSERT(0, ("%s unsupported mode %d", __func__, mode));
 	}
 
 	KASSERT(old != new, ("new fd is same as old"));
 
 	oldfde = &fdp->fd_ofiles[old];
 	/* NOTE(review): presumably the reference the new slot will own - confirm. */
 	fhold(oldfde->fde_file);
 	newfde = &fdp->fd_ofiles[new];
 	/* File previously installed at 'new', if any; closed further down. */
 	delfp = newfde->fde_file;
 
 	/*
 	 * Duplicate the source descriptor.
 	 */
 #ifdef CAPABILITIES
 	seq_write_begin(&newfde->fde_seq);
 #endif
 	filecaps_free(&newfde->fde_caps);
 	memcpy(newfde, oldfde, fde_change_size);
 	filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps, true);
 	/* Close-on-exec is set only on request; otherwise it is cleared. */
 	if ((flags & FDDUP_FLAG_CLOEXEC) != 0)
 		newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE;
 	else
 		newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE;
 #ifdef CAPABILITIES
 	seq_write_end(&newfde->fde_seq);
 #endif
 	td->td_retval[0] = new;
 
 	error = 0;
 
 	if (delfp != NULL) {
 		/* closefp() releases the filedesc lock; its result is ignored. */
 		(void) closefp(fdp, new, delfp, td, 1);
 		FILEDESC_UNLOCK_ASSERT(fdp);
 	} else {
 		/* The error paths above jump here with the lock still held. */
 unlock:
 		FILEDESC_XUNLOCK(fdp);
 	}
 
 	return (error);
 }
 
 /*
  * If sigio is on the list associated with a process or process group,
  * disable signalling from the device, remove sigio from the list and
  * free sigio.
  */
 void
 funsetown(struct sigio **sigiop)
 {
 	struct sigio *sigio;
 
 	if (*sigiop == NULL)
 		return;
 	SIGIO_LOCK();
 	sigio = *sigiop;
 	if (sigio == NULL) {
 		SIGIO_UNLOCK();
 		return;
 	}
 	*(sigio->sio_myref) = NULL;
 	if ((sigio)->sio_pgid < 0) {
 		struct pgrp *pg = (sigio)->sio_pgrp;
 		PGRP_LOCK(pg);
 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
 			    sigio, sio_pgsigio);
 		PGRP_UNLOCK(pg);
 	} else {
 		struct proc *p = (sigio)->sio_proc;
 		PROC_LOCK(p);
 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
 			    sigio, sio_pgsigio);
 		PROC_UNLOCK(p);
 	}
 	SIGIO_UNLOCK();
 	crfree(sigio->sio_ucred);
 	free(sigio, M_SIGIO);
 }
 
 /*
  * Free a list of sigio structures.
  * We only need to lock the SIGIO_LOCK because we have made ourselves
  * inaccessible to callers of fsetown and therefore do not need to lock
  * the proc or pgrp struct for the list manipulation.
  */
 void
 funsetownlst(struct sigiolst *sigiolst)
 {
 	struct proc *p;
 	struct pgrp *pg;
 	struct sigio *sigio;
 
 	/* An empty list needs no work. */
 	sigio = SLIST_FIRST(sigiolst);
 	if (sigio == NULL)
 		return;
 	p = NULL;
 	pg = NULL;
 
 	/*
 	 * Every entry of the list should belong
 	 * to a single proc or pgrp.
 	 */
 	if (sigio->sio_pgid < 0) {
 		pg = sigio->sio_pgrp;
 		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
 	} else /* if (sigio->sio_pgid > 0) */ {
 		p = sigio->sio_proc;
 		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 	}
 
 	SIGIO_LOCK();
 	/* Pop entries off the head of the list until it is empty. */
 	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
 		*(sigio->sio_myref) = NULL;
 		if (pg != NULL) {
 			KASSERT(sigio->sio_pgid < 0,
 			    ("Proc sigio in pgrp sigio list"));
 			KASSERT(sigio->sio_pgrp == pg,
 			    ("Bogus pgrp in sigio list"));
 			PGRP_LOCK(pg);
 			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
 			    sio_pgsigio);
 			PGRP_UNLOCK(pg);
 		} else /* if (p != NULL) */ {
 			KASSERT(sigio->sio_pgid > 0,
 			    ("Pgrp sigio in proc sigio list"));
 			KASSERT(sigio->sio_proc == p,
 			    ("Bogus proc in sigio list"));
 			PROC_LOCK(p);
 			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
 			    sio_pgsigio);
 			PROC_UNLOCK(p);
 		}
 		/* SIGIO_LOCK is dropped around the frees and retaken after. */
 		SIGIO_UNLOCK();
 		crfree(sigio->sio_ucred);
 		free(sigio, M_SIGIO);
 		SIGIO_LOCK();
 	}
 	SIGIO_UNLOCK();
 }
 
 /*
  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
  *
  * After permission checking, add a sigio structure to the sigio list for
  * the process or process group.
  */
 /*
  * pgid		> 0 names a process, < 0 names the process group -pgid,
  *		0 just removes the current owner
  * sigiop	location of the owner's sigio pointer, updated on success
  *
  * Returns 0 on success, ESRCH if the target cannot be found (or the
  * process is exiting), or EPERM if it belongs to another session.
  */
 int
 fsetown(pid_t pgid, struct sigio **sigiop)
 {
 	struct proc *proc;
 	struct pgrp *pgrp;
 	struct sigio *sigio;
 	int ret;
 
 	if (pgid == 0) {
 		funsetown(sigiop);
 		return (0);
 	}
 
 	ret = 0;
 
 	/* Allocate and fill in the new sigio out of locks. */
 	sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK);
 	sigio->sio_pgid = pgid;
 	sigio->sio_ucred = crhold(curthread->td_ucred);
 	sigio->sio_myref = sigiop;
 
 	sx_slock(&proctree_lock);
 	if (pgid > 0) {
 		proc = pfind(pgid);
 		if (proc == NULL) {
 			ret = ESRCH;
 			goto fail;
 		}
 
 		/*
 		 * Policy - Don't allow a process to FSETOWN a process
 		 * in another session.
 		 *
 		 * Remove this test to allow maximum flexibility or
 		 * restrict FSETOWN to the current process or process
 		 * group for maximum safety.
 		 */
 		PROC_UNLOCK(proc);
 		if (proc->p_session != curthread->td_proc->p_session) {
 			ret = EPERM;
 			goto fail;
 		}
 
 		pgrp = NULL;
 	} else /* if (pgid < 0) */ {
 		pgrp = pgfind(-pgid);
 		if (pgrp == NULL) {
 			ret = ESRCH;
 			goto fail;
 		}
 		PGRP_UNLOCK(pgrp);
 
 		/*
 		 * Policy - Don't allow a process to FSETOWN a process
 		 * in another session.
 		 *
 		 * Remove this test to allow maximum flexibility or
 		 * restrict FSETOWN to the current process or process
 		 * group for maximum safety.
 		 */
 		if (pgrp->pg_session != curthread->td_proc->p_session) {
 			ret = EPERM;
 			goto fail;
 		}
 
 		proc = NULL;
 	}
 	/* Drop any previous owner before installing the new one. */
 	funsetown(sigiop);
 	if (pgid > 0) {
 		PROC_LOCK(proc);
 		/*
 		 * Since funsetownlst() is called without the proctree
 		 * locked, we need to check for P_WEXIT.
 		 * XXX: is ESRCH correct?
 		 */
 		if ((proc->p_flag & P_WEXIT) != 0) {
 			PROC_UNLOCK(proc);
 			ret = ESRCH;
 			goto fail;
 		}
 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
 		sigio->sio_proc = proc;
 		PROC_UNLOCK(proc);
 	} else {
 		PGRP_LOCK(pgrp);
 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
 		sigio->sio_pgrp = pgrp;
 		PGRP_UNLOCK(pgrp);
 	}
 	sx_sunlock(&proctree_lock);
 	/* Publish the new structure through the owner's pointer. */
 	SIGIO_LOCK();
 	*sigiop = sigio;
 	SIGIO_UNLOCK();
 	return (0);
 
 fail:
 	/* Undo the allocation and credential hold made before the lookup. */
 	sx_sunlock(&proctree_lock);
 	crfree(sigio->sio_ucred);
 	free(sigio, M_SIGIO);
 	return (ret);
 }
 
 /*
  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
  */
 /*
  * Return the id recorded in the sigio structure referenced by *sigiop
  * (sio_pgid), or 0 when no structure is attached.  The pointer is read
  * under SIGIO_LOCK.
  */
 pid_t
 fgetown(struct sigio **sigiop)
 {
 	pid_t pgid;
 
 	SIGIO_LOCK();
 	pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
 	SIGIO_UNLOCK();
 	return (pgid);
 }
 
 /*
  * Function drops the filedesc lock on return.
  */
 /*
  * Finish closing descriptor fd, whose file fp the caller has already
  * removed from the table.  Must be entered with the filedesc lock held
  * exclusively.  When holdleaders is non-zero and the process has a
  * p_fdtol, fd_holdleaderscount is raised around the closef() call.
  * Returns the result of closef().
  */
 static int
 closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td,
     int holdleaders)
 {
 	int error;
 
 	FILEDESC_XLOCK_ASSERT(fdp);
 
 	if (holdleaders) {
 		if (td->td_proc->p_fdtol != NULL) {
 			/*
 			 * Ask fdfree() to sleep to ensure that all relevant
 			 * process leaders can be traversed in closef().
 			 */
 			fdp->fd_holdleaderscount++;
 		} else {
 			holdleaders = 0;
 		}
 	}
 
 	/*
 	 * We now hold the fp reference that used to be owned by the
 	 * descriptor array.  We have to unlock the FILEDESC *AFTER*
 	 * knote_fdclose to prevent a race of the fd getting opened, a knote
 	 * added, and deleting a knote for the new fd.
 	 */
 	knote_fdclose(td, fd);
 
 	/*
 	 * We need to notify mqueue if the object is of type mqueue.
 	 */
 	if (fp->f_type == DTYPE_MQUEUE)
 		mq_fdclose(td, fd, fp);
 	FILEDESC_XUNLOCK(fdp);
 
 	error = closef(fp, td);
 	if (holdleaders) {
 		/* Drop our hold and wake a waiter once the count reaches zero. */
 		FILEDESC_XLOCK(fdp);
 		fdp->fd_holdleaderscount--;
 		if (fdp->fd_holdleaderscount == 0 &&
 		    fdp->fd_holdleaderswakeup != 0) {
 			fdp->fd_holdleaderswakeup = 0;
 			wakeup(&fdp->fd_holdleaderscount);
 		}
 		FILEDESC_XUNLOCK(fdp);
 	}
 	return (error);
 }
 
 /*
  * Close a file descriptor.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct close_args {
 	int     fd;
 };
 #endif
 /* ARGSUSED */
 int
 sys_close(struct thread *td, struct close_args *uap)
 {
 
 	return (kern_close(td, uap->fd));
 }
 
 /*
  * Close descriptor fd of the calling thread's process.  Returns EBADF if
  * the descriptor is not open, otherwise the result of closefp().
  */
 int
 kern_close(struct thread *td, int fd)
 {
 	struct filedesc *fdp = td->td_proc->p_fd;
 	struct file *fp;
 
 	AUDIT_SYSCLOSE(td, fd);
 
 	FILEDESC_XLOCK(fdp);
 	fp = fget_locked(fdp, fd);
 	if (fp == NULL) {
 		FILEDESC_XUNLOCK(fdp);
 		return (EBADF);
 	}
 	fdfree(fdp, fd);
 
 	/* The FILEDESC lock is released inside closefp(). */
 	return (closefp(fdp, fd, fp, td, 1));
 }
 
 /*
  * Close open file descriptors.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct closefrom_args {
 	int	lowfd;
 };
 #endif
 /* ARGSUSED */
 int
 sys_closefrom(struct thread *td, struct closefrom_args *uap)
 {
 	struct filedesc *fdp = td->td_proc->p_fd;
 	int fd;
 
 	AUDIT_ARG_FD(uap->lowfd);
 
 	/*
 	 * A negative starting descriptor is handled exactly like
 	 * closefrom(0), i.e. every open file is closed.
 	 */
 	if (uap->lowfd < 0)
 		uap->lowfd = 0;
 	FILEDESC_SLOCK(fdp);
 	for (fd = uap->lowfd; fd <= fdp->fd_lastfile; fd++) {
 		if (fdp->fd_ofiles[fd].fde_file == NULL)
 			continue;
 		/* kern_close() takes the lock itself, so drop ours first. */
 		FILEDESC_SUNLOCK(fdp);
 		(void)kern_close(td, fd);
 		FILEDESC_SLOCK(fdp);
 	}
 	FILEDESC_SUNLOCK(fdp);
 	return (0);
 }
 
 #if defined(COMPAT_43)
 /*
  * Return status information about a file descriptor.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct ofstat_args {
 	int	fd;
 	struct	ostat *sb;
 };
 #endif
 /* ARGSUSED */
 int
 ofstat(struct thread *td, struct ofstat_args *uap)
 {
 	struct stat ub;
 	struct ostat oub;
 	int error;
 
 	error = kern_fstat(td, uap->fd, &ub);
 	if (error != 0)
 		return (error);
 	/* Convert to the old struct ostat layout before copying it out. */
 	cvtstat(&ub, &oub);
 	return (copyout(&oub, uap->sb, sizeof(oub)));
 }
 #endif /* COMPAT_43 */
 
+#if defined(COMPAT_FREEBSD11)
+int
+freebsd11_fstat(struct thread *td, struct freebsd11_fstat_args *uap)	/* fstat variant returning struct freebsd11_stat */
+{
+	struct stat sb;
+	struct freebsd11_stat osb;
+	int error;
+
+	error = kern_fstat(td, uap->fd, &sb);	/* gather the data as a struct stat */
+	if (error != 0)
+		return (error);
+	freebsd11_cvtstat(&sb, &osb);	/* convert to struct freebsd11_stat */
+	error = copyout(&osb, uap->sb, sizeof(osb));	/* copy the converted data to uap->sb */
+	return (error);
+}
+#endif	/* COMPAT_FREEBSD11 */
+
 /*
  * Return status information about a file descriptor.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fstat_args {
 	int	fd;
 	struct	stat *sb;
 };
 #endif
 /* ARGSUSED */
 int
 sys_fstat(struct thread *td, struct fstat_args *uap)
 {
 	struct stat ub;
 	int error;
 
 	error = kern_fstat(td, uap->fd, &ub);
 	if (error == 0)
 		error = copyout(&ub, uap->sb, sizeof(ub));
 	return (error);
 }
 
 /*
  * In-kernel fstat: resolve fd in the calling thread's descriptor table
  * (requesting the CAP_FSTAT right) and fill *sbp through the file's
  * fo_stat method.  Returns 0 on success, otherwise the error reported by
  * fget() or fo_stat().
  */
 int
 kern_fstat(struct thread *td, int fd, struct stat *sbp)
 {
 	struct file *fp;
 	cap_rights_t rights;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 
 	/* On success fget() leaves us holding a reference on fp. */
 	error = fget(td, fd, cap_rights_init(&rights, CAP_FSTAT), &fp);
 	if (error != 0)
 		return (error);
 
 	AUDIT_ARG_FILE(td->td_proc, fp);
 
 	error = fo_stat(fp, sbp, td->td_ucred, td);
 	/* The file reference is dropped whether or not fo_stat() succeeded. */
 	fdrop(fp, td);
 	/* When __STAT_TIME_T_EXT is defined, zero the st_*tim_ext fields. */
+#ifdef __STAT_TIME_T_EXT
+	if (error == 0) {
+		sbp->st_atim_ext = 0;
+		sbp->st_mtim_ext = 0;
+		sbp->st_ctim_ext = 0;
+		sbp->st_btim_ext = 0;
+	}
+#endif
 #ifdef KTRACE
 	/* Record the resulting structure if struct tracing is enabled. */
 	if (error == 0 && KTRPOINT(td, KTR_STRUCT))
 		ktrstat(sbp);
 #endif
 	return (error);
 }
 
+#if defined(COMPAT_FREEBSD11)
 /*
  * Return status information about a file descriptor.
  */
 #ifndef _SYS_SYSPROTO_H_
-struct nfstat_args {
+struct freebsd11_nfstat_args {
 	int	fd;
 	struct	nstat *sb;
 };
 #endif
 /* ARGSUSED */
 int
-sys_nfstat(struct thread *td, struct nfstat_args *uap)
+freebsd11_nfstat(struct thread *td, struct freebsd11_nfstat_args *uap)
 {
 	struct nstat nub;
 	struct stat ub;
 	int error;
 
 	/* Fetch the native stat data for the descriptor. */
 	error = kern_fstat(td, uap->fd, &ub);
 	if (error == 0) {
 		/* Convert to the nstat layout, then copy it out to uap->sb. */
-		cvtnstat(&ub, &nub);
+		freebsd11_cvtnstat(&ub, &nub);
 		error = copyout(&nub, uap->sb, sizeof(nub));
 	}
 	return (error);
 }
+#endif /* COMPAT_FREEBSD11 */
 
 /*
  * Return pathconf information about a file descriptor.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fpathconf_args {
 	int	fd;
 	int	name;
 };
 #endif
 /* ARGSUSED */
 int
 sys_fpathconf(struct thread *td, struct fpathconf_args *uap)
 {
 	struct file *fp;
 	struct vnode *vp;
 	cap_rights_t rights;
 	int error;
 
 	error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FPATHCONF), &fp);
 	if (error != 0)
 		return (error);
 
 	if (uap->name == _PC_ASYNC_IO) {
 		td->td_retval[0] = _POSIX_ASYNCHRONOUS_IO;
 		goto out;
 	}
 	vp = fp->f_vnode;
 	if (vp != NULL) {
 		vn_lock(vp, LK_SHARED | LK_RETRY);
 		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
 		VOP_UNLOCK(vp, 0);
 	} else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
 		if (uap->name != _PC_PIPE_BUF) {
 			error = EINVAL;
 		} else {
 			td->td_retval[0] = PIPE_BUF;
 			error = 0;
 		}
 	} else {
 		error = EOPNOTSUPP;
 	}
 out:
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * Initialize filecaps structure.
  */
 void
 filecaps_init(struct filecaps *fcaps)
 {
 
 	/*
 	 * Zero everything, then set fc_nioctls to -1, the same value
 	 * filecaps_fill() stores alongside a NULL fc_ioctls array.
 	 */
 	bzero(fcaps, sizeof(struct filecaps));
 	fcaps->fc_nioctls = -1;
 }
 
 /*
  * Copy filecaps structure allocating memory for ioctls array if needed.
  *
  * The last parameter indicates whether the fdtable is locked. If it is not and
  * ioctls are encountered, copying fails and the caller must lock the table.
  *
  * Note that if the table was not locked, the caller has to check the relevant
  * sequence counter to determine whether the operation was successful.
  */
 int
 filecaps_copy(const struct filecaps *src, struct filecaps *dst, bool locked)
 {
 	size_t nbytes;
 
 	/* Shallow copy first; the ioctl array (if any) is duplicated below. */
 	*dst = *src;
 	if (src->fc_ioctls != NULL) {
 		/* The array cannot be duplicated without the table lock. */
 		if (!locked)
 			return (1);
 
 		KASSERT(src->fc_nioctls > 0,
 		    ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls));
 
 		nbytes = sizeof(src->fc_ioctls[0]) * src->fc_nioctls;
 		dst->fc_ioctls = malloc(nbytes, M_FILECAPS, M_WAITOK);
 		bcopy(src->fc_ioctls, dst->fc_ioctls, nbytes);
 	}
 	return (0);
 }
 
 /*
  * Move filecaps structure to the new place and clear the old place.
  */
 void
 filecaps_move(struct filecaps *src, struct filecaps *dst)
 {
 
 	/* Transfer the contents (including the fc_ioctls pointer) ... */
 	*dst = *src;
 	/* ... and wipe the source structure afterwards. */
 	bzero(src, sizeof(struct filecaps));
 }
 
 /*
  * Fill the given filecaps structure with full rights.
  */
 static void
 filecaps_fill(struct filecaps *fcaps)
 {
 
 	/* Every capability right and every fcntl command is allowed. */
 	CAP_ALL(&fcaps->fc_rights);
 	fcaps->fc_fcntls = CAP_FCNTL_ALL;
 	/* No ioctl array is attached; the count is set to -1. */
 	fcaps->fc_nioctls = -1;
 	fcaps->fc_ioctls = NULL;
 }
 
 /*
  * Free memory allocated within filecaps structure.
  */
 void
 filecaps_free(struct filecaps *fcaps)
 {
 
 	/* Release the ioctl array, then clear the whole structure. */
 	free(fcaps->fc_ioctls, M_FILECAPS);
 	bzero(fcaps, sizeof(struct filecaps));
 }
 
 /*
  * Validate the given filecaps structure.
  */
 static void
 filecaps_validate(const struct filecaps *fcaps, const char *func)
 {
 
 	/* The rights mask itself must be well formed. */
 	KASSERT(cap_rights_is_valid(&fcaps->fc_rights),
 	    ("%s: invalid rights", func));
 
 	/* fcntl limits: only known bits, and only together with CAP_FCNTL. */
 	KASSERT((fcaps->fc_fcntls & ~CAP_FCNTL_ALL) == 0,
 	    ("%s: invalid fcntls", func));
 	KASSERT(fcaps->fc_fcntls == 0 ||
 	    cap_rights_is_set(&fcaps->fc_rights, CAP_FCNTL),
 	    ("%s: fcntls without CAP_FCNTL", func));
 
 	/*
 	 * ioctl limits: an array implies a positive count, no array implies
 	 * a count of -1 or 0; a non-zero count requires CAP_IOCTL.
 	 */
 	KASSERT((fcaps->fc_ioctls != NULL && fcaps->fc_nioctls > 0) ||
 	    (fcaps->fc_ioctls == NULL &&
 	    (fcaps->fc_nioctls == -1 || fcaps->fc_nioctls == 0)),
 	    ("%s: invalid ioctls", func));
 	KASSERT(fcaps->fc_nioctls == 0 ||
 	    cap_rights_is_set(&fcaps->fc_rights, CAP_IOCTL),
 	    ("%s: ioctls without CAP_IOCTL", func));
 }
 
 /*
  * Grow the table to whichever is larger: twice its current size or nfd.
  * The caller must hold the filedesc lock exclusively.
  */
 static void
 fdgrowtable_exp(struct filedesc *fdp, int nfd)
 {
 	int doubled, want;
 
 	FILEDESC_XLOCK_ASSERT(fdp);
 
 	doubled = fdp->fd_nfiles * 2;
 	want = (doubled < nfd) ? nfd : doubled;
 	fdgrowtable(fdp, want);
 }
 
 /*
  * Grow the file table to accommodate (at least) nfd descriptors.
  */
 static void
 fdgrowtable(struct filedesc *fdp, int nfd)
 {
 	struct filedesc0 *fdp0;
 	struct freetable *ft;
 	struct fdescenttbl *ntable;
 	struct fdescenttbl *otable;
 	int nnfiles, onfiles;
 	NDSLOTTYPE *nmap, *omap;
 
 	/*
 	 * If lastfile is -1 this struct filedesc was just allocated and we are
 	 * growing it to accommodate for the one we are going to copy from. There
 	 * is no need to have a lock on this one as it's not visible to anyone.
 	 */
 	if (fdp->fd_lastfile != -1)
 		FILEDESC_XLOCK_ASSERT(fdp);
 
 	KASSERT(fdp->fd_nfiles > 0, ("zero-length file table"));
 
 	/* save old values */
 	onfiles = fdp->fd_nfiles;
 	otable = fdp->fd_files;
 	omap = fdp->fd_map;
 
 	/* compute the size of the new table */
 	nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
 	if (nnfiles <= onfiles)
 		/* the table is already large enough */
 		return;
 
 	/*
 	 * Allocate a new table.  We need enough space for the number of
 	 * entries, file entries themselves and the struct freetable we will use
 	 * when we decommission the table and place it on the freelist.
 	 * We place the struct freetable in the middle so we don't have
 	 * to worry about padding.
 	 */
 	ntable = malloc(offsetof(struct fdescenttbl, fdt_ofiles) +
 	    nnfiles * sizeof(ntable->fdt_ofiles[0]) +
 	    sizeof(struct freetable),
 	    M_FILEDESC, M_ZERO | M_WAITOK);
 	/* copy the old data */
 	ntable->fdt_nfiles = nnfiles;
 	memcpy(ntable->fdt_ofiles, otable->fdt_ofiles,
 	    onfiles * sizeof(ntable->fdt_ofiles[0]));
 
 	/*
 	 * Allocate a new map only if the old is not large enough.  It will
 	 * grow at a slower rate than the table as it can map more
 	 * entries than the table can hold.
 	 */
 	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
 		nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC,
 		    M_ZERO | M_WAITOK);
 		/* copy over the old data and update the pointer */
 		memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap));
 		fdp->fd_map = nmap;
 	}
 
 	/*
 	 * Make sure that ntable is correctly initialized before we replace
 	 * fd_files pointer. Otherwise fget_unlocked() may see inconsistent
 	 * data.
 	 */
 	atomic_store_rel_ptr((volatile void *)&fdp->fd_files, (uintptr_t)ntable);
 
 	/*
 	 * Do not free the old file table, as some threads may still
 	 * reference entries within it.  Instead, place it on a freelist
 	 * which will be processed when the struct filedesc is released.
 	 *
 	 * Note that if onfiles == NDFILE, we're dealing with the original
 	 * static allocation contained within (struct filedesc0 *)fdp,
 	 * which must not be freed.
 	 */
 	if (onfiles > NDFILE) {
 		/*
 		 * The freetable record occupies the extra space that was
 		 * allocated right behind the old table's last entry.
 		 */
 		ft = (struct freetable *)&otable->fdt_ofiles[onfiles];
 		fdp0 = (struct filedesc0 *)fdp;
 		ft->ft_table = otable;
 		SLIST_INSERT_HEAD(&fdp0->fd_free, ft, ft_next);
 	}
 	/*
 	 * The map does not have the same possibility of threads still
 	 * holding references to it.  So always free it as long as it
 	 * does not reference the original static allocation.
 	 */
 	if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
 		free(omap, M_FILEDESC);
 }
 
 /*
  * Allocate a file descriptor for the process.
  */
 int
 fdalloc(struct thread *td, int minfd, int *result)
 {
 	struct proc *p = td->td_proc;
 	struct filedesc *fdp = p->p_fd;
 	int fd, maxfd, allocfd;
 #ifdef RACCT
 	int error;
 #endif
 
 	/*
 	 * On success the chosen descriptor is marked used and stored in
 	 * *result and 0 is returned; EMFILE is returned when the limit from
 	 * getmaxfd() is reached or racct_set() refuses the new table size.
 	 * The caller must hold the filedesc lock exclusively.
 	 */
 	FILEDESC_XLOCK_ASSERT(fdp);
 
 	/* Never start the search below fd_freefile. */
 	if (fdp->fd_freefile > minfd)
 		minfd = fdp->fd_freefile;
 
 	maxfd = getmaxfd(td);
 
 	/*
 	 * Search the bitmap for a free descriptor starting at minfd.
 	 * If none is found, grow the file table.
 	 */
 	fd = fd_first_free(fdp, minfd, fdp->fd_nfiles);
 	if (fd >= maxfd)
 		return (EMFILE);
 	if (fd >= fdp->fd_nfiles) {
 		/* Ask for twice the needed index, capped at the limit. */
 		allocfd = min(fd * 2, maxfd);
 #ifdef RACCT
 		if (racct_enable) {
 			PROC_LOCK(p);
 			error = racct_set(p, RACCT_NOFILE, allocfd);
 			PROC_UNLOCK(p);
 			if (error != 0)
 				return (EMFILE);
 		}
 #endif
 		/*
 		 * fd is already equal to first free descriptor >= minfd, so
 		 * we only need to grow the table and we are done.
 		 */
 		fdgrowtable_exp(fdp, allocfd);
 	}
 
 	/*
 	 * Perform some sanity checks, then mark the file descriptor as
 	 * used and return it to the caller.
 	 */
 	KASSERT(fd >= 0 && fd < min(maxfd, fdp->fd_nfiles),
 	    ("invalid descriptor %d", fd));
 	KASSERT(!fdisused(fdp, fd),
 	    ("fd_first_free() returned non-free descriptor"));
 	KASSERT(fdp->fd_ofiles[fd].fde_file == NULL,
 	    ("file descriptor isn't free"));
 	fdused(fdp, fd);
 	*result = fd;
 	return (0);
 }
 
 /*
  * Allocate n file descriptors for the process.
  */
 int
 fdallocn(struct thread *td, int minfd, int *fds, int n)
 {
 	struct proc *p = td->td_proc;
 	struct filedesc *fdp = p->p_fd;
 	int i;
 
 	/*
 	 * Allocate n descriptors, each numbered minfd or higher, and store
 	 * them in fds[0..n-1].  Returns 0 on success.  If any allocation
 	 * fails, every descriptor obtained so far is released again and
 	 * EMFILE is returned.  The caller must hold the filedesc lock
 	 * exclusively.
 	 */
 	FILEDESC_XLOCK_ASSERT(fdp);
 
 	/*
 	 * Honor the caller's lower bound instead of always starting at 0;
 	 * callers passing minfd == 0 see no change in behavior.
 	 */
 	for (i = 0; i < n; i++)
 		if (fdalloc(td, minfd, &fds[i]) != 0)
 			break;
 
 	if (i < n) {
 		/* Roll back: fds[i] itself was never allocated. */
 		for (i--; i >= 0; i--)
 			fdunused(fdp, fds[i]);
 		return (EMFILE);
 	}
 
 	return (0);
 }
 
 /*
  * Create a new open file structure and allocate a file descriptor for the
  * process that refers to it.  We add one reference to the file for the
  * descriptor table and one reference for resultfp. This is to prevent us
  * being preempted and the entry in the descriptor table closed after we
  * release the FILEDESC lock.
  */
 int
 falloc_caps(struct thread *td, struct file **resultfp, int *resultfd, int flags,
     struct filecaps *fcaps)
 {
 	struct file *fp;
 	int error, fd;
 
 	error = falloc_noinstall(td, &fp);
 	if (error)
 		return (error);		/* no reference held on error */
 
 	error = finstall(td, fp, &fd, flags, fcaps);
 	if (error) {
 		fdrop(fp, td);		/* one reference (fp only) */
 		return (error);
 	}
 
 	if (resultfp != NULL)
 		*resultfp = fp;		/* copy out result */
 	else
 		fdrop(fp, td);		/* release local reference */
 
 	if (resultfd != NULL)
 		*resultfd = fd;
 
 	return (0);
 }
 
 /*
  * Create a new open file structure without allocating a file descriptor.
  */
 int
 falloc_noinstall(struct thread *td, struct file **resultfp)
 {
 	struct file *fp;
 	/* Threshold above which PRIV_MAXFILES is required: maxfiles less 1/20. */
 	int maxuserfiles = maxfiles - (maxfiles / 20);
 	int openfiles_new;
 	/* State for ppsratecheck(), which gates the warning printed below. */
 	static struct timeval lastfail;
 	static int curfail;
 
 	KASSERT(resultfp != NULL, ("%s: resultfp == NULL", __func__));
 
 	/* Count the new file up front; the increment is undone on failure. */
 	openfiles_new = atomic_fetchadd_int(&openfiles, 1) + 1;
 	if ((openfiles_new >= maxuserfiles &&
 	    priv_check(td, PRIV_MAXFILES) != 0) ||
 	    openfiles_new >= maxfiles) {
 		atomic_subtract_int(&openfiles, 1);
 		if (ppsratecheck(&lastfail, &curfail, 1)) {
 			printf("kern.maxfiles limit exceeded by uid %i, (%s) "
 			    "please see tuning(7).\n", td->td_ucred->cr_ruid, td->td_proc->p_comm);
 		}
 		return (ENFILE);
 	}
 	/*
 	 * The new file starts zeroed with one reference, the caller's
 	 * credentials, and badfileops as its operations vector.
 	 */
 	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
 	refcount_init(&fp->f_count, 1);
 	fp->f_cred = crhold(td->td_ucred);
 	fp->f_ops = &badfileops;
 	*resultfp = fp;
 	return (0);
 }
 
 /*
  * Install a file in a file descriptor table.
  */
 void
 _finstall(struct filedesc *fdp, struct file *fp, int fd, int flags,
     struct filecaps *fcaps)
 {
 	struct filedescent *slot;
 
 	MPASS(fp != NULL);
 	if (fcaps != NULL)
 		filecaps_validate(fcaps, __func__);
 	FILEDESC_XLOCK_ASSERT(fdp);
 
 	slot = &fdp->fd_ofiles[fd];
 #ifdef CAPABILITIES
 	seq_write_begin(&slot->fde_seq);
 #endif
 	slot->fde_file = fp;
 
 	/* O_CLOEXEC is the only open flag recorded in the table entry. */
 	if ((flags & O_CLOEXEC) != 0)
 		slot->fde_flags = UF_EXCLOSE;
 	else
 		slot->fde_flags = 0;
 
 	/* Without explicit capabilities the entry receives full rights. */
 	if (fcaps == NULL)
 		filecaps_fill(&slot->fde_caps);
 	else
 		filecaps_move(fcaps, &slot->fde_caps);
 #ifdef CAPABILITIES
 	seq_write_end(&slot->fde_seq);
 #endif
 }
 
 int
 finstall(struct thread *td, struct file *fp, int *fd, int flags,
     struct filecaps *fcaps)
 {
 	struct filedesc *fdp = td->td_proc->p_fd;
 	int error;
 
 	MPASS(fd != NULL);
 
 	FILEDESC_XLOCK(fdp);
 	if ((error = fdalloc(td, 0, fd))) {
 		FILEDESC_XUNLOCK(fdp);
 		return (error);
 	}
 	fhold(fp);
 	_finstall(fdp, fp, *fd, flags, fcaps);
 	FILEDESC_XUNLOCK(fdp);
 	return (0);
 }
 
 /*
  * Build a new filedesc structure from another.
  * Copy the current, root, and jail root vnode references.
  *
  * If fdp is not NULL and prepfiles is true, return with fdp shared locked;
  * if prepfiles is false, fdp is unlocked again before returning.
  */
 struct filedesc *
 fdinit(struct filedesc *fdp, bool prepfiles)
 {
 	struct filedesc0 *newfdp0;
 	struct filedesc *newfdp;
 
 	newfdp0 = uma_zalloc(filedesc0_zone, M_WAITOK | M_ZERO);
 	newfdp = &newfdp0->fd_fd;
 
 	/* Create the file descriptor table. */
 	FILEDESC_LOCK_INIT(newfdp);
 	refcount_init(&newfdp->fd_refcnt, 1);
 	refcount_init(&newfdp->fd_holdcnt, 1);
 	newfdp->fd_cmask = CMASK;
 	/* Start out with the map and table embedded in struct filedesc0. */
 	newfdp->fd_map = newfdp0->fd_dmap;
 	newfdp->fd_lastfile = -1;
 	newfdp->fd_files = (struct fdescenttbl *)&newfdp0->fd_dfiles;
 	newfdp->fd_files->fdt_nfiles = NDFILE;
 
 	/* With no template there is nothing to copy and nothing to lock. */
 	if (fdp == NULL)
 		return (newfdp);
 
 	/*
 	 * Pre-size the new table before taking the lock; fd_lastfile is
 	 * read unlocked here and is re-checked under the lock below.
 	 */
 	if (prepfiles && fdp->fd_lastfile >= newfdp->fd_nfiles)
 		fdgrowtable(newfdp, fdp->fd_lastfile + 1);
 
 	FILEDESC_SLOCK(fdp);
 	/* Inherit the directory vnodes, taking a reference on each. */
 	newfdp->fd_cdir = fdp->fd_cdir;
 	if (newfdp->fd_cdir)
 		vrefact(newfdp->fd_cdir);
 	newfdp->fd_rdir = fdp->fd_rdir;
 	if (newfdp->fd_rdir)
 		vrefact(newfdp->fd_rdir);
 	newfdp->fd_jdir = fdp->fd_jdir;
 	if (newfdp->fd_jdir)
 		vrefact(newfdp->fd_jdir);
 
 	if (!prepfiles) {
 		FILEDESC_SUNLOCK(fdp);
 	} else {
 		/*
 		 * The lock is dropped around each fdgrowtable() call, so
 		 * loop until the new table is large enough while the lock
 		 * is held.  fdp is still shared locked on return.
 		 */
 		while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
 			FILEDESC_SUNLOCK(fdp);
 			fdgrowtable(newfdp, fdp->fd_lastfile + 1);
 			FILEDESC_SLOCK(fdp);
 		}
 	}
 
 	return (newfdp);
 }
 
 /*
  * Take a hold reference (fd_holdcnt) on the process's descriptor table, if
  * it has one.  Returns the table, or NULL when p->p_fd is NULL.  The
  * process lock must be owned by the caller.
  */
 static struct filedesc *
 fdhold(struct proc *p)
 {
 	struct filedesc *table;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	table = p->p_fd;
 	if (table == NULL)
 		return (NULL);
 	refcount_acquire(&table->fd_holdcnt);
 	return (table);
 }
 
 /*
  * Give up a hold reference on fdp; when it was the last one, destroy the
  * lock and return the structure to its zone.
  */
 static void
 fddrop(struct filedesc *fdp)
 {
 
 	/* With a count of 1 the atomic release is skipped altogether. */
 	if (fdp->fd_holdcnt > 1 && refcount_release(&fdp->fd_holdcnt) == 0)
 		return;
 
 	FILEDESC_LOCK_DESTROY(fdp);
 	uma_zfree(filedesc0_zone, fdp);
 }
 
 /*
  * Share a filedesc structure.
  */
 struct filedesc *
 fdshare(struct filedesc *fdp)
 {
 
 	/* Sharing only bumps fd_refcnt; the same table is handed back. */
 	refcount_acquire(&fdp->fd_refcnt);
 
 	return (fdp);
 }
 
 /*
  * Unshare a filedesc structure, if necessary by making a copy
  */
 void
 fdunshare(struct thread *td)
 {
 	struct proc *p;
 	struct filedesc *copy;
 
 	p = td->td_proc;
 	if (p->p_fd->fd_refcnt != 1) {
 		/* Build a private copy, then let go of the shared table. */
 		copy = fdcopy(p->p_fd);
 		fdescfree(td);
 		p->p_fd = copy;
 	}
 }
 
 /*
  * Replace the calling process's descriptor table with fdp, releasing the
  * table that was in use before.
  */
 void
 fdinstall_remapped(struct thread *td, struct filedesc *fdp)
 {
 
 	/* The old table must be released before the pointer is switched. */
 	fdescfree(td);
 
 	td->td_proc->p_fd = fdp;
 }
 
 /*
  * Copy a filedesc structure.  A NULL pointer in returns a NULL reference,
  * this is to ease callers, not catch errors.
  */
 struct filedesc *
 fdcopy(struct filedesc *fdp)
 {
 	struct filedesc *newfdp;
 	struct filedescent *nfde, *ofde;
 	int i;
 
 	MPASS(fdp != NULL);
 
 	/*
 	 * With prepfiles == true, fdinit() returns with fdp shared locked
 	 * and the new table already large enough for fd_lastfile.
 	 */
 	newfdp = fdinit(fdp, true);
 	/* copy all passable descriptors (i.e. not kqueue) */
 	newfdp->fd_freefile = -1;
 	for (i = 0; i <= fdp->fd_lastfile; ++i) {
 		ofde = &fdp->fd_ofiles[i];
 		if (ofde->fde_file == NULL ||
 		    (ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0) {
 			/* The first skipped slot becomes the free-slot hint. */
 			if (newfdp->fd_freefile == -1)
 				newfdp->fd_freefile = i;
 			continue;
 		}
 		nfde = &newfdp->fd_ofiles[i];
 		*nfde = *ofde;
 		/* Give the new entry its own copy of the ioctl array. */
 		filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true);
 		/* The new table entry holds its own reference on the file. */
 		fhold(nfde->fde_file);
 		fdused_init(newfdp, i);
 		newfdp->fd_lastfile = i;
 	}
 	/* No hole was found: the hint is the slot after the last one copied. */
 	if (newfdp->fd_freefile == -1)
 		newfdp->fd_freefile = i;
 	newfdp->fd_cmask = fdp->fd_cmask;
 	/* Release the shared lock that fdinit() left held. */
 	FILEDESC_SUNLOCK(fdp);
 	return (newfdp);
 }
 
 /*
  * Copies a filedesc structure, while remapping all file descriptors
  * stored inside using a translation table.
  *
  * File descriptors are copied over to the new file descriptor table,
  * regardless of whether the close-on-exec flag is set.
  */
 int
 fdcopy_remapped(struct filedesc *fdp, const int *fds, size_t nfds,
     struct filedesc **ret)
 {
 	struct filedesc *newfdp;
 	struct filedescent *nfde, *ofde;
 	int error, i;
 
 	/*
 	 * New descriptor i refers to the file behind old descriptor fds[i].
 	 * On success the new table is stored in *ret and 0 is returned.
 	 * Errors: E2BIG (nfds larger than the old table), EBADF (fds[i]
 	 * out of range or not open), EINVAL (file is not passable).
 	 */
 	MPASS(fdp != NULL);
 
 	/* fdinit() with prepfiles == true returns with fdp shared locked. */
 	newfdp = fdinit(fdp, true);
 	if (nfds > fdp->fd_lastfile + 1) {
 		/* New table cannot be larger than the old one. */
 		error = E2BIG;
 		goto bad;
 	}
 	/* Copy all passable descriptors (i.e. not kqueue). */
 	newfdp->fd_freefile = nfds;
 	for (i = 0; i < nfds; ++i) {
 		if (fds[i] < 0 || fds[i] > fdp->fd_lastfile) {
 			/* File descriptor out of bounds. */
 			error = EBADF;
 			goto bad;
 		}
 		ofde = &fdp->fd_ofiles[fds[i]];
 		if (ofde->fde_file == NULL) {
 			/* Unused file descriptor. */
 			error = EBADF;
 			goto bad;
 		}
 		if ((ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0) {
 			/* File descriptor cannot be passed. */
 			error = EINVAL;
 			goto bad;
 		}
 		nfde = &newfdp->fd_ofiles[i];
 		*nfde = *ofde;
 		/* Give the new entry its own copy of the ioctl array. */
 		filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true);
 		/* The new table entry holds its own reference on the file. */
 		fhold(nfde->fde_file);
 		fdused_init(newfdp, i);
 		newfdp->fd_lastfile = i;
 	}
 	newfdp->fd_cmask = fdp->fd_cmask;
 	FILEDESC_SUNLOCK(fdp);
 	*ret = newfdp;
 	return (0);
 bad:
 	/* Unlock the source table and tear down the partial copy. */
 	FILEDESC_SUNLOCK(fdp);
 	fdescfree_remapped(newfdp);
 	return (error);
 }
 
 /*
  * Clear POSIX style locks. This is only used when fdp loses a reference (i.e.
  * one of the processes using it exits) and the table used to be shared.
  */
 static void
 fdclearlocks(struct thread *td)
 {
 	struct filedesc *fdp;
 	struct filedesc_to_leader *fdtol;
 	struct flock lf;
 	struct file *fp;
 	struct proc *p;
 	struct vnode *vp;
 	int i;
 
 	p = td->td_proc;
 	fdp = p->p_fd;
 	fdtol = p->p_fdtol;
 	MPASS(fdtol != NULL);
 
 	FILEDESC_XLOCK(fdp);
 	KASSERT(fdtol->fdl_refcount > 0,
 	    ("filedesc_to_refcount botch: fdl_refcount=%d",
 	    fdtol->fdl_refcount));
 	/*
 	 * When this is the last reference on fdtol and the leader has
 	 * P_ADVLOCK set, unlock the whole range of every vnode-backed file
 	 * on behalf of the leader.
 	 */
 	if (fdtol->fdl_refcount == 1 &&
 	    (p->p_leader->p_flag & P_ADVLOCK) != 0) {
 		for (i = 0; i <= fdp->fd_lastfile; i++) {
 			fp = fdp->fd_ofiles[i].fde_file;
 			if (fp == NULL || fp->f_type != DTYPE_VNODE)
 				continue;
 			/*
 			 * Hold the file so it stays valid while the table
 			 * lock is dropped around VOP_ADVLOCK().
 			 */
 			fhold(fp);
 			FILEDESC_XUNLOCK(fdp);
 			lf.l_whence = SEEK_SET;
 			lf.l_start = 0;
 			lf.l_len = 0;
 			lf.l_type = F_UNLCK;
 			vp = fp->f_vnode;
 			(void) VOP_ADVLOCK(vp,
 			    (caddr_t)p->p_leader, F_UNLCK,
 			    &lf, F_POSIX);
 			FILEDESC_XLOCK(fdp);
 			fdrop(fp, td);
 		}
 	}
 retry:
 	/*
 	 * Before dropping the last reference, sleep until the hold counts
 	 * checked below have drained; every wakeup re-evaluates from retry.
 	 */
 	if (fdtol->fdl_refcount == 1) {
 		if (fdp->fd_holdleaderscount > 0 &&
 		    (p->p_leader->p_flag & P_ADVLOCK) != 0) {
 			/*
 			 * close() or kern_dup() has cleared a reference
 			 * in a shared file descriptor table.
 			 */
 			fdp->fd_holdleaderswakeup = 1;
 			sx_sleep(&fdp->fd_holdleaderscount,
 			    FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
 			goto retry;
 		}
 		if (fdtol->fdl_holdcount > 0) {
 			/*
 			 * Ensure that fdtol->fdl_leader remains
 			 * valid in closef().
 			 */
 			fdtol->fdl_wakeup = 1;
 			sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
 			    "fdlhold", 0);
 			goto retry;
 		}
 	}
 	fdtol->fdl_refcount--;
 	if (fdtol->fdl_refcount == 0 &&
 	    fdtol->fdl_holdcount == 0) {
 		/* Unlink from the doubly-linked list; it is freed below. */
 		fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
 		fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
 	} else
 		fdtol = NULL;
 	p->p_fdtol = NULL;
 	FILEDESC_XUNLOCK(fdp);
 	/* Free outside the lock; fdtol is NULL when it is still in use. */
 	if (fdtol != NULL)
 		free(fdtol, M_FILEDESC_TO_LEADER);
 }
 
 /*
  * Release a filedesc structure.
  */
 static void
 fdescfree_fds(struct thread *td, struct filedesc *fdp, bool needclose)
 {
 	struct filedesc0 *fdp0;
 	struct freetable *cur, *next;
 	struct filedescent *entry;
 	struct file *fp;
 	int idx;
 
 	/*
 	 * Release every open entry.  With needclose the file goes through
 	 * closef(); otherwise only the table's reference is dropped.
 	 */
 	for (idx = 0; idx <= fdp->fd_lastfile; idx++) {
 		entry = &fdp->fd_ofiles[idx];
 		fp = entry->fde_file;
 		if (fp == NULL)
 			continue;
 		fdefree_last(entry);
 		if (!needclose)
 			fdrop(fp, td);
 		else
 			(void) closef(fp, td);
 	}
 
 	/* The map and table are freed only when larger than NDFILE allows. */
 	if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
 		free(fdp->fd_map, M_FILEDESC);
 	if (fdp->fd_nfiles > NDFILE)
 		free(fdp->fd_files, M_FILEDESC);
 
 	/* Free the retired tables queued on the freelist. */
 	fdp0 = (struct filedesc0 *)fdp;
 	SLIST_FOREACH_SAFE(cur, &fdp0->fd_free, ft_next, next)
 		free(cur->ft_table, M_FILEDESC);
 
 	fddrop(fdp);
 }
 
 /*
  * Detach the descriptor table from the calling process and drop its
  * reference on it.  If that was the last reference, the directory vnodes
  * are released and all descriptors are closed.
  */
 void
 fdescfree(struct thread *td)
 {
 	struct proc *p;
 	struct filedesc *fdp;
 	struct vnode *cdir, *jdir, *rdir;
 
 	p = td->td_proc;
 	fdp = p->p_fd;
 	MPASS(fdp != NULL);
 
 #ifdef RACCT
 	/* Reset the process's RACCT_NOFILE accounting to zero. */
 	if (racct_enable) {
 		PROC_LOCK(p);
 		racct_set(p, RACCT_NOFILE, 0);
 		PROC_UNLOCK(p);
 	}
 #endif
 
 	if (p->p_fdtol != NULL)
 		fdclearlocks(td);
 
 	/* Clear p_fd under the process lock. */
 	PROC_LOCK(p);
 	p->p_fd = NULL;
 	PROC_UNLOCK(p);
 
 	/* Other sharers remain: nothing more to do. */
 	if (refcount_release(&fdp->fd_refcnt) == 0)
 		return;
 
 	/* Detach the directory vnodes under the lock, release them after. */
 	FILEDESC_XLOCK(fdp);
 	cdir = fdp->fd_cdir;
 	fdp->fd_cdir = NULL;
 	rdir = fdp->fd_rdir;
 	fdp->fd_rdir = NULL;
 	jdir = fdp->fd_jdir;
 	fdp->fd_jdir = NULL;
 	FILEDESC_XUNLOCK(fdp);
 
 	if (cdir != NULL)
 		vrele(cdir);
 	if (rdir != NULL)
 		vrele(rdir);
 	if (jdir != NULL)
 		vrele(jdir);
 
 	/* Close every descriptor (needclose) and free the table storage. */
 	fdescfree_fds(td, fdp, 1);
 }
 
 /*
  * Dispose of a table that was built by fdcopy_remapped(): release its
  * directory vnodes and drop (rather than close) every file reference.
  */
 void
 fdescfree_remapped(struct filedesc *fdp)
 {
 
 	/* Release whichever directory references the table holds. */
 	if (fdp->fd_cdir != NULL)
 		vrele(fdp->fd_cdir);
 
 	if (fdp->fd_rdir != NULL)
 		vrele(fdp->fd_rdir);
 
 	if (fdp->fd_jdir != NULL)
 		vrele(fdp->fd_jdir);
 
 	/* needclose == 0: files are only fdrop()ped, not closef()ed. */
 	fdescfree_fds(curthread, fdp, 0);
 }
 
 /*
  * For setugid programs, we don't want people to use that setugidness
  * to generate error messages which write to a file which would
  * otherwise be off-limits to the process.  We check for filesystems where
  * the vnode can change out from under us after execve (like [lin]procfs).
  *
  * Since fdsetugidsafety calls this only for fd 0, 1 and 2, this check is
  * sufficient.  We also don't check for setugidness since we know we are.
  */
 static bool
 is_unsafe(struct file *fp)
 {
 
 	/* Only vnode-backed files can carry the VV_PROCDEP flag. */
 	if (fp->f_type == DTYPE_VNODE)
 		return ((fp->f_vnode->v_vflag & VV_PROCDEP) != 0);
 
 	return (false);
 }
 
 /*
  * Make this setugid thing safe, if at all possible.
  */
 void
 fdsetugidsafety(struct thread *td)
 {
 	struct filedesc *fdp;
 	struct file *fp;
 	int fd;
 
 	fdp = td->td_proc->p_fd;
 	KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
 	MPASS(fdp->fd_nfiles >= 3);
 
 	/* Only descriptors 0, 1 and 2 are examined. */
 	for (fd = 0; fd <= 2; fd++) {
 		fp = fdp->fd_ofiles[fd].fde_file;
 		if (fp == NULL || !is_unsafe(fp))
 			continue;
 
 		FILEDESC_XLOCK(fdp);
 		knote_fdclose(td, fd);
 		/*
 		 * Clear the table slot before closing so that nothing can
 		 * race with the descriptor while the close blocks.
 		 */
 		fdfree(fdp, fd);
 		FILEDESC_XUNLOCK(fdp);
 		(void) closef(fp, td);
 	}
 }
 
 /*
  * If a specific file object occupies a specific file descriptor, close the
  * file descriptor entry and drop a reference on the file object.  This is a
  * convenience function to handle a subsequent error in a function that calls
  * falloc() that handles the race that another thread might have closed the
  * file descriptor out from under the thread creating the file object.
  */
 void
 fdclose(struct thread *td, struct file *fp, int idx)
 {
 	struct filedesc *fdp;
 
 	fdp = td->td_proc->p_fd;
 
 	FILEDESC_XLOCK(fdp);
 	if (fdp->fd_ofiles[idx].fde_file != fp) {
 		/* The slot no longer refers to fp; leave it alone. */
 		FILEDESC_XUNLOCK(fdp);
 		return;
 	}
 
 	/* Free the slot, then drop a reference on fp outside the lock. */
 	fdfree(fdp, idx);
 	FILEDESC_XUNLOCK(fdp);
 	fdrop(fp, td);
 }
 
 /*
  * Close any files on exec?
  */
 void
 fdcloseexec(struct thread *td)
 {
 	struct filedesc *fdp;
 	struct filedescent *entry;
 	struct file *fp;
 	int fd;
 
 	fdp = td->td_proc->p_fd;
 	KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
 
 	for (fd = 0; fd <= fdp->fd_lastfile; fd++) {
 		entry = &fdp->fd_ofiles[fd];
 		fp = entry->fde_file;
 		if (fp == NULL)
 			continue;
 		/* Message queues and close-on-exec descriptors are closed. */
 		if (fp->f_type != DTYPE_MQUEUE &&
 		    (entry->fde_flags & UF_EXCLOSE) == 0)
 			continue;
 
 		FILEDESC_XLOCK(fdp);
 		fdfree(fdp, fd);
 		(void) closefp(fdp, fd, fp, td, 0);
 		/* closefp() is expected to have released the lock. */
 		FILEDESC_UNLOCK_ASSERT(fdp);
 	}
 }
 
 /*
  * It is unsafe for set[ug]id processes to be started with file
  * descriptors 0..2 closed, as these descriptors are given implicit
  * significance in the Standard C library.  fdcheckstd() will create a
  * descriptor referencing /dev/null for each of stdin, stdout, and
  * stderr that is not already open.
  */
 int
 fdcheckstd(struct thread *td)
 {
 	struct filedesc *fdp;
 	register_t save;
 	int i, error, devnull;
 
 	fdp = td->td_proc->p_fd;
 	KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
 	MPASS(fdp->fd_nfiles >= 3);
 	devnull = -1;
 	for (i = 0; i <= 2; i++) {
 		if (fdp->fd_ofiles[i].fde_file != NULL)
 			continue;
 
 		save = td->td_retval[0];
 		if (devnull != -1) {
 			error = kern_dup(td, FDDUP_FIXED, 0, devnull, i);
 		} else {
 			error = kern_openat(td, AT_FDCWD, "/dev/null",
 			    UIO_SYSSPACE, O_RDWR, 0);
 			if (error == 0) {
 				devnull = td->td_retval[0];
 				KASSERT(devnull == i, ("we didn't get our fd"));
 			}
 		}
 		td->td_retval[0] = save;
 		if (error != 0)
 			return (error);
 	}
 	return (0);
 }
 
 /*
  * Internal form of close.  Decrement reference count on file structure.
  * Note: td may be NULL when closing a file that was being passed in a
  * message.
  *
  * XXXRW: Giant is not required for the caller, but often will be held; this
  * makes it moderately likely the Giant will be recursed in the VFS case.
  */
 int
 closef(struct file *fp, struct thread *td)
 {
 	struct vnode *vp;
 	struct flock lf;
 	struct filedesc_to_leader *fdtol;
 	struct filedesc *fdp;
 
 	/*
 	 * POSIX record locking dictates that any close releases ALL
 	 * locks owned by this process.  This is handled by setting
 	 * a flag in the unlock to free ONLY locks obeying POSIX
 	 * semantics, and not to free BSD-style file locks.
 	 * If the descriptor was in a message, POSIX-style locks
 	 * aren't passed with the descriptor, and the thread pointer
 	 * will be NULL.  Callers should be careful only to pass a
 	 * NULL thread pointer when there really is no owning
 	 * context that might have locks, or the locks will be
 	 * leaked.
 	 */
 	if (fp->f_type == DTYPE_VNODE && td != NULL) {
 		vp = fp->f_vnode;
 		/* Release the locks owned by this process's leader. */
 		if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
 			lf.l_whence = SEEK_SET;
 			lf.l_start = 0;
 			lf.l_len = 0;
 			lf.l_type = F_UNLCK;
 			(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
 			    F_UNLCK, &lf, F_POSIX);
 		}
 		fdtol = td->td_proc->p_fdtol;
 		if (fdtol != NULL) {
 			/*
 			 * Handle special case where file descriptor table is
 			 * shared between multiple process leaders.
 			 */
 			fdp = td->td_proc->p_fd;
 			FILEDESC_XLOCK(fdp);
 			/* Visit every other entry on the circular list. */
 			for (fdtol = fdtol->fdl_next;
 			    fdtol != td->td_proc->p_fdtol;
 			    fdtol = fdtol->fdl_next) {
 				if ((fdtol->fdl_leader->p_flag &
 				    P_ADVLOCK) == 0)
 					continue;
 				/*
 				 * Raise the hold count before the table lock
 				 * is dropped around VOP_ADVLOCK().
 				 */
 				fdtol->fdl_holdcount++;
 				FILEDESC_XUNLOCK(fdp);
 				lf.l_whence = SEEK_SET;
 				lf.l_start = 0;
 				lf.l_len = 0;
 				lf.l_type = F_UNLCK;
 				vp = fp->f_vnode;
 				(void) VOP_ADVLOCK(vp,
 				    (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf,
 				    F_POSIX);
 				FILEDESC_XLOCK(fdp);
 				fdtol->fdl_holdcount--;
 				/* Wake a sleeper that set fdl_wakeup. */
 				if (fdtol->fdl_holdcount == 0 &&
 				    fdtol->fdl_wakeup != 0) {
 					fdtol->fdl_wakeup = 0;
 					wakeup(fdtol);
 				}
 			}
 			FILEDESC_XUNLOCK(fdp);
 		}
 	}
 	/* Finally drop the caller's reference; its result is returned. */
 	return (fdrop(fp, td));
 }
 
 /*
  * Initialize the file pointer with the specified properties.
  *
  * The ops are set with release semantics to be certain that the flags, type,
  * and data are visible when ops is.  This is to prevent ops methods from being
  * called with bad data.
  */
 void
 finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
 {
 	/* Plain stores first ... */
 	fp->f_data = data;
 	fp->f_flag = flag;
 	fp->f_type = type;
 	/* ... then f_ops last, with release semantics, so they are visible with it. */
 	atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
 }
 
 /*
  * Look up fd in fdp with the table lock held.  On success *fpp is set to
  * the file (no reference is taken here) and, if havecapsp is not NULL, the
  * entry's capabilities are copied into it.  Returns EBADF when the entry
  * lookup fails, or the cap_check() error when CAPABILITIES is configured.
  */
 int
 fget_cap_locked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
     struct file **fpp, struct filecaps *havecapsp)
 {
 	struct filedescent *entry;
 #ifdef CAPABILITIES
 	int error;
 #endif
 
 	FILEDESC_LOCK_ASSERT(fdp);
 
 	entry = fdeget_locked(fdp, fd);
 	if (entry == NULL)
 		return (EBADF);
 
 #ifdef CAPABILITIES
 	error = cap_check(cap_rights_fde(entry), needrightsp);
 	if (error != 0)
 		return (error);
 #endif
 
 	/* The table is locked, so the ioctl array can be duplicated. */
 	if (havecapsp != NULL)
 		filecaps_copy(&entry->fde_caps, havecapsp, true);
 
 	*fpp = entry->fde_file;
 	return (0);
 }
 
 /*
  * Look up fd in the calling thread's descriptor table, checking it against
  * needrightsp.  On success a referenced file is stored in *fpp and, if
  * havecapsp is not NULL, the descriptor's capabilities are copied into it.
  * Returns 0 or the error from the lookup.
  */
 int
 fget_cap(struct thread *td, int fd, cap_rights_t *needrightsp,
     struct file **fpp, struct filecaps *havecapsp)
 {
 	struct filedesc *fdp = td->td_proc->p_fd;
 	int error;
 #ifndef CAPABILITIES
 	/* Without capability support every descriptor has full rights. */
 	error = fget_unlocked(fdp, fd, needrightsp, fpp, NULL);
 	if (error == 0 && havecapsp != NULL)
 		filecaps_fill(havecapsp);
 #else
 	struct file *fp;
 	seq_t seq;
 
 	for (;;) {
 		error = fget_unlocked(fdp, fd, needrightsp, &fp, &seq);
 		if (error != 0)
 			return (error);
 
 		if (havecapsp != NULL) {
 			/*
 			 * filecaps_copy() returns non-zero when the entry has
 			 * an ioctl array that cannot be duplicated without
 			 * the table lock.  In that case *havecapsp only holds
 			 * a shallow copy aliasing the table's array, so it
 			 * must not be used: retry with the lock held.
 			 */
 			if (filecaps_copy(&fdp->fd_ofiles[fd].fde_caps,
 			    havecapsp, false) != 0) {
 				fdrop(fp, td);
 				goto get_locked;
 			}
 		}
 
 		/* Accept the result only if the entry did not change. */
 		if (!fd_modified(fdp, fd, seq))
 			break;
 		fdrop(fp, td);
 	}
 
 	*fpp = fp;
 	return (0);
 
 get_locked:
 	/* Slow path: repeat the lookup and the copy under the shared lock. */
 	FILEDESC_SLOCK(fdp);
 	error = fget_cap_locked(fdp, fd, needrightsp, fpp, havecapsp);
 	if (error == 0)
 		fhold(*fpp);
 	FILEDESC_SUNLOCK(fdp);
 #endif
 	return (error);
 }
 
 /*
  * Look up fd in fdp without taking the table lock.  On success a
  * referenced file is stored in *fpp and 0 is returned; with CAPABILITIES
  * the sequence value that was read is also stored in *seqp when seqp is
  * not NULL.  Returns EBADF for an out-of-range or empty slot, or the
  * cap_check() error when the rights in needrightsp are not held.
  */
 int
 fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
     struct file **fpp, seq_t *seqp)
 {
 #ifdef CAPABILITIES
 	struct filedescent *fde;
 #endif
 	struct fdescenttbl *fdt;
 	struct file *fp;
 	u_int count;
 #ifdef CAPABILITIES
 	seq_t seq;
 	cap_rights_t haverights;
 	int error;
 #endif
 
 	fdt = fdp->fd_files;
 	/* The unsigned comparison also rejects negative descriptors. */
 	if ((u_int)fd >= fdt->fdt_nfiles)
 		return (EBADF);
 	/*
 	 * Fetch the descriptor locklessly.  We avoid fdrop() races by
 	 * never raising a refcount above 0.  To accomplish this we have
 	 * to use a cmpset loop rather than an atomic_add.  The descriptor
 	 * must be re-verified once we acquire a reference to be certain
 	 * that the identity is still correct and we did not lose a race
 	 * due to preemption.
 	 */
 	for (;;) {
 #ifdef CAPABILITIES
 		/* Snapshot rights and file pointer; retry if the entry changed. */
 		seq = seq_read(fd_seq(fdt, fd));
 		fde = &fdt->fdt_ofiles[fd];
 		haverights = *cap_rights_fde(fde);
 		fp = fde->fde_file;
 		if (!seq_consistent(fd_seq(fdt, fd), seq))
 			continue;
 #else
 		fp = fdt->fdt_ofiles[fd].fde_file;
 #endif
 		if (fp == NULL)
 			return (EBADF);
 #ifdef CAPABILITIES
 		error = cap_check(&haverights, needrightsp);
 		if (error != 0)
 			return (error);
 #endif
 		count = fp->f_count;
 	retry:
 		if (count == 0) {
 			/*
 			 * Force a reload. Other thread could reallocate the
 			 * table before this fd was closed, so it is possible
 			 * that there is a stale fp pointer in cached version.
 			 */
 			fdt = *(struct fdescenttbl * volatile *)&(fdp->fd_files);
 			continue;
 		}
 		/*
 		 * Use an acquire barrier to force re-reading of fdt so it is
 		 * refreshed for verification.
 		 */
 		if (atomic_fcmpset_acq_int(&fp->f_count, &count, count + 1) == 0)
 			goto retry;
 		fdt = fdp->fd_files;
 		/* Keep the reference only if the slot is still unchanged. */
 #ifdef	CAPABILITIES
 		if (seq_consistent_nomb(fd_seq(fdt, fd), seq))
 #else
 		if (fp == fdt->fdt_ofiles[fd].fde_file)
 #endif
 			break;
 		fdrop(fp, curthread);
 	}
 	*fpp = fp;
 	if (seqp != NULL) {
 #ifdef CAPABILITIES
 		*seqp = seq;
 #endif
 	}
 	return (0);
 }
 
 /*
  * Resolve descriptor 'fd' of the calling thread's process into a held
  * file pointer.
  *
  * The lookup is delegated to fget_unlocked(), which receives 'needrightsp'
  * and 'seqp'.  A file whose operations vector is badfileops is rejected
  * with EBADF.  'flags' selects an additional check against f_flag:
  *	FREAD, FWRITE:	the corresponding bit must be set;
  *	FEXEC:		FREAD or FEXEC must be set and FWRITE must be clear;
  *	0:		no access-mode check.
  * A failed check yields EBADF (as POSIX requires for FREAD and FWRITE).
  *
  * On failure *fpp is NULL and a non-zero error is returned.  On success
  * zero is returned and *fpp carries a reference which the caller must
  * release with fdrop().
  */
 static __inline int
 _fget(struct thread *td, int fd, struct file **fpp, int flags,
     cap_rights_t *needrightsp, seq_t *seqp)
 {
 	struct file *fp;
 	int error;
 
 	*fpp = NULL;
 	error = fget_unlocked(td->td_proc->p_fd, fd, needrightsp, &fp, seqp);
 	if (error != 0)
 		return (error);
 	if (fp->f_ops == &badfileops)
 		goto bad;
 
 	switch (flags) {
 	case 0:
 		break;
 	case FREAD:
 	case FWRITE:
 		if ((fp->f_flag & flags) == 0)
 			goto bad;
 		break;
 	case FEXEC:
 		if ((fp->f_flag & (FREAD | FEXEC)) == 0 ||
 		    (fp->f_flag & FWRITE) != 0)
 			goto bad;
 		break;
 	default:
 		KASSERT(0, ("wrong flags"));
 	}
 
 	*fpp = fp;
 	return (0);
 
 bad:
 	/* Give back the reference obtained by fget_unlocked(). */
 	fdrop(fp, td);
 	return (EBADF);
 }
 
 /*
  * Look up 'fd' through _fget() with no access-mode requirement (flags 0)
  * and without reporting a sequence number.
  */
 int
 fget(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
 {
 	int error;
 
 	error = _fget(td, fd, fpp, 0, rightsp, NULL);
 	return (error);
 }
 
 /*
  * Look up 'fd' for mmap and, when 'maxprotp' is not NULL, report the
  * maximum VM protection.
  *
  * Without CAPABILITIES the protection reported is always VM_PROT_ALL.
  * With CAPABILITIES it is derived from the descriptor's capability rights,
  * and the lookup is repeated until the descriptor is observed unmodified
  * (fd_modified()) across the rights read.
  */
 int
 fget_mmap(struct thread *td, int fd, cap_rights_t *rightsp, u_char *maxprotp,
     struct file **fpp)
 {
 #ifdef CAPABILITIES
 	struct filedesc *fdp;
 	seq_t seq;
 	int error;
 
 	fdp = td->td_proc->p_fd;
 	MPASS(cap_rights_is_set(rightsp, CAP_MMAP));
 	for (;;) {
 		error = _fget(td, fd, fpp, 0, rightsp, &seq);
 		if (error != 0)
 			return (error);
 		/* Translate capability rights into a protection mask. */
 		if (maxprotp != NULL)
 			*maxprotp = cap_rights_to_vmprot(cap_rights(fdp, fd));
 		if (!fd_modified(fdp, fd, seq))
 			return (0);
 		/* The slot changed underneath us; drop and try again. */
 		fdrop(*fpp, td);
 	}
 #else
 	int error;
 
 	error = _fget(td, fd, fpp, 0, rightsp, NULL);
 	if (maxprotp != NULL)
 		*maxprotp = VM_PROT_ALL;
 	return (error);
 #endif
 }
 
 /*
  * Look up 'fd' through _fget(), requiring the file to be open for reading
  * (FREAD).
  */
 int
 fget_read(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
 {
 	int error;
 
 	error = _fget(td, fd, fpp, FREAD, rightsp, NULL);
 	return (error);
 }
 
 /*
  * Look up 'fd' through _fget(), requiring the file to be open for writing
  * (FWRITE).
  */
 int
 fget_write(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
 {
 	int error;
 
 	error = _fget(td, fd, fpp, FWRITE, rightsp, NULL);
 	return (error);
 }
 
 /*
  * Look up 'fd' for an fcntl operation.
  *
  * Without CAPABILITIES this is a plain fget_unlocked().  With CAPABILITIES
  * the fcntl command 'needfcntl' is additionally validated with
  * cap_fcntl_check(); the lookup is repeated until the descriptor is seen
  * unmodified across that check.  If the check fails, the file reference is
  * dropped and *fpp is set to NULL.
  */
 int
 fget_fcntl(struct thread *td, int fd, cap_rights_t *rightsp, int needfcntl,
     struct file **fpp)
 {
 	struct filedesc *fdp;
 #ifdef CAPABILITIES
 	seq_t seq;
 	int error;
 #endif
 
 	fdp = td->td_proc->p_fd;
 #ifdef CAPABILITIES
 	MPASS(cap_rights_is_set(rightsp, CAP_FCNTL));
 	for (;;) {
 		error = fget_unlocked(fdp, fd, rightsp, fpp, &seq);
 		if (error != 0)
 			return (error);
 		error = cap_fcntl_check(fdp, fd, needfcntl);
 		if (!fd_modified(fdp, fd, seq))
 			break;
 		fdrop(*fpp, td);
 	}
 	if (error == 0)
 		return (0);
 	fdrop(*fpp, td);
 	*fpp = NULL;
 	return (error);
 #else
 	return (fget_unlocked(fdp, fd, rightsp, fpp, NULL));
 #endif
 }
 
 /*
  * Variant of _fget() that hands back the vnode behind the descriptor
  * instead of the file.  The file is looked up with the given 'flags' and
  * rights, its f_vnode is referenced with vrefact() and stored in *vpp, and
  * the file reference is dropped again.  EINVAL is returned when the file
  * has no vnode; *vpp is NULL on every failure.
  *
  * Note that pipes use vnodes but never have VM objects.
  *
  * XXX: what about the unused flags ?
  */
 static __inline int
 _fgetvp(struct thread *td, int fd, int flags, cap_rights_t *needrightsp,
     struct vnode **vpp)
 {
 	struct file *fp;
 	int error;
 
 	*vpp = NULL;
 	error = _fget(td, fd, &fp, flags, needrightsp, NULL);
 	if (error != 0)
 		return (error);
 	if (fp->f_vnode != NULL) {
 		*vpp = fp->f_vnode;
 		vrefact(*vpp);
 	} else
 		error = EINVAL;
 	fdrop(fp, td);
 
 	return (error);
 }
 
 /*
  * Fetch a referenced vnode for 'fd' with no access-mode requirement.
  */
 int
 fgetvp(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp)
 {
 	int error;
 
 	error = _fgetvp(td, fd, 0, rightsp, vpp);
 	return (error);
 }
 
 /*
  * Fetch the vnode behind 'fd' together with a copy of the descriptor's
  * capabilities.
  *
  * The file and its capabilities come from fget_cap_locked().  EBADF is
  * returned for a file using badfileops and EINVAL for a file without a
  * vnode; in both cases the capability copy is released with
  * filecaps_free().  On success the capabilities are stored in *havecaps
  * and a vrefact()'d vnode in *vpp.
  */
 int
 fgetvp_rights(struct thread *td, int fd, cap_rights_t *needrightsp,
     struct filecaps *havecaps, struct vnode **vpp)
 {
 	struct filecaps caps;
 	struct file *fp;
 	int error;
 
 	error = fget_cap_locked(td->td_proc->p_fd, fd, needrightsp, &fp,
 	    &caps);
 	if (error != 0)
 		return (error);
 
 	if (fp->f_ops == &badfileops)
 		error = EBADF;
 	else if (fp->f_vnode == NULL)
 		error = EINVAL;
 	if (error != 0) {
 		filecaps_free(&caps);
 		return (error);
 	}
 
 	*havecaps = caps;
 	*vpp = fp->f_vnode;
 	vrefact(*vpp);
 	return (0);
 }
 
 /*
  * Fetch a referenced vnode for 'fd'; the file must be open for reading.
  */
 int
 fgetvp_read(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp)
 {
 	int error;
 
 	error = _fgetvp(td, fd, FREAD, rightsp, vpp);
 	return (error);
 }
 
 /*
  * Fetch a referenced vnode for 'fd'; the file must pass the FEXEC check
  * performed by _fget().
  */
 int
 fgetvp_exec(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp)
 {
 	int error;
 
 	error = _fgetvp(td, fd, FEXEC, rightsp, vpp);
 	return (error);
 }
 
 #ifdef notyet
 /*
  * Fetch a referenced vnode for 'fd'; the file must be open for writing.
  * Compiled only when 'notyet' is defined.
  */
 int
 fgetvp_write(struct thread *td, int fd, cap_rights_t *rightsp,
     struct vnode **vpp)
 {
 	int error;
 
 	error = _fgetvp(td, fd, FWRITE, rightsp, vpp);
 	return (error);
 }
 #endif
 
 /*
  * Tear down a file whose last reference has gone away.
  *
  * Panics if f_count is not zero.  The file is closed through fo_close(),
  * the global openfiles counter is decremented, the credential and the
  * advice structure are released and the file is returned to file_zone.
  * The value returned is the result of fo_close().
  */
 int
 _fdrop(struct file *fp, struct thread *td)
 {
 	int closerr;
 
 	if (fp->f_count != 0)
 		panic("fdrop: count %d", fp->f_count);
 
 	closerr = fo_close(fp, td);
 
 	atomic_subtract_int(&openfiles, 1);
 	crfree(fp->f_cred);
 	free(fp->f_advice, M_FADVISE);
 	uma_zfree(file_zone, fp);
 
 	return (closerr);
 }
 
 /*
  * flock(2): apply or remove an advisory lock on an open file.
  *
  * The request is expressed as a record lock covering the whole file
  * (l_whence = SEEK_SET, l_start = 0, l_len = 0) and passed to
  * VOP_ADVLOCK() with F_FLOCK.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct flock_args {
 	int	fd;
 	int	how;
 };
 #endif
 /* ARGSUSED */
 int
 sys_flock(struct thread *td, struct flock_args *uap)
 {
 	struct file *fp;
 	struct vnode *vp;
 	struct flock lf;
 	cap_rights_t rights;
 	int error;
 
 	error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FLOCK), &fp);
 	if (error != 0)
 		return (error);
 	/* Only vnode-backed files can be locked this way. */
 	if (fp->f_type != DTYPE_VNODE) {
 		fdrop(fp, td);
 		return (EOPNOTSUPP);
 	}
 
 	vp = fp->f_vnode;
 	lf.l_whence = SEEK_SET;
 	lf.l_start = 0;
 	lf.l_len = 0;
 
 	if (uap->how & LOCK_UN) {
 		/* Unlock takes precedence over any other request bit. */
 		lf.l_type = F_UNLCK;
 		atomic_clear_int(&fp->f_flag, FHASLOCK);
 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
 	} else if ((uap->how & (LOCK_EX | LOCK_SH)) == 0) {
 		/* Neither an exclusive nor a shared lock was requested. */
 		error = EBADF;
 	} else {
 		/* Exclusive wins when both LOCK_EX and LOCK_SH are given. */
 		lf.l_type = (uap->how & LOCK_EX) ? F_WRLCK : F_RDLCK;
 		atomic_set_int(&fp->f_flag, FHASLOCK);
 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 		    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
 	}
 
 	fdrop(fp, td);
 	return (error);
 }
 /*
  * Duplicate the specified descriptor to a free descriptor.
  *
  * 'dfd' is the existing descriptor and 'mode' the open mode requested for
  * the new one.  'openerror' selects the operation and must be ENODEV
  * (duplicate) or ENXIO (move); anything else trips the KASSERT.  On
  * success the newly allocated descriptor number is stored in *indxp and 0
  * is returned.  EBADF is returned when 'dfd' is not open, EACCES when the
  * requested mode is not a subset of the existing file's mode (ENODEV case
  * only), or the error from fdalloc().
  *
  * The filedesc lock is taken exclusively for the whole operation.
  */
 int
 dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode,
     int openerror, int *indxp)
 {
 	struct filedescent *newfde, *oldfde;
 	struct file *fp;
 	int error, indx;
 
 	KASSERT(openerror == ENODEV || openerror == ENXIO,
 	    ("unexpected error %d in %s", openerror, __func__));
 
 	/*
 	 * If the to-be-dup'd fd number is greater than the allowed number
 	 * of file descriptors, or the fd to be dup'd has already been
 	 * closed, then reject.
 	 */
 	FILEDESC_XLOCK(fdp);
 	if ((fp = fget_locked(fdp, dfd)) == NULL) {
 		FILEDESC_XUNLOCK(fdp);
 		return (EBADF);
 	}
 
 	/* Reserve the destination slot. */
 	error = fdalloc(td, 0, &indx);
 	if (error != 0) {
 		FILEDESC_XUNLOCK(fdp);
 		return (error);
 	}
 
 	/*
 	 * There are two cases of interest here.
 	 *
 	 * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
 	 *
 	 * For ENXIO steal away the file structure from (dfd) and store it in
 	 * (indx).  (dfd) is effectively closed by this operation.
 	 */
 	switch (openerror) {
 	case ENODEV:
 		/*
 		 * Check that the mode the file is being opened for is a
 		 * subset of the mode of the existing descriptor.
 		 */
 		if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
 			/* Release the slot reserved by fdalloc() above. */
 			fdunused(fdp, indx);
 			FILEDESC_XUNLOCK(fdp);
 			return (EACCES);
 		}
 		/* The new slot gets its own reference on the file. */
 		fhold(fp);
 		newfde = &fdp->fd_ofiles[indx];
 		oldfde = &fdp->fd_ofiles[dfd];
 #ifdef CAPABILITIES
 		seq_write_begin(&newfde->fde_seq);
 #endif
 		memcpy(newfde, oldfde, fde_change_size);
 		filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps, true);
 #ifdef CAPABILITIES
 		seq_write_end(&newfde->fde_seq);
 #endif
 		break;
 	case ENXIO:
 		/*
 		 * Steal away the file pointer from dfd and stuff it into indx.
 		 * No extra reference is taken: the one held by dfd moves to
 		 * indx along with the entry.
 		 */
 		newfde = &fdp->fd_ofiles[indx];
 		oldfde = &fdp->fd_ofiles[dfd];
 #ifdef CAPABILITIES
 		seq_write_begin(&newfde->fde_seq);
 #endif
 		memcpy(newfde, oldfde, fde_change_size);
 		oldfde->fde_file = NULL;
 		fdunused(fdp, dfd);
 #ifdef CAPABILITIES
 		seq_write_end(&newfde->fde_seq);
 #endif
 		break;
 	}
 	FILEDESC_XUNLOCK(fdp);
 	*indxp = indx;
 	return (0);
 }
 
 /*
  * This sysctl determines if we will allow a process to chroot(2) if it
  * has a directory open:
  *	0: disallowed for all processes.
  *	1: allowed for processes that were not already chroot(2)'ed.
  *	2: allowed for all processes.
  *
  * The policy is enforced in pwd_chroot(); the default is 1 and the value
  * is exported read-write as kern.chroot_allow_open_directories.
  */
 
 static int chroot_allow_open_directories = 1;
 
 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0,
     "Allow a process to chroot(2) if it has a directory open");
 
 /*
  * Support routine for the stricter chroot(2) policy: scan the descriptor
  * table and return EPERM as soon as a descriptor referring to a directory
  * vnode is found, otherwise return 0.  The filedesc lock must be held.
  */
 static int
 chroot_refuse_vdir_fds(struct filedesc *fdp)
 {
 	struct file *fp;
 	int fd;
 
 	FILEDESC_LOCK_ASSERT(fdp);
 
 	for (fd = 0; fd <= fdp->fd_lastfile; fd++) {
 		fp = fget_locked(fdp, fd);
 		/* Only vnode-backed open files are of interest. */
 		if (fp == NULL || fp->f_type != DTYPE_VNODE)
 			continue;
 		if (fp->f_vnode->v_type == VDIR)
 			return (EPERM);
 	}
 	return (0);
 }
 
 /*
  * Install 'vp' as the root directory of the calling process.  Shared by
  * kern_chroot() and jail_attach(); the caller is expected to have done the
  * priv_check() and mac_vnode_check_chroot() authorization already.
  *
  * Depending on chroot_allow_open_directories the request is refused while
  * the process has directories open.  A reference is taken on 'vp' for
  * fd_rdir (and a second one for fd_jdir if that was not yet set); the
  * previous root vnode is released after the lock is dropped.
  */
 int
 pwd_chroot(struct thread *td, struct vnode *vp)
 {
 	struct filedesc *fdp;
 	struct vnode *prev_root;
 	int error;
 
 	fdp = td->td_proc->p_fd;
 	FILEDESC_XLOCK(fdp);
 
 	error = 0;
 	if (chroot_allow_open_directories == 0 ||
 	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode))
 		error = chroot_refuse_vdir_fds(fdp);
 	if (error != 0) {
 		FILEDESC_XUNLOCK(fdp);
 		return (error);
 	}
 
 	prev_root = fdp->fd_rdir;
 	vrefact(vp);
 	fdp->fd_rdir = vp;
 	if (fdp->fd_jdir == NULL) {
 		vrefact(vp);
 		fdp->fd_jdir = vp;
 	}
 	FILEDESC_XUNLOCK(fdp);
 
 	vrele(prev_root);
 	return (0);
 }
 
 /*
  * Make 'vp' the current directory of the calling process.  No new
  * reference is taken on 'vp' here; the function asserts that it already
  * has a non-zero use count.  The previous current directory is released
  * once the filedesc lock has been dropped.
  */
 void
 pwd_chdir(struct thread *td, struct vnode *vp)
 {
 	struct filedesc *fdp;
 	struct vnode *prev_cdir;
 
 	fdp = td->td_proc->p_fd;
 
 	FILEDESC_XLOCK(fdp);
 	VNASSERT(vp->v_usecount > 0, vp,
 	    ("chdir to a vnode with zero usecount"));
 	prev_cdir = fdp->fd_cdir;
 	fdp->fd_cdir = vp;
 	FILEDESC_XUNLOCK(fdp);
 
 	vrele(prev_cdir);
 }
 
 /*
  * Scan all active processes and prisons to see if any of them have a current
  * or root directory of `olddp'. If so, replace them with the new mount point.
  *
  * Every replaced pointer gains a reference on `newdp' immediately; the
  * matching references on `olddp' are only counted in 'nrele' and released
  * at the very end, after all locks have been dropped.
  */
 void
 mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
 {
 	struct filedesc *fdp;
 	struct prison *pr;
 	struct proc *p;
 	int nrele;
 
 	/* A reference count of exactly one: nothing to do. */
 	if (vrefcnt(olddp) == 1)
 		return;
 	nrele = 0;
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		PROC_LOCK(p);
 		fdp = fdhold(p);
 		PROC_UNLOCK(p);
 		if (fdp == NULL)
 			continue;
 		/* Fix up the current, root and jail directories. */
 		FILEDESC_XLOCK(fdp);
 		if (fdp->fd_cdir == olddp) {
 			vrefact(newdp);
 			fdp->fd_cdir = newdp;
 			nrele++;
 		}
 		if (fdp->fd_rdir == olddp) {
 			vrefact(newdp);
 			fdp->fd_rdir = newdp;
 			nrele++;
 		}
 		if (fdp->fd_jdir == olddp) {
 			vrefact(newdp);
 			fdp->fd_jdir = newdp;
 			nrele++;
 		}
 		FILEDESC_XUNLOCK(fdp);
 		fddrop(fdp);
 	}
 	sx_sunlock(&allproc_lock);
 	/* The global root vnode. */
 	if (rootvnode == olddp) {
 		vrefact(newdp);
 		rootvnode = newdp;
 		nrele++;
 	}
 	/* prison0 is handled separately from the allprison list walk. */
 	mtx_lock(&prison0.pr_mtx);
 	if (prison0.pr_root == olddp) {
 		vrefact(newdp);
 		prison0.pr_root = newdp;
 		nrele++;
 	}
 	mtx_unlock(&prison0.pr_mtx);
 	sx_slock(&allprison_lock);
 	TAILQ_FOREACH(pr, &allprison, pr_list) {
 		mtx_lock(&pr->pr_mtx);
 		if (pr->pr_root == olddp) {
 			vrefact(newdp);
 			pr->pr_root = newdp;
 			nrele++;
 		}
 		mtx_unlock(&pr->pr_mtx);
 	}
 	sx_sunlock(&allprison_lock);
 	/* Drop one reference on the old vnode per pointer replaced. */
 	while (nrele--)
 		vrele(olddp);
 }
 
 /*
  * Allocate and initialize a filedesc_to_leader for 'leader'.
  *
  * The new entry starts with a reference count of one and no holds or
  * pending wakeup.  When 'old' is given, the entry is linked into old's
  * circular list directly after 'old', under the exclusive lock of 'fdp';
  * otherwise it forms a list containing only itself.
  */
 struct filedesc_to_leader *
 filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader)
 {
 	struct filedesc_to_leader *fdtol, *after;
 
 	fdtol = malloc(sizeof(*fdtol), M_FILEDESC_TO_LEADER, M_WAITOK);
 	fdtol->fdl_refcount = 1;
 	fdtol->fdl_holdcount = 0;
 	fdtol->fdl_wakeup = 0;
 	fdtol->fdl_leader = leader;
 
 	if (old == NULL) {
 		fdtol->fdl_next = fdtol;
 		fdtol->fdl_prev = fdtol;
 		return (fdtol);
 	}
 
 	FILEDESC_XLOCK(fdp);
 	after = old->fdl_next;
 	fdtol->fdl_prev = old;
 	fdtol->fdl_next = after;
 	old->fdl_next = fdtol;
 	after->fdl_prev = fdtol;
 	FILEDESC_XUNLOCK(fdp);
 
 	return (fdtol);
 }
 
 /*
  * Sysctl handler reporting the number of descriptors in use by the
  * current process.  Only a first name component of 0 is accepted; anything
  * else yields EINVAL.  The count is the number of bits set in the
  * descriptor bitmap up to fd_lastfile, taken under the shared filedesc
  * lock.
  */
 static int
 sysctl_kern_proc_nfds(SYSCTL_HANDLER_ARGS)
 {
 	struct filedesc *fdp;
 	int count, slots;
 
 	if (*(int *)arg1 != 0)
 		return (EINVAL);
 
 	fdp = curproc->p_fd;
 	FILEDESC_SLOCK(fdp);
 	count = 0;
 	slots = NDSLOTS(fdp->fd_lastfile + 1);
 	while (slots-- > 0)
 		count += bitcountl(fdp->fd_map[slots]);
 	FILEDESC_SUNLOCK(fdp);
 
 	return (SYSCTL_OUT(req, &count, sizeof(count)));
 }
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_NFDS, nfds,
     CTLFLAG_RD|CTLFLAG_CAPRD|CTLFLAG_MPSAFE, sysctl_kern_proc_nfds,
     "Number of open file descriptors");
 
 /*
  * Get file structures globally.
  *
  * Sysctl handler for kern.file: emits one struct xfile per open descriptor
  * of every process that the requesting thread is allowed to see.  When
  * called without an output buffer (req->oldptr == NULL) it only reports a
  * size estimate.
  */
 static int
 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
 {
 	struct xfile xf;
 	struct filedesc *fdp;
 	struct file *fp;
 	struct proc *p;
 	int error, n;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	if (req->oldptr == NULL) {
 		/*
 		 * Size probe: sum fd_lastfile over all processes.  No
 		 * p_cansee() filtering is applied on this path.
 		 */
 		n = 0;
 		sx_slock(&allproc_lock);
 		FOREACH_PROC_IN_SYSTEM(p) {
 			PROC_LOCK(p);
 			if (p->p_state == PRS_NEW) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 			fdp = fdhold(p);
 			PROC_UNLOCK(p);
 			if (fdp == NULL)
 				continue;
 			/* overestimates sparse tables. */
 			if (fdp->fd_lastfile > 0)
 				n += fdp->fd_lastfile;
 			fddrop(fdp);
 		}
 		sx_sunlock(&allproc_lock);
 		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
 	}
 	error = 0;
 	/* Zero once up front; the loop below overwrites individual fields. */
 	bzero(&xf, sizeof(xf));
 	xf.xf_size = sizeof(xf);
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		PROC_LOCK(p);
 		if (p->p_state == PRS_NEW) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		/* Skip processes the requester may not see. */
 		if (p_cansee(req->td, p) != 0) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		/* Per-process fields are captured while the proc is locked. */
 		xf.xf_pid = p->p_pid;
 		xf.xf_uid = p->p_ucred->cr_uid;
 		fdp = fdhold(p);
 		PROC_UNLOCK(p);
 		if (fdp == NULL)
 			continue;
 		FILEDESC_SLOCK(fdp);
 		/* Stop early if the table loses its last user (fd_refcnt). */
 		for (n = 0; fdp->fd_refcnt > 0 && n <= fdp->fd_lastfile; ++n) {
 			if ((fp = fdp->fd_ofiles[n].fde_file) == NULL)
 				continue;
 			xf.xf_fd = n;
 			xf.xf_file = fp;
 			xf.xf_data = fp->f_data;
 			xf.xf_vnode = fp->f_vnode;
 			xf.xf_type = fp->f_type;
 			xf.xf_count = fp->f_count;
 			xf.xf_msgcount = 0;
 			xf.xf_offset = foffset_get(fp);
 			xf.xf_flag = fp->f_flag;
 			error = SYSCTL_OUT(req, &xf, sizeof(xf));
 			if (error)
 				break;
 		}
 		FILEDESC_SUNLOCK(fdp);
 		fddrop(fdp);
 		/* Propagate an output error out of the process loop too. */
 		if (error)
 			break;
 	}
 	sx_sunlock(&allproc_lock);
 	return (error);
 }
 
 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE,
     0, 0, sysctl_kern_file, "S,xfile", "Entire file table");
 
 #ifdef KINFO_FILE_SIZE
 CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
 #endif
 
 /*
  * Translate kernel file flags (f_flag / open(2) bits) into the KF_FLAG_*
  * bits exported in struct kinfo_file.  Bits without an entry in the table
  * are dropped.
  */
 static int
 xlate_fflags(int fflags)
 {
 	static const struct {
 		int	fflag;
 		int	kf_fflag;
 	} fflags_table[] = {
 		{ FAPPEND, KF_FLAG_APPEND },
 		{ FASYNC, KF_FLAG_ASYNC },
 		{ FFSYNC, KF_FLAG_FSYNC },
 		{ FHASLOCK, KF_FLAG_HASLOCK },
 		{ FNONBLOCK, KF_FLAG_NONBLOCK },
 		{ FREAD, KF_FLAG_READ },
 		{ FWRITE, KF_FLAG_WRITE },
 		{ O_CREAT, KF_FLAG_CREAT },
 		{ O_DIRECT, KF_FLAG_DIRECT },
 		{ O_EXCL, KF_FLAG_EXCL },
 		{ O_EXEC, KF_FLAG_EXEC },
 		{ O_EXLOCK, KF_FLAG_EXLOCK },
 		{ O_NOFOLLOW, KF_FLAG_NOFOLLOW },
 		{ O_SHLOCK, KF_FLAG_SHLOCK },
 		{ O_TRUNC, KF_FLAG_TRUNC }
 	};
 	unsigned int idx;
 	int result;
 
 	result = 0;
 	idx = 0;
 	while (idx < nitems(fflags_table)) {
 		if ((fflags & fflags_table[idx].fflag) != 0)
 			result |= fflags_table[idx].kf_fflag;
 		idx++;
 	}
 	return (result);
 }
 
 /*
  * Shrink the advertised record size so that it ends right after the NUL
  * terminator of kf_path, rounded up to a multiple of sizeof(uint64_t).
  */
 static void
 pack_kinfo(struct kinfo_file *kif)
 {
 
 	kif->kf_structsize = roundup(offsetof(struct kinfo_file, kf_path) +
 	    strlen(kif->kf_path) + 1, sizeof(uint64_t));
 }
 
 /*
  * Fill 'kif' with the description of open file 'fp' at descriptor 'fd'.
  *
  * Generic fields are set here; type-specific ones come from the file's
  * fo_fill_kinfo() method, and KF_ATTR_VALID is set when that method
  * succeeds.  'rightsp' may be NULL, in which case an empty rights set is
  * recorded.  With KERN_FILEDESC_PACK_KINFO in 'flags' the record is
  * trimmed by pack_kinfo(); otherwise the full structure size is reported.
  */
 static void
 export_file_to_kinfo(struct file *fp, int fd, cap_rights_t *rightsp,
     struct kinfo_file *kif, struct filedesc *fdp, int flags)
 {
 
 	bzero(kif, sizeof(*kif));
 
 	/* Default type, for fill_kinfo methods that leave it untouched. */
 	kif->kf_type = KF_TYPE_UNKNOWN;
 	kif->kf_flags = xlate_fflags(fp->f_flag);
 	if (rightsp == NULL)
 		cap_rights_init(&kif->kf_cap_rights);
 	else
 		kif->kf_cap_rights = *rightsp;
 	kif->kf_fd = fd;
 	kif->kf_ref_count = fp->f_count;
 	kif->kf_offset = foffset_get(fp);
 
 	/*
 	 * The method below may drop the filedesc lock; 'fp' must not be
 	 * touched once it has been called.
 	 */
 	if (fo_fill_kinfo(fp, kif, fdp) == 0)
 		kif->kf_status |= KF_ATTR_VALID;
 
 	if ((flags & KERN_FILEDESC_PACK_KINFO) == 0)
 		kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t));
 	else
 		pack_kinfo(kif);
 }
 
 /*
  * Fill 'kif' with the description of vnode 'vp', reported under the
  * pseudo descriptor number 'fd' with file flags 'fflags'.  The reference
  * count and offset are reported as -1 and the rights set is empty.
  *
  * The vnode reference passed in by the caller is released before
  * returning.
  */
 static void
 export_vnode_to_kinfo(struct vnode *vp, int fd, int fflags,
     struct kinfo_file *kif, int flags)
 {
 
 	bzero(kif, sizeof(*kif));
 	kif->kf_type = KF_TYPE_VNODE;
 
 	if (vn_fill_kinfo_vnode(vp, kif) == 0)
 		kif->kf_status |= KF_ATTR_VALID;
 
 	kif->kf_flags = xlate_fflags(fflags);
 	cap_rights_init(&kif->kf_cap_rights);
 	kif->kf_fd = fd;
 	kif->kf_ref_count = -1;
 	kif->kf_offset = -1;
 
 	if ((flags & KERN_FILEDESC_PACK_KINFO) == 0)
 		kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t));
 	else
 		pack_kinfo(kif);
 
 	vrele(vp);
 }
 
 /*
  * State shared by the export_*_to_sb() helpers while a descriptor table is
  * being written to an sbuf.
  */
 struct export_fd_buf {
 	struct filedesc		*fdp;	/* table whose lock is dropped around output; may be NULL */
 	struct sbuf 		*sb;	/* destination buffer */
 	ssize_t			remainder; /* bytes still allowed; -1 = no limit, 0 = export terminated */
 	struct kinfo_file	kif;	/* scratch record for the entry being exported */
 	int			flags;	/* KERN_FILEDESC_PACK_KINFO or 0 */
 };
 
 /*
  * Append the record currently held in efbuf->kif to the sbuf.
  *
  * When a size limit is in force (remainder != -1) and the record does not
  * fit, the export is terminated by setting the remainder to 0 and 0 is
  * returned; otherwise the record size is charged against the remainder.
  * Returns ENOMEM if sbuf_bcat() fails.
  */
 static int
 export_kinfo_to_sb(struct export_fd_buf *efbuf)
 {
 
 	if (efbuf->remainder != -1) {
 		if (efbuf->remainder < efbuf->kif.kf_structsize) {
 			/* Terminate export. */
 			efbuf->remainder = 0;
 			return (0);
 		}
 		efbuf->remainder -= efbuf->kif.kf_structsize;
 	}
 	if (sbuf_bcat(efbuf->sb, &efbuf->kif, efbuf->kif.kf_structsize) != 0)
 		return (ENOMEM);
 	return (0);
 }
 
 /*
  * Export one open file as a kinfo_file record appended to efbuf->sb.
  * Nothing is done once the export has been terminated (remainder == 0).
  * The shared filedesc lock is held on entry and on return, and is dropped
  * while the record is copied into the sbuf.
  */
 static int
 export_file_to_sb(struct file *fp, int fd, cap_rights_t *rightsp,
     struct export_fd_buf *efbuf)
 {
 	int rv;
 
 	if (efbuf->remainder != 0) {
 		export_file_to_kinfo(fp, fd, rightsp, &efbuf->kif, efbuf->fdp,
 		    efbuf->flags);
 		FILEDESC_SUNLOCK(efbuf->fdp);
 		rv = export_kinfo_to_sb(efbuf);
 		FILEDESC_SLOCK(efbuf->fdp);
 	} else
 		rv = 0;
 	return (rv);
 }
 
 /*
  * Export one vnode as a kinfo_file record appended to efbuf->sb.
  *
  * The caller passes in a referenced vnode and this function consumes that
  * reference on every path: normally export_vnode_to_kinfo() releases it,
  * and when the export has already been terminated (remainder == 0) it is
  * released here, so that the reference is not leaked.
  *
  * If efbuf->fdp is not NULL the shared filedesc lock is held on entry and
  * on return, and is dropped around the vnode work.
  *
  * Returns 0, or the error from export_kinfo_to_sb().
  */
 static int
 export_vnode_to_sb(struct vnode *vp, int fd, int fflags,
     struct export_fd_buf *efbuf)
 {
 	int error;
 
 	if (efbuf->fdp != NULL)
 		FILEDESC_SUNLOCK(efbuf->fdp);
 	if (efbuf->remainder == 0) {
 		/* Export already terminated: just drop the reference. */
 		vrele(vp);
 		error = 0;
 	} else {
 		export_vnode_to_kinfo(vp, fd, fflags, &efbuf->kif,
 		    efbuf->flags);
 		error = export_kinfo_to_sb(efbuf);
 	}
 	if (efbuf->fdp != NULL)
 		FILEDESC_SLOCK(efbuf->fdp);
 	return (error);
 }
 
 /*
  * Store a process file descriptor information to sbuf.
  *
  * Takes a locked proc as argument, and returns with the proc unlocked.
  *
  * Records are emitted in this order: ktrace vnode, text vnode, controlling
  * tty, then (if the process has a descriptor table) working, root and jail
  * directories followed by every open descriptor.  'maxlen' bounds the
  * number of bytes written (-1 means no limit) and 'flags' is passed
  * through to the kinfo export helpers (KERN_FILEDESC_PACK_KINFO).
  *
  * Only errors from exporting regular descriptors are returned; the return
  * values of the export_vnode_to_sb() calls are ignored.
  */
 int
 kern_proc_filedesc_out(struct proc *p,  struct sbuf *sb, ssize_t maxlen,
     int flags)
 {
 	struct file *fp;
 	struct filedesc *fdp;
 	struct export_fd_buf *efbuf;
 	struct vnode *cttyvp, *textvp, *tracevp;
 	int error, i;
 	cap_rights_t rights;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/*
 	 * Take references on the per-process vnodes while the proc lock is
 	 * still held; they are exported after it has been dropped.
 	 */
 	/* ktrace vnode */
 	tracevp = p->p_tracevp;
 	if (tracevp != NULL)
 		vrefact(tracevp);
 	/* text vnode */
 	textvp = p->p_textvp;
 	if (textvp != NULL)
 		vrefact(textvp);
 	/* Controlling tty. */
 	cttyvp = NULL;
 	if (p->p_pgrp != NULL && p->p_pgrp->pg_session != NULL) {
 		cttyvp = p->p_pgrp->pg_session->s_ttyvp;
 		if (cttyvp != NULL)
 			vrefact(cttyvp);
 	}
 	fdp = fdhold(p);
 	PROC_UNLOCK(p);
 	efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK);
 	/* fdp stays NULL for now: no filedesc lock is held yet. */
 	efbuf->fdp = NULL;
 	efbuf->sb = sb;
 	efbuf->remainder = maxlen;
 	efbuf->flags = flags;
 	if (tracevp != NULL)
 		export_vnode_to_sb(tracevp, KF_FD_TYPE_TRACE, FREAD | FWRITE,
 		    efbuf);
 	if (textvp != NULL)
 		export_vnode_to_sb(textvp, KF_FD_TYPE_TEXT, FREAD, efbuf);
 	if (cttyvp != NULL)
 		export_vnode_to_sb(cttyvp, KF_FD_TYPE_CTTY, FREAD | FWRITE,
 		    efbuf);
 	error = 0;
 	/* No descriptor table: only the vnodes above are reported. */
 	if (fdp == NULL)
 		goto fail;
 	/* From here on the helpers drop and retake the filedesc lock. */
 	efbuf->fdp = fdp;
 	FILEDESC_SLOCK(fdp);
 	/* working directory */
 	if (fdp->fd_cdir != NULL) {
 		vrefact(fdp->fd_cdir);
 		export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD, FREAD, efbuf);
 	}
 	/* root directory */
 	if (fdp->fd_rdir != NULL) {
 		vrefact(fdp->fd_rdir);
 		export_vnode_to_sb(fdp->fd_rdir, KF_FD_TYPE_ROOT, FREAD, efbuf);
 	}
 	/* jail directory */
 	if (fdp->fd_jdir != NULL) {
 		vrefact(fdp->fd_jdir);
 		export_vnode_to_sb(fdp->fd_jdir, KF_FD_TYPE_JAIL, FREAD, efbuf);
 	}
 	for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) {
 		if ((fp = fdp->fd_ofiles[i].fde_file) == NULL)
 			continue;
 #ifdef CAPABILITIES
 		rights = *cap_rights(fdp, i);
 #else /* !CAPABILITIES */
 		cap_rights_init(&rights);
 #endif
 		/*
 		 * Create sysctl entry.  It is OK to drop the filedesc
 		 * lock inside of export_file_to_sb() as we will
 		 * re-validate and re-evaluate its properties when the
 		 * loop continues.
 		 */
 		error = export_file_to_sb(fp, i, &rights, efbuf);
 		/* Stop on error or once the size limit has been reached. */
 		if (error != 0 || efbuf->remainder == 0)
 			break;
 	}
 	FILEDESC_SUNLOCK(fdp);
 	fddrop(fdp);
 fail:
 	free(efbuf, M_TEMP);
 	return (error);
 }
 
 #define FILEDESC_SBUF_SIZE	(sizeof(struct kinfo_file) * 5)
 
 /*
  * Sysctl handler returning the file descriptors of the process whose pid
  * is the first name component, for procstat(1) and similar tools.  Records
  * are packed (KERN_FILEDESC_PACK_KINFO).  When the caller supplied a
  * buffer the output is limited to its length.
  */
 static int
 sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sb;
 	struct proc *p;
 	ssize_t maxlen;
 	int error, finish_error, *name;
 
 	name = (int *)arg1;
 
 	sbuf_new_for_sysctl(&sb, NULL, FILEDESC_SBUF_SIZE, req);
 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 	error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
 	if (error == 0) {
 		maxlen = req->oldptr != NULL ? req->oldlen : -1;
 		error = kern_proc_filedesc_out(p, &sb, maxlen,
 		    KERN_FILEDESC_PACK_KINFO);
 		/* An export error takes precedence over a finish error. */
 		finish_error = sbuf_finish(&sb);
 		if (error == 0)
 			error = finish_error;
 	}
 	sbuf_delete(&sb);
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD7
 #ifdef KINFO_OFILE_SIZE
 CTASSERT(sizeof(struct kinfo_ofile) == KINFO_OFILE_SIZE);
 #endif
 
 /*
  * Convert a struct kinfo_file record into the older struct kinfo_ofile
  * layout used by the COMPAT_FREEBSD7 sysctl.  The structure size is always
  * sizeof(*okif) and only the flag bits listed below are carried over.
  */
 static void
 kinfo_to_okinfo(struct kinfo_file *kif, struct kinfo_ofile *okif)
 {
 
 	okif->kf_structsize = sizeof(*okif);
 	okif->kf_type = kif->kf_type;
 	okif->kf_fd = kif->kf_fd;
 	okif->kf_ref_count = kif->kf_ref_count;
 	okif->kf_flags = kif->kf_flags & (KF_FLAG_READ | KF_FLAG_WRITE |
 	    KF_FLAG_APPEND | KF_FLAG_ASYNC | KF_FLAG_FSYNC | KF_FLAG_NONBLOCK |
 	    KF_FLAG_DIRECT | KF_FLAG_HASLOCK);
 	okif->kf_offset = kif->kf_offset;
-	okif->kf_vnode_type = kif->kf_vnode_type;
-	okif->kf_sock_domain = kif->kf_sock_domain;
-	okif->kf_sock_type = kif->kf_sock_type;
-	okif->kf_sock_protocol = kif->kf_sock_protocol;
+	if (kif->kf_type == KF_TYPE_VNODE)
+		okif->kf_vnode_type = kif->kf_un.kf_file.kf_file_type;
+	else
+		okif->kf_vnode_type = KF_VTYPE_VNON;
 	strlcpy(okif->kf_path, kif->kf_path, sizeof(okif->kf_path));
-	okif->kf_sa_local = kif->kf_sa_local;
-	okif->kf_sa_peer = kif->kf_sa_peer;
+	if (kif->kf_type == KF_TYPE_SOCKET) {
+		okif->kf_sock_domain = kif->kf_un.kf_sock.kf_sock_domain0;
+		okif->kf_sock_type = kif->kf_un.kf_sock.kf_sock_type0;
+		okif->kf_sock_protocol = kif->kf_un.kf_sock.kf_sock_protocol0;
+		okif->kf_sa_local = kif->kf_un.kf_sock.kf_sa_local;
+		okif->kf_sa_peer = kif->kf_un.kf_sock.kf_sa_peer;
+	} else {
+		okif->kf_sa_local.ss_family = AF_UNSPEC;
+		okif->kf_sa_peer.ss_family = AF_UNSPEC;
+	}
 }
 
 /*
  * Emit one old-format (kinfo_ofile) record for vnode 'vp' under the pseudo
  * descriptor 'type'.  A reference is taken on the vnode here and released
  * by export_vnode_to_kinfo().  The shared filedesc lock is held on entry
  * and on return and is dropped while the record is built and copied out.
  * Returns the result of SYSCTL_OUT().
  */
 static int
 export_vnode_for_osysctl(struct vnode *vp, int type, struct kinfo_file *kif,
     struct kinfo_ofile *okif, struct filedesc *fdp, struct sysctl_req *req)
 {
 	int rv;
 
 	vrefact(vp);
 	FILEDESC_SUNLOCK(fdp);
 
 	export_vnode_to_kinfo(vp, type, 0, kif, KERN_FILEDESC_PACK_KINFO);
 	kinfo_to_okinfo(kif, okif);
 	rv = SYSCTL_OUT(req, okif, sizeof(*okif));
 
 	FILEDESC_SLOCK(fdp);
 	return (rv);
 }
 
 /*
  * Get per-process file descriptors for use by procstat(1), et al.
  *
  * COMPAT_FREEBSD7 variant that emits fixed-size struct kinfo_ofile
  * records: working, root and jail directories first, then every open
  * descriptor.  Returns ENOENT when the process has no descriptor table.
  *
  * NOTE(review): the function ends with "return (0)", so an error from
  * SYSCTL_OUT() stops the descriptor loop but is not reported to the
  * caller; the results of export_vnode_for_osysctl() are ignored as well.
  * Confirm whether this is intentional before changing it.
  */
 static int
 sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
 {
 	struct kinfo_ofile *okif;
 	struct kinfo_file *kif;
 	struct filedesc *fdp;
 	int error, i, *name;
 	struct file *fp;
 	struct proc *p;
 
 	name = (int *)arg1;
 	error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
 	if (error != 0)
 		return (error);
 	fdp = fdhold(p);
 	PROC_UNLOCK(p);
 	if (fdp == NULL)
 		return (ENOENT);
 	/* Scratch records are heap-allocated rather than kept on the stack. */
 	kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
 	okif = malloc(sizeof(*okif), M_TEMP, M_WAITOK);
 	FILEDESC_SLOCK(fdp);
 	if (fdp->fd_cdir != NULL)
 		export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif,
 		    okif, fdp, req);
 	if (fdp->fd_rdir != NULL)
 		export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif,
 		    okif, fdp, req);
 	if (fdp->fd_jdir != NULL)
 		export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif,
 		    okif, fdp, req);
 	for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) {
 		if ((fp = fdp->fd_ofiles[i].fde_file) == NULL)
 			continue;
 		export_file_to_kinfo(fp, i, NULL, kif, fdp,
 		    KERN_FILEDESC_PACK_KINFO);
 		/* Drop the lock around the conversion and the copy-out. */
 		FILEDESC_SUNLOCK(fdp);
 		kinfo_to_okinfo(kif, okif);
 		error = SYSCTL_OUT(req, okif, sizeof(*okif));
 		FILEDESC_SLOCK(fdp);
 		if (error)
 			break;
 	}
 	FILEDESC_SUNLOCK(fdp);
 	fddrop(fdp);
 	free(kif, M_TEMP);
 	free(okif, M_TEMP);
 	return (0);
 }
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_OFILEDESC, ofiledesc,
     CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_ofiledesc,
     "Process ofiledesc entries");
 #endif	/* COMPAT_FREEBSD7 */
 
 /*
  * Translate a kernel vnode type (VREG, VDIR, ...) into the corresponding
  * KF_VTYPE_* constant exported to userland.  Types without an entry in
  * the table map to KF_VTYPE_UNKNOWN.
  */
 int
 vntype_to_kinfo(int vtype)
 {
 	/*
 	 * The table is read-only, so keep it in static storage instead of
 	 * rebuilding it on the stack on every call.
 	 */
 	static const struct {
 		int	vtype;
 		int	kf_vtype;
 	} vtypes_table[] = {
 		{ VBAD, KF_VTYPE_VBAD },
 		{ VBLK, KF_VTYPE_VBLK },
 		{ VCHR, KF_VTYPE_VCHR },
 		{ VDIR, KF_VTYPE_VDIR },
 		{ VFIFO, KF_VTYPE_VFIFO },
 		{ VLNK, KF_VTYPE_VLNK },
 		{ VNON, KF_VTYPE_VNON },
 		{ VREG, KF_VTYPE_VREG },
 		{ VSOCK, KF_VTYPE_VSOCK }
 	};
 	unsigned int i;
 
 	/*
 	 * Perform vtype translation.
 	 */
 	for (i = 0; i < nitems(vtypes_table); i++)
 		if (vtypes_table[i].vtype == vtype)
 			return (vtypes_table[i].kf_vtype);
 
 	return (KF_VTYPE_UNKNOWN);
 }
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc,
     CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_filedesc,
     "Process filedesc entries");
 
 /*
  * Store a process current working directory information to sbuf.
  *
  * Takes a locked proc as argument, and returns with the proc unlocked.
  *
  * 'maxlen' bounds the number of bytes written (-1 means no limit).  The
  * record is exported unpacked (flags 0).  Returns EINVAL when the process
  * has no descriptor table or no current directory, otherwise the result
  * of export_vnode_to_sb().
  */
 int
 kern_proc_cwd_out(struct proc *p,  struct sbuf *sb, ssize_t maxlen)
 {
 	struct filedesc *fdp;
 	struct export_fd_buf *efbuf;
 	int error;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	fdp = fdhold(p);
 	PROC_UNLOCK(p);
 	if (fdp == NULL)
 		return (EINVAL);
 
 	efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK);
 	efbuf->fdp = fdp;
 	efbuf->sb = sb;
 	efbuf->remainder = maxlen;
 	/*
 	 * The buffer is not zeroed by malloc(); the export helpers read
 	 * 'flags', so it must be given a defined value.
 	 */
 	efbuf->flags = 0;
 
 	FILEDESC_SLOCK(fdp);
 	if (fdp->fd_cdir == NULL)
 		error = EINVAL;
 	else {
 		/* export_vnode_to_sb() consumes this reference. */
 		vrefact(fdp->fd_cdir);
 		error = export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD,
 		    FREAD, efbuf);
 	}
 	FILEDESC_SUNLOCK(fdp);
 	fddrop(fdp);
 	free(efbuf, M_TEMP);
 	return (error);
 }
 
 /*
  * Sysctl handler returning the current working directory of the process
  * whose pid is the first name component, as a single kinfo_file record.
  * When the caller supplied a buffer the output is limited to its length.
  */
 static int
 sysctl_kern_proc_cwd(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sb;
 	struct proc *p;
 	ssize_t maxlen;
 	int error, finish_error, *name;
 
 	name = (int *)arg1;
 
 	sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_file), req);
 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 	error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
 	if (error == 0) {
 		maxlen = req->oldptr != NULL ? req->oldlen : -1;
 		error = kern_proc_cwd_out(p, &sb, maxlen);
 		/* An export error takes precedence over a finish error. */
 		finish_error = sbuf_finish(&sb);
 		if (error == 0)
 			error = finish_error;
 	}
 	sbuf_delete(&sb);
 	return (error);
 }
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_CWD, cwd, CTLFLAG_RD|CTLFLAG_MPSAFE,
     sysctl_kern_proc_cwd, "Process current working directory");
 
 #ifdef DDB
 /*
  * Debugger helper: map a file type (DTYPE_*) to a short printable tag.
  * Types that are not listed yield "unkn".
  */
 static const char *
 file_type_to_name(short type)
 {
 	static const struct {
 		int		dtype;
 		const char	*tag;
 	} type_names[] = {
 		{ 0, "zero" },
 		{ DTYPE_VNODE, "vnod" },
 		{ DTYPE_SOCKET, "sock" },
 		{ DTYPE_PIPE, "pipe" },
 		{ DTYPE_FIFO, "fifo" },
 		{ DTYPE_KQUEUE, "kque" },
 		{ DTYPE_CRYPTO, "crpt" },
 		{ DTYPE_MQUEUE, "mque" },
 		{ DTYPE_SHM, "shm" },
 		{ DTYPE_SEM, "ksem" }
 	};
 	unsigned int i;
 
 	for (i = 0; i < nitems(type_names); i++)
 		if (type_names[i].dtype == type)
 			return (type_names[i].tag);
 	return ("unkn");
 }
 
 /*
  * Debugger helper: find a process whose descriptor table references 'fp'.
  * Several processes may do so; the first one encountered is returned, or
  * NULL when there is none.  No locks are taken.
  */
 static struct proc *
 file_to_first_proc(struct file *fp)
 {
 	struct filedesc *fdp;
 	struct proc *p;
 	int fd;
 
 	FOREACH_PROC_IN_SYSTEM(p) {
 		if (p->p_state == PRS_NEW || (fdp = p->p_fd) == NULL)
 			continue;
 		fd = 0;
 		while (fd <= fdp->fd_lastfile) {
 			if (fdp->fd_ofiles[fd].fde_file == fp)
 				return (p);
 			fd++;
 		}
 	}
 	return (NULL);
 }
 
 /*
  * Debugger helper: print one line describing 'fp', preceded by a column
  * header when 'header' is non-zero.  The FPID/FCmd columns show a process
  * referencing the file, or -1 and "-" when none is found.
  */
 static void
 db_print_file(struct file *fp, int header)
 {
 	struct proc *owner;
 
 	if (header)
 		db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n",
 		    "File", "Type", "Data", "Flag", "GCFl", "Count",
 		    "MCount", "Vnode", "FPID", "FCmd");
 
 	owner = file_to_first_proc(fp);
 	if (owner != NULL)
 		db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp,
 		    file_type_to_name(fp->f_type), fp->f_data, fp->f_flag,
 		    0, fp->f_count, 0, fp->f_vnode,
 		    owner->p_pid, owner->p_comm);
 	else
 		db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp,
 		    file_type_to_name(fp->f_type), fp->f_data, fp->f_flag,
 		    0, fp->f_count, 0, fp->f_vnode,
 		    -1, "-");
 }
 
 /*
  * DDB "show file <addr>": print the file structure at the given address,
  * or a usage message when no address was supplied.
  */
 DB_SHOW_COMMAND(file, db_show_file)
 {
 
 	if (have_addr) {
 		db_print_file((struct file *)addr, 1);
 		return;
 	}
 	db_printf("usage: show file <addr>\n");
 }
 
 /*
  * DDB "show files": print every open file of every process.  The column
  * header is printed once, before the first file.
  */
 DB_SHOW_COMMAND(files, db_show_files)
 {
 	struct filedesc *fdp;
 	struct file *fp;
 	struct proc *p;
 	int fd, need_header;
 
 	need_header = 1;
 	FOREACH_PROC_IN_SYSTEM(p) {
 		if (p->p_state == PRS_NEW)
 			continue;
 		fdp = p->p_fd;
 		if (fdp == NULL)
 			continue;
 		for (fd = 0; fd <= fdp->fd_lastfile; fd++) {
 			fp = fdp->fd_ofiles[fd].fde_file;
 			if (fp != NULL) {
 				db_print_file(fp, need_header);
 				need_header = 0;
 			}
 		}
 	}
 }
 #endif
 
 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
     &maxfilesperproc, 0, "Maximum files allowed open per process");
 
 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
     &maxfiles, 0, "Maximum number of files");
 
 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
     __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files");
 
 /*
  * Boot-time initialization, registered via SYSINIT below: creates the UMA
  * zones for struct file ("Files", created with UMA_ZONE_NOFREE) and
  * struct filedesc0, and initializes the sigio mutex.  'dummy' is unused.
  */
 /* ARGSUSED*/
 static void
 filelistinit(void *dummy)
 {
 
 	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	filedesc0_zone = uma_zcreate("filedesc0", sizeof(struct filedesc0),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
 }
 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);
 
 /*-------------------------------------------------------------------*/
 
 /*
  * Read/write method of badfileops (used for both fo_read and fo_write):
  * always fails with EBADF.
  */
 static int
 badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 
 	return (EBADF);
 }
 
 /*
  * Truncate method of badfileops: always fails with EINVAL.
  */
 static int
 badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred,
     struct thread *td)
 {
 
 	return (EINVAL);
 }
 
 /*
  * Ioctl method of badfileops: always fails with EBADF.
  */
 static int
 badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred,
     struct thread *td)
 {
 
 	return (EBADF);
 }
 
 /*
  * Poll method of badfileops: reports no events (returns 0).
  */
 static int
 badfo_poll(struct file *fp, int events, struct ucred *active_cred,
     struct thread *td)
 {
 
 	return (0);
 }
 
 /*
  * Kqueue-filter stub for badfileops: never attaches the knote and
  * always returns EBADF.
  */
 static int
 badfo_kqfilter(struct file *fp, struct knote *kn)
 {
 
 	return (EBADF);
 }
 
 /*
  * Stat stub for badfileops: leaves *sb untouched and always returns
  * EBADF.
  */
 static int
 badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
     struct thread *td)
 {
 
 	return (EBADF);
 }
 
 /*
  * Close stub for badfileops: does nothing and always returns 0.
  */
 static int
 badfo_close(struct file *fp, struct thread *td)
 {
 
 	return (0);
 }
 
 /*
  * Chmod stub for badfileops: ignores the requested mode and always
  * returns EBADF.
  */
 static int
 badfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
     struct thread *td)
 {
 
 	return (EBADF);
 }
 
 /*
  * Chown stub for badfileops: ignores the requested owner and group and
  * always returns EBADF.
  */
 static int
 badfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
     struct thread *td)
 {
 
 	return (EBADF);
 }
 
 /*
  * Sendfile stub for badfileops: transfers nothing, does not write
  * *sent, and always returns EBADF.
  */
 static int
 badfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
     struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
     struct thread *td)
 {
 
 	return (EBADF);
 }
 
 /*
  * fo_fill_kinfo stub for badfileops: leaves *kif untouched and always
  * returns 0.
  */
 static int
 badfo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
 {
 
 	return (0);
 }
 
 /*
  * File operations table in which every listed slot points at one of the
  * badfo_* stubs, so each operation returns a fixed value: EBADF for
  * most, EINVAL for truncate, and 0 for poll, close and fill_kinfo.
  * badfo_readwrite serves both fo_read and fo_write.
  */
 struct fileops badfileops = {
 	.fo_read = badfo_readwrite,
 	.fo_write = badfo_readwrite,
 	.fo_truncate = badfo_truncate,
 	.fo_ioctl = badfo_ioctl,
 	.fo_poll = badfo_poll,
 	.fo_kqfilter = badfo_kqfilter,
 	.fo_stat = badfo_stat,
 	.fo_close = badfo_close,
 	.fo_chmod = badfo_chmod,
 	.fo_chown = badfo_chown,
 	.fo_sendfile = badfo_sendfile,
 	.fo_fill_kinfo = badfo_fill_kinfo,
 };
 
 /*
  * Exported read/write-style handler (same signature as the fo_read and
  * fo_write stubs) that ignores its arguments and always returns
  * EOPNOTSUPP.
  */
 int
 invfo_rdwr(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Exported truncate handler that ignores its arguments and always
  * returns EINVAL.
  */
 int
 invfo_truncate(struct file *fp, off_t length, struct ucred *active_cred,
     struct thread *td)
 {
 
 	return (EINVAL);
 }
 
 /*
  * Exported ioctl handler that ignores its arguments and always returns
  * ENOTTY.
  */
 int
 invfo_ioctl(struct file *fp, u_long com, void *data,
     struct ucred *active_cred, struct thread *td)
 {
 
 	return (ENOTTY);
 }
 
 /*
  * Exported poll handler that does no work of its own: the result is
  * whatever poll_no_poll() returns for the requested events.
  */
 int
 invfo_poll(struct file *fp, int events, struct ucred *active_cred,
     struct thread *td)
 {
 
 	return (poll_no_poll(events));
 }
 
 /*
  * Exported kqueue-filter handler that never attaches the knote and
  * always returns EINVAL.
  */
 int
 invfo_kqfilter(struct file *fp, struct knote *kn)
 {
 
 	return (EINVAL);
 }
 
 /*
  * Exported chmod handler that ignores the requested mode and always
  * returns EINVAL.
  */
 int
 invfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
     struct thread *td)
 {
 
 	return (EINVAL);
 }
 
 /*
  * Exported chown handler that ignores the requested owner and group
  * and always returns EINVAL.
  */
 int
 invfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
     struct thread *td)
 {
 
 	return (EINVAL);
 }
 
 /*
  * Exported sendfile handler that transfers nothing, does not write
  * *sent, and always returns EINVAL.
  */
 int
 invfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
     struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
     struct thread *td)
 {
 
 	return (EINVAL);
 }
 
 /*-------------------------------------------------------------------*/
 
 /*
  * File Descriptor pseudo-device driver (/dev/fd/).
  *
  * Opening minor device N dup()s the file (if any) connected to file
  * descriptor N belonging to the calling process.  Note that this driver
  * consists of only the ``open()'' routine, because all subsequent
  * references to this file will be direct to the other driver.
  *
  * XXX: we could give this one a cloning event handler if necessary.
  */
 
 /*
  * d_open handler for the fd devices: stores the device unit number in
  * td->td_dupfd and always returns ENODEV.  'mode' and 'type' are unused.
  */
 /* ARGSUSED */
 static int
 fdopen(struct cdev *dev, int mode, int type, struct thread *td)
 {
 
 	/*
 	 * XXX Kludge: set curthread->td_dupfd to contain the value of
 	 * the file descriptor being sought for duplication. The error
 	 * return ensures that the vnode for this device will be released
 	 * by vn_open. Open will detect this special error and take the
 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
 	 * will simply report the error.
 	 */
 	td->td_dupfd = dev2unit(dev);
 	return (ENODEV);
 }
 
 /*
  * Character device switch for the fd devices, named "FD".  Only d_open
  * is supplied (fdopen); no other entry points are set here.
  */
 static struct cdevsw fildesc_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_open =	fdopen,
 	.d_name =	"FD",
 };
 
 /*
  * Create the three fixed fd device nodes and their aliases:
  * fd/0 (alias stdin), fd/1 (alias stdout) and fd/2 (alias stderr).
  * Each node uses fildesc_cdevsw with the unit number equal to the
  * descriptor number, is owned by root:wheel with mode 0666, and is
  * created with MAKEDEV_ETERNAL.  Run through the SYSINIT below at
  * SI_SUB_DRIVERS; the argument is unused.
  */
 static void
 fildesc_drvinit(void *unused)
 {
 	struct cdev *dev;
 
 	dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 0, NULL,
 	    UID_ROOT, GID_WHEEL, 0666, "fd/0");
 	make_dev_alias(dev, "stdin");
 	dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 1, NULL,
 	    UID_ROOT, GID_WHEEL, 0666, "fd/1");
 	make_dev_alias(dev, "stdout");
 	dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 2, NULL,
 	    UID_ROOT, GID_WHEEL, 0666, "fd/2");
 	make_dev_alias(dev, "stderr");
 }
 
 SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL);
Index: head/sys/kern/kern_proc.c
===================================================================
--- head/sys/kern/kern_proc.c	(revision 318735)
+++ head/sys/kern/kern_proc.c	(revision 318736)
@@ -1,3116 +1,3125 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_ddb.h"
 #include "opt_ktrace.h"
 #include "opt_kstack_pages.h"
 #include "opt_stack.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/elf.h>
 #include <sys/eventhandler.h>
 #include <sys/exec.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/loginclass.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/refcount.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sysent.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/stack.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/filedesc.h>
 #include <sys/tty.h>
 #include <sys/signalvar.h>
 #include <sys/sdt.h>
 #include <sys/sx.h>
 #include <sys/user.h>
 #include <sys/vnode.h>
 #include <sys/wait.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/uma.h>
 
 #ifdef COMPAT_FREEBSD32
 #include <compat/freebsd32/freebsd32.h>
 #include <compat/freebsd32/freebsd32_util.h>
 #endif
 
 SDT_PROVIDER_DEFINE(proc);
 SDT_PROBE_DEFINE4(proc, , ctor, entry, "struct proc *", "int", "void *",
     "int");
 SDT_PROBE_DEFINE4(proc, , ctor, return, "struct proc *", "int", "void *",
     "int");
 SDT_PROBE_DEFINE4(proc, , dtor, entry, "struct proc *", "int", "void *",
     "struct thread *");
 SDT_PROBE_DEFINE3(proc, , dtor, return, "struct proc *", "int", "void *");
 SDT_PROBE_DEFINE3(proc, , init, entry, "struct proc *", "int", "int");
 SDT_PROBE_DEFINE3(proc, , init, return, "struct proc *", "int", "int");
 
 MALLOC_DEFINE(M_PGRP, "pgrp", "process group header");
 MALLOC_DEFINE(M_SESSION, "session", "session header");
 static MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
 MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");
 
 static void doenterpgrp(struct proc *, struct pgrp *);
 static void orphanpg(struct pgrp *pg);
 static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp);
 static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp);
 static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp,
     int preferthread);
 static void pgadjustjobc(struct pgrp *pgrp, int entering);
 static void pgdelete(struct pgrp *);
 static int proc_ctor(void *mem, int size, void *arg, int flags);
 static void proc_dtor(void *mem, int size, void *arg);
 static int proc_init(void *mem, int size, int flags);
 static void proc_fini(void *mem, int size);
 static void pargs_free(struct pargs *pa);
 static struct proc *zpfind_locked(pid_t pid);
 
 /*
  * Other process lists
  *
  * All of the tables, lists and locks below are set up by procinit().
  */
 struct pidhashhead *pidhashtbl;		/* pid hash buckets (see PIDHASH users) */
 u_long pidhash;				/* filled in by hashinit() for pidhashtbl */
 struct pgrphashhead *pgrphashtbl;	/* pgid hash buckets (see PGRPHASH users) */
 u_long pgrphash;			/* filled in by hashinit() for pgrphashtbl */
 struct proclist allproc;
 struct proclist zombproc;
 struct sx allproc_lock;			/* asserted held by the pfind*_locked() lookups */
 struct sx proctree_lock;		/* asserted held by the pgrp/session routines */
 struct mtx ppeers_lock;
 uma_zone_t proc_zone;			/* "PROC" zone; ctor/dtor/init/fini are proc_*() */
 
 /*
  * The offset of various fields in struct proc and struct thread.
  * These are used by kernel debuggers to enumerate kernel threads and
  * processes.
  */
 const int proc_off_p_pid = offsetof(struct proc, p_pid);
 const int proc_off_p_comm = offsetof(struct proc, p_comm);
 const int proc_off_p_list = offsetof(struct proc, p_list);
 const int proc_off_p_threads = offsetof(struct proc, p_threads);
 const int thread_off_td_tid = offsetof(struct thread, td_tid);
 const int thread_off_td_name = offsetof(struct thread, td_name);
 const int thread_off_td_oncpu = offsetof(struct thread, td_oncpu);
 const int thread_off_td_pcb = offsetof(struct thread, td_pcb);
 const int thread_off_td_plist = offsetof(struct thread, td_plist);
 
 int kstack_pages = KSTACK_PAGES;
 SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0,
     "Kernel stack size in pages");
 static int vmmap_skip_res_cnt = 0;
 SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW,
     &vmmap_skip_res_cnt, 0,
     "Skip calculation of the pages resident count in kern.proc.vmmap");
 
 CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
 #ifdef COMPAT_FREEBSD32
 CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE);
 #endif
 
 /*
  * Initialize global process hashing structures.
  *
  * Sets up the allproc and proctree sx locks and the p_peers mutex,
  * empties the allproc and zombproc lists, allocates the pid and pgrp
  * hash tables (each sized from maxproc / 4), creates the "PROC" UMA
  * zone with proc_ctor/proc_dtor/proc_init/proc_fini as its callbacks,
  * and finally calls uihashinit().
  */
 void
 procinit(void)
 {
 
 	sx_init(&allproc_lock, "allproc");
 	sx_init(&proctree_lock, "proctree");
 	mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF);
 	LIST_INIT(&allproc);
 	LIST_INIT(&zombproc);
 	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
 	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
 	/* Element size comes from sched_sizeof_proc(), not sizeof(struct proc). */
 	proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
 	    proc_ctor, proc_dtor, proc_init, proc_fini,
 	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uihashinit();
 }
 
 /*
  * Prepare a proc for use (constructor callback of the "PROC" zone).
  *
  * Fires the ctor entry/return probes around the process_ctor event
  * handlers and then, when the proc already has a first thread, runs
  * the thread_ctor handlers for that thread.  Always returns 0.
  */
 static int
 proc_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct proc *newp = mem;
 	struct thread *first_td;
 
 	SDT_PROBE4(proc, , ctor, entry, newp, size, arg, flags);
 	EVENTHANDLER_INVOKE(process_ctor, newp);
 	/* Note: the return probe fires before the thread handlers run. */
 	SDT_PROBE4(proc, , ctor, return, newp, size, arg, flags);
 
 	first_td = FIRST_THREAD_IN_PROC(newp);
 	if (first_td == NULL)
 		return (0);
 
 	/* Make sure all thread constructors are executed */
 	EVENTHANDLER_INVOKE(thread_ctor, first_td);
 	return (0);
 }
 
 /*
  * Reclaim a proc after use (destructor callback of the "PROC" zone).
  *
  * When the proc still has a first thread, its OSD is released and the
  * thread_dtor handlers run; the process_dtor handlers run in every
  * case.  With INVARIANTS, also checks that exactly one thread is left
  * and that p_ktr is empty.
  */
 static void
 proc_dtor(void *mem, int size, void *arg)
 {
 	struct proc *oldp = mem;
 	struct thread *first_td = FIRST_THREAD_IN_PROC(oldp);
 
 	/* INVARIANTS checks go here */
 	SDT_PROBE4(proc, , dtor, entry, oldp, size, arg, first_td);
 	if (first_td != NULL) {
 #ifdef INVARIANTS
 		KASSERT((oldp->p_numthreads == 1),
 		    ("bad number of threads in exiting process"));
 		KASSERT(STAILQ_EMPTY(&oldp->p_ktr),
 		    ("proc_dtor: non-empty p_ktr"));
 #endif
 		/* Free all OSD associated to this thread. */
 		osd_thread_exit(first_td);
 
 		/* Make sure all thread destructors are executed */
 		EVENTHANDLER_INVOKE(thread_dtor, first_td);
 	}
 	EVENTHANDLER_INVOKE(process_dtor, oldp);
 	/* A pending SIGCHLD ksiginfo must no longer be queued. */
 	if (oldp->p_ksi != NULL)
 		KASSERT(! KSI_ONQ(oldp->p_ksi), ("SIGCHLD queue"));
 	SDT_PROBE3(proc, , dtor, return, oldp, size, arg);
 }
 
 /*
  * Initialize type-stable parts of a proc (when newly created).
  *
  * Sets up the five per-process mutexes and two condition variables,
  * the (empty) thread list, runs the process_init event handlers,
  * allocates p_stats and clears p_pgrp.  Always returns 0.
  */
 static int
 proc_init(void *mem, int size, int flags)
 {
 	struct proc *p;
 
 	p = (struct proc *)mem;
 	SDT_PROBE3(proc, , init, entry, p, size, flags);
 	/* p_mtx is the only sleepable (MTX_DEF) lock here; the rest spin. */
 	mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW);
 	mtx_init(&p->p_slock, "process slock", NULL, MTX_SPIN | MTX_NEW);
 	mtx_init(&p->p_statmtx, "pstatl", NULL, MTX_SPIN | MTX_NEW);
 	mtx_init(&p->p_itimmtx, "pitiml", NULL, MTX_SPIN | MTX_NEW);
 	mtx_init(&p->p_profmtx, "pprofl", NULL, MTX_SPIN | MTX_NEW);
 	cv_init(&p->p_pwait, "ppwait");
 	cv_init(&p->p_dbgwait, "dbgwait");
 	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
 	EVENTHANDLER_INVOKE(process_init, p);
 	p->p_stats = pstats_alloc();
 	p->p_pgrp = NULL;
 	SDT_PROBE3(proc, , init, return, p, size, flags);
 	return (0);
 }
 
 /*
  * UMA should ensure that this function is never called.
  * Freeing a proc structure would violate type stability.
  *
  * As built (without "notnow" defined) the body is a single panic();
  * the teardown sequence under #ifdef notnow is compiled out.
  */
 static void
 proc_fini(void *mem, int size)
 {
 #ifdef notnow
 	struct proc *p;
 
 	p = (struct proc *)mem;
 	EVENTHANDLER_INVOKE(process_fini, p);
 	pstats_free(p->p_stats);
 	thread_free(FIRST_THREAD_IN_PROC(p));
 	mtx_destroy(&p->p_mtx);
 	if (p->p_ksi != NULL)
 		ksiginfo_free(p->p_ksi);
 #else
 	panic("proc reclaimed");
 #endif
 }
 
 /*
  * Is p an inferior of the current process?
  *
  * Follows proc_realparent() upward from p.  Returns 1 as soon as
  * curproc is reached (including when p is curproc itself) and 0 if the
  * process with pid 0 is reached first.  The caller must hold
  * proctree_lock and p's process lock.
  */
 int
 inferior(struct proc *p)
 {
 	struct proc *ancestor;
 
 	sx_assert(&proctree_lock, SX_LOCKED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	ancestor = p;
 	while (ancestor != curproc) {
 		if (ancestor->p_pid == 0)
 			return (0);
 		ancestor = proc_realparent(ancestor);
 	}
 	return (1);
 }
 
 /*
  * Look up a process by pid in the pid hash; allproc_lock must be held.
  *
  * Returns the process with its process lock held, or NULL when no
  * entry matches or the matching process is still in the PRS_NEW state.
  */
 struct proc *
 pfind_locked(pid_t pid)
 {
 	struct proc *cand;
 
 	sx_assert(&allproc_lock, SX_LOCKED);
 	LIST_FOREACH(cand, PIDHASH(pid), p_hash) {
 		if (cand->p_pid != pid)
 			continue;
 		/* Only the first pid match is ever considered. */
 		PROC_LOCK(cand);
 		if (cand->p_state != PRS_NEW)
 			return (cand);
 		PROC_UNLOCK(cand);
 		return (NULL);
 	}
 	return (NULL);
 }
 
 /*
  * Locate a process by number; return only "live" processes -- i.e., neither
  * zombies nor newly born but incompletely initialized processes.  Because
  * PRS_NEW processes are never returned, callers need not test for that
  * state before dereferencing p_ucred, et al.
  *
  * Takes allproc_lock shared around pfind_locked(); a non-NULL result
  * comes back with its process lock held.
  */
 struct proc *
 pfind(pid_t pid)
 {
 	struct proc *found;
 
 	sx_slock(&allproc_lock);
 	found = pfind_locked(pid);
 	sx_sunlock(&allproc_lock);
 	return (found);
 }
 
 /*
  * Find the process that owns the thread with id 'tid' by scanning every
  * process in the system; allproc_lock must be held.
  *
  * PRS_NEW processes are skipped.  Returns the owning process with its
  * process lock held, or NULL when no thread matches.
  */
 static struct proc *
 pfind_tid_locked(pid_t tid)
 {
 	struct proc *p;
 	struct thread *td;
 
 	sx_assert(&allproc_lock, SX_LOCKED);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		PROC_LOCK(p);
 		if (p->p_state != PRS_NEW) {
 			FOREACH_THREAD_IN_PROC(p, td) {
 				/* Keep p locked for the caller on a hit. */
 				if (td->td_tid == tid)
 					return (p);
 			}
 		}
 		PROC_UNLOCK(p);
 	}
 	return (NULL);
 }
 
 /*
  * Locate a process group by number.
  * The caller must hold proctree_lock.
  *
  * Returns the group with its pgrp lock held, or NULL if the pgid is
  * not in the hash.
  */
 struct pgrp *
 pgfind(pid_t pgid)
 {
 	struct pgrp *pg;
 
 	sx_assert(&proctree_lock, SX_LOCKED);
 
 	LIST_FOREACH(pg, PGRPHASH(pgid), pg_hash) {
 		if (pg->pg_id == pgid)
 			break;
 	}
 	/* pg is NULL here when the bucket was exhausted without a match. */
 	if (pg != NULL)
 		PGRP_LOCK(pg);
 	return (pg);
 }
 
 /*
  * Locate process and do additional manipulations, depending on flags.
  *
  * Ids up to PID_MAX are looked up as pids (falling back to
  * zpfind_locked() unless PGET_NOTWEXIT is set); larger ids are looked
  * up as thread ids unless PGET_NOTID is set.  The PGET_* checks then
  * run in a fixed order and the first failure wins.
  *
  * On success returns 0 and stores the process in *pp: locked, or --
  * with PGET_HOLD -- held via _PHOLD() and unlocked.  On failure returns
  * an errno value (ESRCH when nothing was found), leaves *pp untouched
  * and drops the process lock.
  */
 int
 pget(pid_t pid, int flags, struct proc **pp)
 {
 	struct proc *target;
 	int error;
 
 	target = NULL;
 	sx_slock(&allproc_lock);
 	if (pid > PID_MAX) {
 		if ((flags & PGET_NOTID) == 0)
 			target = pfind_tid_locked(pid);
 	} else {
 		target = pfind_locked(pid);
 		if (target == NULL && (flags & PGET_NOTWEXIT) == 0)
 			target = zpfind_locked(pid);
 	}
 	sx_sunlock(&allproc_lock);
 	if (target == NULL)
 		return (ESRCH);
 
 	error = 0;
 	if ((flags & PGET_CANSEE) != 0)
 		error = p_cansee(curthread, target);
 	if (error == 0 && (flags & PGET_CANDEBUG) != 0)
 		error = p_candebug(curthread, target);
 	if (error == 0 && (flags & PGET_ISCURRENT) != 0 && curproc != target)
 		error = EPERM;
 	if (error == 0 && (flags & PGET_NOTWEXIT) != 0 &&
 	    (target->p_flag & P_WEXIT) != 0)
 		error = ESRCH;
 	/*
 	 * XXXRW: Not clear ESRCH is the right error during proc
 	 * execve().
 	 */
 	if (error == 0 && (flags & PGET_NOTINEXEC) != 0 &&
 	    (target->p_flag & P_INEXEC) != 0)
 		error = ESRCH;
 	if (error != 0) {
 		PROC_UNLOCK(target);
 		return (error);
 	}
 
 	if ((flags & PGET_HOLD) != 0) {
 		_PHOLD(target);
 		PROC_UNLOCK(target);
 	}
 	*pp = target;
 	return (0);
 }
 
 /*
  * Create a new process group.
  * pgid must be equal to the pid of p.
  * Begin a new session if required.
  *
  * The caller supplies the storage for 'pgrp' (and for 'sess' when a new
  * session is wanted; pass NULL to stay in p's current session) and must
  * hold proctree_lock exclusively.  Always returns 0.
  */
 int
 enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess)
 {
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 
 	KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL"));
 	KASSERT(p->p_pid == pgid,
 	    ("enterpgrp: new pgrp and pid != pgid"));
 	KASSERT(pgfind(pgid) == NULL,
 	    ("enterpgrp: pgrp with pgid exists"));
 	KASSERT(!SESS_LEADER(p),
 	    ("enterpgrp: session leader attempted setpgrp"));
 
 	mtx_init(&pgrp->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
 
 	if (sess != NULL) {
 		/*
 		 * new session
 		 */
 		mtx_init(&sess->s_mtx, "session", NULL, MTX_DEF);
 		PROC_LOCK(p);
 		p->p_flag &= ~P_CONTROLT;
 		PROC_UNLOCK(p);
 		PGRP_LOCK(pgrp);
 		sess->s_leader = p;
 		sess->s_sid = p->p_pid;
 		refcount_init(&sess->s_count, 1);
 		sess->s_ttyvp = NULL;
 		sess->s_ttydp = NULL;
 		sess->s_ttyp = NULL;
 		/* The login name is inherited from the old session. */
 		bcopy(p->p_session->s_login, sess->s_login,
 			    sizeof(sess->s_login));
 		pgrp->pg_session = sess;
 		KASSERT(p == curproc,
 		    ("enterpgrp: mksession and p != curproc"));
 	} else {
 		/* Same session: the new group takes its own reference. */
 		pgrp->pg_session = p->p_session;
 		sess_hold(pgrp->pg_session);
 		PGRP_LOCK(pgrp);
 	}
 	pgrp->pg_id = pgid;
 	LIST_INIT(&pgrp->pg_members);
 
 	/*
 	 * As we have an exclusive lock of proctree_lock,
 	 * this should not deadlock.
 	 */
 	LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
 	pgrp->pg_jobc = 0;
 	SLIST_INIT(&pgrp->pg_sigiolst);
 	PGRP_UNLOCK(pgrp);
 
 	/* Finally move p itself out of its old group into the new one. */
 	doenterpgrp(p, pgrp);
 
 	return (0);
 }
 
 /*
  * Move p to an existing process group
  *
  * The target group must belong to p's session and must not already be
  * p's group (both asserted).  The caller holds proctree_lock
  * exclusively and none of the proc, pgrp or session locks.  The move
  * itself is done by doenterpgrp().  Always returns 0.
  */
 int
 enterthispgrp(struct proc *p, struct pgrp *pgrp)
 {
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
 	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
 	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
 	KASSERT(pgrp->pg_session == p->p_session,
 		("%s: pgrp's session %p, p->p_session %p.\n",
 		__func__,
 		pgrp->pg_session,
 		p->p_session));
 	KASSERT(pgrp != p->p_pgrp,
 		("%s: p belongs to pgrp.", __func__));
 
 	doenterpgrp(p, pgrp);
 
 	return (0);
 }
 
 /*
  * Move p to a process group
  *
  * Shared worker for enterpgrp() and enterthispgrp(): fixes up the job
  * control counts, relinks p from its old group's member list onto
  * pgrp's, and deletes the old group if that left it empty.  Requires
  * proctree_lock held exclusively and no proc/pgrp/session lock held.
  */
 static void
 doenterpgrp(struct proc *p, struct pgrp *pgrp)
 {
 	struct pgrp *savepgrp;
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
 	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
 	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
 
 	savepgrp = p->p_pgrp;
 
 	/*
 	 * Adjust eligibility of affected pgrps to participate in job control.
 	 * Increment eligibility counts before decrementing, otherwise we
 	 * could reach 0 spuriously during the first call.
 	 */
 	fixjobc(p, pgrp, 1);
 	fixjobc(p, p->p_pgrp, 0);
 
 	/* Lock order used here: new pgrp, old pgrp, then the process. */
 	PGRP_LOCK(pgrp);
 	PGRP_LOCK(savepgrp);
 	PROC_LOCK(p);
 	LIST_REMOVE(p, p_pglist);
 	p->p_pgrp = pgrp;
 	PROC_UNLOCK(p);
 	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
 	PGRP_UNLOCK(savepgrp);
 	PGRP_UNLOCK(pgrp);
 	if (LIST_EMPTY(&savepgrp->pg_members))
 		pgdelete(savepgrp);
 }
 
 /*
  * remove process from process group
  *
  * Unlinks p from its group's member list, clears p->p_pgrp, and deletes
  * the group if p was its last member.  The caller must hold
  * proctree_lock exclusively.  Always returns 0.
  */
 int
 leavepgrp(struct proc *p)
 {
 	struct pgrp *savepgrp;
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 	savepgrp = p->p_pgrp;
 	PGRP_LOCK(savepgrp);
 	PROC_LOCK(p);
 	LIST_REMOVE(p, p_pglist);
 	p->p_pgrp = NULL;
 	PROC_UNLOCK(p);
 	PGRP_UNLOCK(savepgrp);
 	if (LIST_EMPTY(&savepgrp->pg_members))
 		pgdelete(savepgrp);
 	return (0);
 }
 
 /*
  * delete a process group
  *
  * Clears the sigio registrations on the group, removes it from the
  * pgid hash, detaches it from the session's tty (if any), frees it and
  * drops the group's reference on its session.  Requires proctree_lock
  * held exclusively and neither the pgrp nor the session lock held.
  */
 static void
 pgdelete(struct pgrp *pgrp)
 {
 	struct session *savesess;
 	struct tty *tp;
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
 	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
 
 	/*
 	 * Reset any sigio structures pointing to us as a result of
 	 * F_SETOWN with our pgid.
 	 */
 	funsetownlst(&pgrp->pg_sigiolst);
 
 	PGRP_LOCK(pgrp);
 	tp = pgrp->pg_session->s_ttyp;
 	LIST_REMOVE(pgrp, pg_hash);
 	savesess = pgrp->pg_session;
 	PGRP_UNLOCK(pgrp);
 
 	/* Remove the reference to the pgrp before deallocating it. */
 	if (tp != NULL) {
 		/*
 		 * No tty_unlock() follows; presumably tty_rel_pgrp()
 		 * releases the tty lock -- confirm in the tty code.
 		 */
 		tty_lock(tp);
 		tty_rel_pgrp(tp, pgrp);
 	}
 
 	mtx_destroy(&pgrp->pg_mtx);
 	free(pgrp, M_PGRP);
 	sess_release(savesess);
 }
 
 /*
  * Adjust a group's job control count under the pgrp lock: bump it when
  * 'entering' is non-zero, otherwise drop it and, if that brings the
  * count to zero, run orphanpg() on the group.
  */
 static void
 pgadjustjobc(struct pgrp *pgrp, int entering)
 {
 
 	PGRP_LOCK(pgrp);
 	if (!entering) {
 		if (--pgrp->pg_jobc == 0)
 			orphanpg(pgrp);
 	} else {
 		pgrp->pg_jobc++;
 	}
 	PGRP_UNLOCK(pgrp);
 }
 
 /*
  * Adjust pgrp jobc counters when specified process changes process group.
  * We count the number of processes in each process group that "qualify"
  * the group for terminal job control (those with a parent in a different
  * process group of the same session).  If that count reaches zero, the
  * process group becomes orphaned.  Check both the specified process'
  * process group and that of its children.
  * entering == 0 => p is leaving specified group.
  * entering == 1 => p is entering specified group.
  *
  * Requires proctree_lock; the proc, pgrp and session locks must not be
  * held by the caller.
  */
 void
 fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
 {
 	struct session *sess;
 	struct pgrp *other;
 	struct proc *child;
 
 	sx_assert(&proctree_lock, SX_LOCKED);
 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
 	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
 
 	sess = pgrp->pg_session;
 
 	/*
 	 * p qualifies pgrp when its parent sits in a different group of
 	 * the same session.
 	 */
 	other = p->p_pptr->p_pgrp;
 	if (other != pgrp && other->pg_session == sess)
 		pgadjustjobc(pgrp, entering);
 
 	/*
 	 * Likewise, p qualifies the group of each non-zombie child that
 	 * lives in a different group of the same session.
 	 */
 	LIST_FOREACH(child, &p->p_children, p_sibling) {
 		other = child->p_pgrp;
 		if (other != pgrp && other->pg_session == sess &&
 		    child->p_state != PRS_ZOMBIE)
 			pgadjustjobc(other, entering);
 	}
 }
 
 /*
  * Job control teardown for the current process, which must already
  * have P_WEXIT set.  For a session leader this detaches the session
  * from its controlling terminal vnode, sends SIGHUP to the terminal's
  * foreground process group (only if the tty still belongs to this
  * session) and revokes the terminal vnode.  In all cases that get past
  * the quick check it ends with fixjobc(p, p->p_pgrp, 0).
  */
 void
 killjobc(void)
 {
 	struct session *sp;
 	struct tty *tp;
 	struct proc *p;
 	struct vnode *ttyvp;
 
 	p = curproc;
 	MPASS(p->p_flag & P_WEXIT);
 	/*
 	 * Do a quick check to see if there is anything to do with the
 	 * proctree_lock held. pgrp and LIST_EMPTY checks are for fixjobc().
 	 */
 	PROC_LOCK(p);
 	if (!SESS_LEADER(p) &&
 	    (p->p_pgrp == p->p_pptr->p_pgrp) &&
 	    LIST_EMPTY(&p->p_children)) {
 		PROC_UNLOCK(p);
 		return;
 	}
 	PROC_UNLOCK(p);
 
 	sx_xlock(&proctree_lock);
 	if (SESS_LEADER(p)) {
 		sp = p->p_session;
 
 		/*
 		 * s_ttyp is not zero'd; we use this to indicate that
 		 * the session once had a controlling terminal. (for
 		 * logging and informational purposes)
 		 */
 		SESS_LOCK(sp);
 		ttyvp = sp->s_ttyvp;
 		tp = sp->s_ttyp;
 		sp->s_ttyvp = NULL;
 		sp->s_ttydp = NULL;
 		sp->s_leader = NULL;
 		SESS_UNLOCK(sp);
 
 		/*
 		 * Signal foreground pgrp and revoke access to
 		 * controlling terminal if it has not been revoked
 		 * already.
 		 *
 		 * Because the TTY may have been revoked in the mean
 		 * time and could already have a new session associated
 		 * with it, make sure we don't send a SIGHUP to a
 		 * foreground process group that does not belong to this
 		 * session.
 		 */
 
 		if (tp != NULL) {
 			tty_lock(tp);
 			if (tp->t_session == sp)
 				tty_signal_pgrp(tp, SIGHUP);
 			tty_unlock(tp);
 		}
 
 		if (ttyvp != NULL) {
 			/* proctree_lock is dropped across the vnode work. */
 			sx_xunlock(&proctree_lock);
 			if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) {
 				VOP_REVOKE(ttyvp, REVOKEALL);
 				VOP_UNLOCK(ttyvp, 0);
 			}
 			/* Drops the vnode reference taken over from s_ttyvp. */
 			vrele(ttyvp);
 			sx_xlock(&proctree_lock);
 		}
 	}
 	fixjobc(p, p->p_pgrp, 0);
 	sx_xunlock(&proctree_lock);
 }
 
 /*
  * A process group has become orphaned;
  * if there are any stopped processes in the group,
  * hang up all processes in that group.
  *
  * Called with the pgrp lock held.  As soon as one member is found with
  * P_SHOULDSTOP() == P_STOPPED_SIG, every member is sent SIGHUP followed
  * by SIGCONT; otherwise nothing is signalled.
  */
 static void
 orphanpg(struct pgrp *pg)
 {
 	struct proc *member;
 	int any_stopped;
 
 	PGRP_LOCK_ASSERT(pg, MA_OWNED);
 
 	/* Pass 1: look for the first stopped member. */
 	any_stopped = 0;
 	LIST_FOREACH(member, &pg->pg_members, p_pglist) {
 		PROC_LOCK(member);
 		any_stopped = (P_SHOULDSTOP(member) == P_STOPPED_SIG);
 		PROC_UNLOCK(member);
 		if (any_stopped)
 			break;
 	}
 	if (!any_stopped)
 		return;
 
 	/* Pass 2: signal the whole group. */
 	LIST_FOREACH(member, &pg->pg_members, p_pglist) {
 		PROC_LOCK(member);
 		kern_psignal(member, SIGHUP);
 		kern_psignal(member, SIGCONT);
 		PROC_UNLOCK(member);
 	}
 }
 
 /*
  * Take an additional reference on a session (s_count); paired with
  * sess_release().
  */
 void
 sess_hold(struct session *s)
 {
 
 	refcount_acquire(&s->s_count);
 }
 
 /*
  * Drop a reference on a session.  When refcount_release() reports the
  * last reference gone, the session is detached from its tty (if any),
  * its mutex is destroyed and the structure is freed.
  */
 void
 sess_release(struct session *s)
 {
 
 	if (!refcount_release(&s->s_count))
 		return;
 
 	if (s->s_ttyp != NULL) {
 		tty_lock(s->s_ttyp);
 		tty_rel_sess(s->s_ttyp, s);
 	}
 	mtx_destroy(&s->s_mtx);
 	free(s, M_SESSION);
 }
 
 #ifdef DDB
 
 /*
  * DDB "show pgrpdump": for every non-empty bucket of the pgid hash,
  * print the bucket index, then each process group in it (address, pgid,
  * session, session reference count, first member) and each member
  * process (pid, address, p_pgrp).
  *
  * Output goes through db_printf() rather than printf() so that it is
  * handled like the output of the other DDB commands, and the bucket
  * index is a u_long to match pgrphash.
  */
 DB_SHOW_COMMAND(pgrpdump, pgrpdump)
 {
 	struct pgrp *pgrp;
 	struct proc *p;
 	u_long i;
 
 	/* pgrphash is used as an inclusive upper bucket index. */
 	for (i = 0; i <= pgrphash; i++) {
 		if (LIST_EMPTY(&pgrphashtbl[i]))
 			continue;
 		db_printf("\tindx %lu\n", i);
 		LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) {
 			db_printf(
 			"\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n",
 			    (void *)pgrp, (long)pgrp->pg_id,
 			    (void *)pgrp->pg_session,
 			    pgrp->pg_session->s_count,
 			    (void *)LIST_FIRST(&pgrp->pg_members));
 			LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
 				db_printf("\t\tpid %ld addr %p pgrp %p\n",
 				    (long)p->p_pid, (void *)p,
 				    (void *)p->p_pgrp);
 			}
 		}
 	}
 }
 #endif /* DDB */
 
 /*
  * Calculate the kinfo_proc members which contain process-wide
  * information: ki_pctcpu and ki_estcpu are reset and then summed over
  * every thread of the process, each sampled under its thread lock.
  * Must be called with the target process locked.
  */
 static void
 fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp)
 {
 	struct thread *thr;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	kp->ki_pctcpu = 0;
 	kp->ki_estcpu = 0;
 	FOREACH_THREAD_IN_PROC(p, thr) {
 		thread_lock(thr);
 		kp->ki_pctcpu += sched_pctcpu(thr);
 		kp->ki_estcpu += sched_estcpu(thr);
 		thread_unlock(thr);
 	}
 }
 
 /*
  * Clear kinfo_proc and fill in any information that is common
  * to all threads in the process.
  * Must be called with the target process locked; proctree_lock must
  * also be held (needed by proc_realparent(), asserted below).
  */
 static void
 fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
 {
 	struct thread *td0;
 	struct tty *tp;
 	struct session *sp;
 	struct ucred *cred;
 	struct sigacts *ps;
 	struct timeval boottime;
 
 	/* For proc_realparent. */
 	sx_assert(&proctree_lock, SX_LOCKED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	bzero(kp, sizeof(*kp));
 
 	kp->ki_structsize = sizeof(*kp);
 	kp->ki_paddr = p;
 	kp->ki_addr =/* p->p_addr; */0; /* XXX */
 	kp->ki_args = p->p_args;
 	kp->ki_textvp = p->p_textvp;
 #ifdef KTRACE
 	kp->ki_tracep = p->p_tracevp;
 	kp->ki_traceflag = p->p_traceflag;
 #endif
 	kp->ki_fd = p->p_fd;
 	kp->ki_vmspace = p->p_vmspace;
 	kp->ki_flag = p->p_flag;
 	kp->ki_flag2 = p->p_flag2;
 	cred = p->p_ucred;
 	if (cred) {
 		kp->ki_uid = cred->cr_uid;
 		kp->ki_ruid = cred->cr_ruid;
 		kp->ki_svuid = cred->cr_svuid;
 		kp->ki_cr_flags = 0;
 		if (cred->cr_flags & CRED_FLAG_CAPMODE)
 			kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE;
 		/* XXX bde doesn't like KI_NGROUPS */
 		/* At most KI_NGROUPS groups are exported; overflow is flagged. */
 		if (cred->cr_ngroups > KI_NGROUPS) {
 			kp->ki_ngroups = KI_NGROUPS;
 			kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW;
 		} else
 			kp->ki_ngroups = cred->cr_ngroups;
 		bcopy(cred->cr_groups, kp->ki_groups,
 		    kp->ki_ngroups * sizeof(gid_t));
 		kp->ki_rgid = cred->cr_rgid;
 		kp->ki_svgid = cred->cr_svgid;
 		/* If jailed(cred), emulate the old P_JAILED flag. */
 		if (jailed(cred)) {
 			kp->ki_flag |= P_JAILED;
 			/* If inside the jail, use 0 as a jail ID. */
 			if (cred->cr_prison != curthread->td_ucred->cr_prison)
 				kp->ki_jid = cred->cr_prison->pr_id;
 		}
 		strlcpy(kp->ki_loginclass, cred->cr_loginclass->lc_name,
 		    sizeof(kp->ki_loginclass));
 	}
 	ps = p->p_sigacts;
 	if (ps) {
 		mtx_lock(&ps->ps_mtx);
 		kp->ki_sigignore = ps->ps_sigignore;
 		kp->ki_sigcatch = ps->ps_sigcatch;
 		mtx_unlock(&ps->ps_mtx);
 	}
 	/* Memory figures are only filled in for fully set up, live processes. */
 	if (p->p_state != PRS_NEW &&
 	    p->p_state != PRS_ZOMBIE &&
 	    p->p_vmspace != NULL) {
 		struct vmspace *vm = p->p_vmspace;
 
 		kp->ki_size = vm->vm_map.size;
 		kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/
 		/* Kernel stack pages of non-swapped threads count as resident. */
 		FOREACH_THREAD_IN_PROC(p, td0) {
 			if (!TD_IS_SWAPPED(td0))
 				kp->ki_rssize += td0->td_kstack_pages;
 		}
 		kp->ki_swrss = vm->vm_swrss;
 		kp->ki_tsize = vm->vm_tsize;
 		kp->ki_dsize = vm->vm_dsize;
 		kp->ki_ssize = vm->vm_ssize;
 	} else if (p->p_state == PRS_ZOMBIE)
 		kp->ki_stat = SZOMB;
 	if (kp->ki_flag & P_INMEM)
 		kp->ki_sflag = PS_INMEM;
 	else
 		kp->ki_sflag = 0;
 	/* Calculate legacy swtime as seconds since 'swtick'. */
 	kp->ki_swtime = (ticks - p->p_swtick) / hz;
 	kp->ki_pid = p->p_pid;
 	kp->ki_nice = p->p_nice;
 	kp->ki_fibnum = p->p_fibnum;
 	/* ki_start is p_start with the boot time added to it. */
 	kp->ki_start = p->p_stats->p_start;
 	getboottime(&boottime);
 	timevaladd(&kp->ki_start, &boottime);
 	PROC_STATLOCK(p);
 	rufetch(p, &kp->ki_rusage);
 	kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
 	calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime);
 	PROC_STATUNLOCK(p);
 	calccru(p, &kp->ki_childutime, &kp->ki_childstime);
 	/* Some callers want child times in a single value. */
 	kp->ki_childtime = kp->ki_childstime;
 	timevaladd(&kp->ki_childtime, &kp->ki_childutime);
 
 	FOREACH_THREAD_IN_PROC(p, td0)
 		kp->ki_cow += td0->td_cow;
 
 	tp = NULL;
 	if (p->p_pgrp) {
 		kp->ki_pgid = p->p_pgrp->pg_id;
 		kp->ki_jobc = p->p_pgrp->pg_jobc;
 		sp = p->p_pgrp->pg_session;
 
 		if (sp != NULL) {
 			kp->ki_sid = sp->s_sid;
 			SESS_LOCK(sp);
 			strlcpy(kp->ki_login, sp->s_login,
 			    sizeof(kp->ki_login));
 			if (sp->s_ttyvp)
 				kp->ki_kiflag |= KI_CTTY;
 			if (SESS_LEADER(p))
 				kp->ki_kiflag |= KI_SLEADER;
 			/* XXX proctree_lock */
 			tp = sp->s_ttyp;
 			SESS_UNLOCK(sp);
 		}
 	}
 	if ((p->p_flag & P_CONTROLT) && tp != NULL) {
 		kp->ki_tdev = tty_udev(tp);
+		kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */
 		kp->ki_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
 		if (tp->t_session)
 			kp->ki_tsid = tp->t_session->s_sid;
-	} else
+	} else {
 		kp->ki_tdev = NODEV;
+		kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */
+	}
 	if (p->p_comm[0] != '\0')
 		strlcpy(kp->ki_comm, p->p_comm, sizeof(kp->ki_comm));
 	if (p->p_sysent && p->p_sysent->sv_name != NULL &&
 	    p->p_sysent->sv_name[0] != '\0')
 		strlcpy(kp->ki_emul, p->p_sysent->sv_name, sizeof(kp->ki_emul));
 	kp->ki_siglist = p->p_siglist;
 	kp->ki_xstat = KW_EXITCODE(p->p_xexit, p->p_xsig);
 	kp->ki_acflag = p->p_acflag;
 	kp->ki_lock = p->p_lock;
 	if (p->p_pptr) {
 		/* ki_ppid uses the real parent; ki_tracer uses p_pptr. */
 		kp->ki_ppid = proc_realparent(p)->p_pid;
 		if (p->p_flag & P_TRACED)
 			kp->ki_tracer = p->p_pptr->p_pid;
 	}
 }
 
 /*
  * Fill in information that is thread specific.  Must be called with
  * target process locked.  If 'preferthread' is set, overwrite certain
  * process-related fields that are maintained for both threads and
  * processes.
  *
  * With 'preferthread' set the per-thread rusage, runtime, %cpu, estcpu,
  * cow counter and pending signal list are written into 'kp', and the
  * process stat lock is held around the thread lock for the duration.
  */
 static void
 fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread)
 {
 	struct proc *p;
 
 	p = td->td_proc;
 	kp->ki_tdaddr = td;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	if (preferthread)
 		PROC_STATLOCK(p);
 	thread_lock(td);
 	if (td->td_wmesg != NULL)
 		strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg));
 	else
 		bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg));
 	/*
 	 * A thread name that does not fit in ki_tdname has its remainder
 	 * stored in ki_moretdname; otherwise ki_moretdname is cleared.
 	 */
 	if (strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname)) >=
 	    sizeof(kp->ki_tdname)) {
 		strlcpy(kp->ki_moretdname,
 		    td->td_name + sizeof(kp->ki_tdname) - 1,
 		    sizeof(kp->ki_moretdname));
 	} else {
 		bzero(kp->ki_moretdname, sizeof(kp->ki_moretdname));
 	}
 	/* Record the lock name only while the thread is blocked on a lock. */
 	if (TD_ON_LOCK(td)) {
 		kp->ki_kiflag |= KI_LOCKBLOCK;
 		strlcpy(kp->ki_lockname, td->td_lockname,
 		    sizeof(kp->ki_lockname));
 	} else {
 		kp->ki_kiflag &= ~KI_LOCKBLOCK;
 		bzero(kp->ki_lockname, sizeof(kp->ki_lockname));
 	}
 
 	/* Derive ki_stat from the process state and this thread's state. */
 	if (p->p_state == PRS_NORMAL) { /* approximate. */
 		if (TD_ON_RUNQ(td) ||
 		    TD_CAN_RUN(td) ||
 		    TD_IS_RUNNING(td)) {
 			kp->ki_stat = SRUN;
 		} else if (P_SHOULDSTOP(p)) {
 			kp->ki_stat = SSTOP;
 		} else if (TD_IS_SLEEPING(td)) {
 			kp->ki_stat = SSLEEP;
 		} else if (TD_ON_LOCK(td)) {
 			kp->ki_stat = SLOCK;
 		} else {
 			kp->ki_stat = SWAIT;
 		}
 	} else if (p->p_state == PRS_ZOMBIE) {
 		kp->ki_stat = SZOMB;
 	} else {
 		kp->ki_stat = SIDL;
 	}
 
 	/* Things in the thread */
 	kp->ki_wchan = td->td_wchan;
 	kp->ki_pri.pri_level = td->td_priority;
 	kp->ki_pri.pri_native = td->td_base_pri;
 
 	/*
 	 * Note: legacy fields; clamp at the old NOCPU value and/or
 	 * the maximum u_char CPU value.
 	 */
 	if (td->td_lastcpu == NOCPU)
 		kp->ki_lastcpu_old = NOCPU_OLD;
 	else if (td->td_lastcpu > MAXCPU_OLD)
 		kp->ki_lastcpu_old = MAXCPU_OLD;
 	else
 		kp->ki_lastcpu_old = td->td_lastcpu;
 
 	if (td->td_oncpu == NOCPU)
 		kp->ki_oncpu_old = NOCPU_OLD;
 	else if (td->td_oncpu > MAXCPU_OLD)
 		kp->ki_oncpu_old = MAXCPU_OLD;
 	else
 		kp->ki_oncpu_old = td->td_oncpu;
 
 	/* The non-legacy CPU fields take the values unclamped. */
 	kp->ki_lastcpu = td->td_lastcpu;
 	kp->ki_oncpu = td->td_oncpu;
 	kp->ki_tdflags = td->td_flags;
 	kp->ki_tid = td->td_tid;
 	kp->ki_numthreads = p->p_numthreads;
 	kp->ki_pcb = td->td_pcb;
 	kp->ki_kstack = (void *)td->td_kstack;
 	kp->ki_slptime = (ticks - td->td_slptick) / hz;
 	kp->ki_pri.pri_class = td->td_pri_class;
 	kp->ki_pri.pri_user = td->td_user_pri;
 
 	/* Per-thread accounting is only reported when the caller asks. */
 	if (preferthread) {
 		rufetchtd(td, &kp->ki_rusage);
 		kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime);
 		kp->ki_pctcpu = sched_pctcpu(td);
 		kp->ki_estcpu = sched_estcpu(td);
 		kp->ki_cow = td->td_cow;
 	}
 
 	/* We can't get this anymore but ps etc never used it anyway. */
 	kp->ki_rqindex = 0;
 
 	if (preferthread)
 		kp->ki_siglist = td->td_siglist;
 	kp->ki_sigmask = td->td_sigmask;
 	thread_unlock(td);
 	if (preferthread)
 		PROC_STATUNLOCK(p);
 }
 
 /*
  * Populate 'kp' for process 'p': process-only fields first, then the
  * thread-specific fields of the first thread in the process (without
  * thread preference), then the aggregated fields.
  * The caller must hold the lock of the target process.
  */
 void
 fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp)
 {
 	struct thread *first_td;
 
 	first_td = FIRST_THREAD_IN_PROC(p);
 	MPASS(first_td != NULL);
 
 	fill_kinfo_proc_only(p, kp);
 	fill_kinfo_thread(first_td, kp, 0);
 	fill_kinfo_aggregate(p, kp);
 }
 
 /*
  * Allocate a zero-filled struct pstats from M_SUBPROC.  The allocation
  * is requested with M_WAITOK.
  */
 struct pstats *
 pstats_alloc(void)
 {
 	struct pstats *ps;
 
 	ps = malloc(sizeof(*ps), M_SUBPROC, M_ZERO|M_WAITOK);
 	return (ps);
 }
 
 /*
  * Copy parts of p_stats; zero the rest of p_stats (statistics).
  */
 void
 pstats_fork(struct pstats *src, struct pstats *dst)
 {
 
 	bzero(&dst->pstat_startzero,
 	    __rangeof(struct pstats, pstat_startzero, pstat_endzero));
 	bcopy(&src->pstat_startcopy, &dst->pstat_startcopy,
 	    __rangeof(struct pstats, pstat_startcopy, pstat_endcopy));
 }
 
 /*
  * Release a struct pstats back to the M_SUBPROC malloc type.
  */
 void
 pstats_free(struct pstats *ps)
 {
 
 	free(ps, M_SUBPROC);
 }
 
 /*
  * Search the zombie list for 'pid'.  The caller must hold allproc_lock.
  * On a match the process is returned with its process lock held;
  * otherwise NULL is returned.
  */
 static struct proc *
 zpfind_locked(pid_t pid)
 {
 	struct proc *zp;
 
 	sx_assert(&allproc_lock, SX_LOCKED);
 	LIST_FOREACH(zp, &zombproc, p_list) {
 		if (zp->p_pid != pid)
 			continue;
 		PROC_LOCK(zp);
 		return (zp);
 	}
 	return (NULL);
 }
 
 /*
  * Look up a zombie process by pid.  Takes allproc_lock shared around
  * the search and returns whatever zpfind_locked() found (a locked
  * process or NULL).
  */
 struct proc *
 zpfind(pid_t pid)
 {
 	struct proc *zp;
 
 	sx_slock(&allproc_lock);
 	zp = zpfind_locked(pid);
 	sx_sunlock(&allproc_lock);
 
 	return (zp);
 }
 
 #ifdef COMPAT_FREEBSD32
 
 /*
  * Squeeze a pointer into 32 bits for export to a 32-bit consumer.
  * A value that does not fit (greater than UINT_MAX) is reported as 0;
  * anything else is passed through unchanged.  The callers mostly hand
  * in kernel addresses, for which returning zero is acceptable.
  */
 static inline uint32_t
 ptr32_trim(void *ptr)
 {
 	uintptr_t addr;
 
 	addr = (uintptr_t)ptr;
 	if (addr > UINT_MAX)
 		return (0);
 	return (addr);
 }
 
 /*
  * Copy pointer member 'fld' from 'src' to 'dst', passing the value
  * through ptr32_trim() on the way.
  */
 #define PTRTRIM_CP(src,dst,fld) \
 	do { (dst).fld = ptr32_trim((src).fld); } while (0)
 
 /*
  * Translate a native struct kinfo_proc into struct kinfo_proc32.
  *
  * The destination is zeroed first and ki_structsize is set to the size
  * of the 32-bit structure.  Pointer members go through PTRTRIM_CP,
  * plain members through CP, timevals through TV_CP, the two rusage
  * members through freebsd32_rusage_out(), and the fixed-size name
  * arrays are copied with bcopy().
  */
 static void
 freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32)
 {
 	int i;
 
 	bzero(ki32, sizeof(struct kinfo_proc32));
 	ki32->ki_structsize = sizeof(struct kinfo_proc32);
 	CP(*ki, *ki32, ki_layout);
 	PTRTRIM_CP(*ki, *ki32, ki_args);
 	PTRTRIM_CP(*ki, *ki32, ki_paddr);
 	PTRTRIM_CP(*ki, *ki32, ki_addr);
 	PTRTRIM_CP(*ki, *ki32, ki_tracep);
 	PTRTRIM_CP(*ki, *ki32, ki_textvp);
 	PTRTRIM_CP(*ki, *ki32, ki_fd);
 	PTRTRIM_CP(*ki, *ki32, ki_vmspace);
 	PTRTRIM_CP(*ki, *ki32, ki_wchan);
 	CP(*ki, *ki32, ki_pid);
 	CP(*ki, *ki32, ki_ppid);
 	CP(*ki, *ki32, ki_pgid);
 	CP(*ki, *ki32, ki_tpgid);
 	CP(*ki, *ki32, ki_sid);
 	CP(*ki, *ki32, ki_tsid);
 	CP(*ki, *ki32, ki_jobc);
 	CP(*ki, *ki32, ki_tdev);
+	CP(*ki, *ki32, ki_tdev_freebsd11);
 	CP(*ki, *ki32, ki_siglist);
 	CP(*ki, *ki32, ki_sigmask);
 	CP(*ki, *ki32, ki_sigignore);
 	CP(*ki, *ki32, ki_sigcatch);
 	CP(*ki, *ki32, ki_uid);
 	CP(*ki, *ki32, ki_ruid);
 	CP(*ki, *ki32, ki_svuid);
 	CP(*ki, *ki32, ki_rgid);
 	CP(*ki, *ki32, ki_svgid);
 	CP(*ki, *ki32, ki_ngroups);
 	for (i = 0; i < KI_NGROUPS; i++)
 		CP(*ki, *ki32, ki_groups[i]);
 	CP(*ki, *ki32, ki_size);
 	CP(*ki, *ki32, ki_rssize);
 	CP(*ki, *ki32, ki_swrss);
 	CP(*ki, *ki32, ki_tsize);
 	CP(*ki, *ki32, ki_dsize);
 	CP(*ki, *ki32, ki_ssize);
 	CP(*ki, *ki32, ki_xstat);
 	CP(*ki, *ki32, ki_acflag);
 	CP(*ki, *ki32, ki_pctcpu);
 	CP(*ki, *ki32, ki_estcpu);
 	CP(*ki, *ki32, ki_slptime);
 	CP(*ki, *ki32, ki_swtime);
 	CP(*ki, *ki32, ki_cow);
 	CP(*ki, *ki32, ki_runtime);
 	TV_CP(*ki, *ki32, ki_start);
 	TV_CP(*ki, *ki32, ki_childtime);
 	CP(*ki, *ki32, ki_flag);
 	CP(*ki, *ki32, ki_kiflag);
 	CP(*ki, *ki32, ki_traceflag);
 	CP(*ki, *ki32, ki_stat);
 	CP(*ki, *ki32, ki_nice);
 	CP(*ki, *ki32, ki_lock);
 	CP(*ki, *ki32, ki_rqindex);
 	CP(*ki, *ki32, ki_oncpu);
 	CP(*ki, *ki32, ki_lastcpu);
 
 	/* XXX TODO: wrap cpu value as appropriate */
 	CP(*ki, *ki32, ki_oncpu_old);
 	CP(*ki, *ki32, ki_lastcpu_old);
 
 	/* Fixed-size character arrays are copied whole, terminator included. */
 	bcopy(ki->ki_tdname, ki32->ki_tdname, TDNAMLEN + 1);
 	bcopy(ki->ki_wmesg, ki32->ki_wmesg, WMESGLEN + 1);
 	bcopy(ki->ki_login, ki32->ki_login, LOGNAMELEN + 1);
 	bcopy(ki->ki_lockname, ki32->ki_lockname, LOCKNAMELEN + 1);
 	bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1);
 	bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1);
 	bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1);
 	bcopy(ki->ki_moretdname, ki32->ki_moretdname, MAXCOMLEN - TDNAMLEN + 1);
 	CP(*ki, *ki32, ki_tracer);
 	CP(*ki, *ki32, ki_flag2);
 	CP(*ki, *ki32, ki_fibnum);
 	CP(*ki, *ki32, ki_cr_flags);
 	CP(*ki, *ki32, ki_jid);
 	CP(*ki, *ki32, ki_numthreads);
 	CP(*ki, *ki32, ki_tid);
 	CP(*ki, *ki32, ki_pri);
 	freebsd32_rusage_out(&ki->ki_rusage, &ki32->ki_rusage);
 	freebsd32_rusage_out(&ki->ki_rusage_ch, &ki32->ki_rusage_ch);
 	PTRTRIM_CP(*ki, *ki32, ki_pcb);
 	PTRTRIM_CP(*ki, *ki32, ki_kstack);
 	PTRTRIM_CP(*ki, *ki32, ki_udata);
 	CP(*ki, *ki32, ki_sflag);
 	CP(*ki, *ki32, ki_tdflags);
 }
 #endif
 
 /*
  * Append kinfo_proc record(s) describing 'p' to 'sb'.
  *
  * With KERN_PROC_NOTHREADS in 'flags' a single record is emitted for
  * the process; otherwise one record is emitted per thread, each with
  * thread-preferred fields.  When built with COMPAT_FREEBSD32 and
  * KERN_PROC_MASK32 is set, every record is converted to the 32-bit
  * layout before being appended.
  *
  * Must be called with the process locked (asserted); the process lock
  * is dropped before returning.  Returns 0, or ENOMEM if appending to
  * the sbuf fails.
  */
 int
 kern_proc_out(struct proc *p, struct sbuf *sb, int flags)
 {
 	struct thread *td;
 	struct kinfo_proc ki;
 #ifdef COMPAT_FREEBSD32
 	struct kinfo_proc32 ki32;
 #endif
 	int error;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
 
 	error = 0;
 	fill_kinfo_proc(p, &ki);
 	if ((flags & KERN_PROC_NOTHREADS) != 0) {
 #ifdef COMPAT_FREEBSD32
 		if ((flags & KERN_PROC_MASK32) != 0) {
 			freebsd32_kinfo_proc_out(&ki, &ki32);
 			if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
 				error = ENOMEM;
 		} else
 #endif
 			if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
 				error = ENOMEM;
 	} else {
 		/*
 		 * 'ki' keeps the process-wide fields filled in above; each
 		 * iteration only refreshes the thread-specific part.
 		 */
 		FOREACH_THREAD_IN_PROC(p, td) {
 			fill_kinfo_thread(td, &ki, 1);
 #ifdef COMPAT_FREEBSD32
 			if ((flags & KERN_PROC_MASK32) != 0) {
 				freebsd32_kinfo_proc_out(&ki, &ki32);
 				if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
 					error = ENOMEM;
 			} else
 #endif
 				if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
 					error = ENOMEM;
 			if (error != 0)
 				break;
 		}
 	}
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 /*
  * Write the kinfo records for 'p' to the sysctl request, then look the
  * pid up again (in the zombie list when 'doingzomb' is set, otherwise
  * with pfind()) to check that it still names the same process.
  *
  * kern_proc_out() drops the process lock that the caller passed in.
  * Returns an error from producing or finishing the output, ESRCH if
  * the pid no longer resolves to 'p', and 0 otherwise.  Pid 0 is not
  * re-validated in the non-zombie case.
  */
 static int
 sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags,
     int doingzomb)
 {
 	struct kinfo_proc ki;
 	struct sbuf sb;
 	struct proc *found;
 	pid_t pid;
 	int out_error, finish_error;
 
 	pid = p->p_pid;
 	sbuf_new_for_sysctl(&sb, (char *)&ki, sizeof(ki), req);
 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 	out_error = kern_proc_out(p, &sb, flags);
 	finish_error = sbuf_finish(&sb);
 	sbuf_delete(&sb);
 	if (out_error != 0)
 		return (out_error);
 	if (finish_error != 0)
 		return (finish_error);
 
 	if (doingzomb) {
 		found = zpfind(pid);
 	} else if (pid == 0) {
 		return (0);
 	} else {
 		found = pfind(pid);
 	}
 	if (found == NULL)
 		return (ESRCH);
 
 	/* The lookup returned the process locked; only identity matters. */
 	PROC_UNLOCK(found);
 	return (found == p ? 0 : ESRCH);
 }
 
 /*
  * Sysctl handler that exports kinfo_proc records.
  *
  * The selector is taken from the oid number.  KERN_PROC_PID is handled
  * directly via pget(); every other selector walks allproc and then
  * zombproc, filtering on gid, pgrp, rgid, session, tty, uid or ruid as
  * requested, and emits each matching process through sysctl_out_proc().
  * Thread records are included for KERN_PROC_ALL or when
  * KERN_PROC_INC_THREAD is set in the oid number.
  *
  * Returns EINVAL when the number of name components does not match the
  * selector, or the first error encountered while producing output.
  */
 static int
 sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct proc *p;
 	int flags, doingzomb, oid_number;
 	int error = 0;
 
 	oid_number = oidp->oid_number;
 	if (oid_number != KERN_PROC_ALL &&
 	    (oid_number & KERN_PROC_INC_THREAD) == 0)
 		flags = KERN_PROC_NOTHREADS;
 	else {
 		flags = 0;
 		oid_number &= ~KERN_PROC_INC_THREAD;
 	}
 #ifdef COMPAT_FREEBSD32
 	if (req->flags & SCTL_MASK32)
 		flags |= KERN_PROC_MASK32;
 #endif
 	/* Single-pid lookups do not need the list walk below. */
 	if (oid_number == KERN_PROC_PID) {
 		if (namelen != 1)
 			return (EINVAL);
 		error = sysctl_wire_old_buffer(req, 0);
 		if (error)
 			return (error);
 		sx_slock(&proctree_lock);
 		error = pget((pid_t)name[0], PGET_CANSEE, &p);
 		if (error == 0)
 			error = sysctl_out_proc(p, req, flags, 0);
 		sx_sunlock(&proctree_lock);
 		return (error);
 	}
 
 	/* Validate the argument count for the remaining selectors. */
 	switch (oid_number) {
 	case KERN_PROC_ALL:
 		if (namelen != 0)
 			return (EINVAL);
 		break;
 	case KERN_PROC_PROC:
 		if (namelen != 0 && namelen != 1)
 			return (EINVAL);
 		break;
 	default:
 		if (namelen != 1)
 			return (EINVAL);
 		break;
 	}
 
 	if (!req->oldptr) {
 		/* overestimate by 5 procs */
 		error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5);
 		if (error)
 			return (error);
 	}
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sx_slock(&proctree_lock);
 	sx_slock(&allproc_lock);
 	/* Pass 0 walks allproc, pass 1 walks zombproc. */
 	for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) {
 		if (!doingzomb)
 			p = LIST_FIRST(&allproc);
 		else
 			p = LIST_FIRST(&zombproc);
 		for (; p != NULL; p = LIST_NEXT(p, p_list)) {
 			/*
 			 * Skip embryonic processes.
 			 */
 			PROC_LOCK(p);
 			if (p->p_state == PRS_NEW) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 			KASSERT(p->p_ucred != NULL,
 			    ("process credential is NULL for non-NEW proc"));
 			/*
 			 * Show a user only appropriate processes.
 			 */
 			if (p_cansee(curthread, p)) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 			/*
 			 * TODO - make more efficient (see notes below).
 			 * do by session.
 			 */
 			switch (oid_number) {
 
 			case KERN_PROC_GID:
 				if (p->p_ucred->cr_gid != (gid_t)name[0]) {
 					PROC_UNLOCK(p);
 					continue;
 				}
 				break;
 
 			case KERN_PROC_PGRP:
 				/* could do this by traversing pgrp */
 				if (p->p_pgrp == NULL ||
 				    p->p_pgrp->pg_id != (pid_t)name[0]) {
 					PROC_UNLOCK(p);
 					continue;
 				}
 				break;
 
 			case KERN_PROC_RGID:
 				if (p->p_ucred->cr_rgid != (gid_t)name[0]) {
 					PROC_UNLOCK(p);
 					continue;
 				}
 				break;
 
 			case KERN_PROC_SESSION:
 				if (p->p_session == NULL ||
 				    p->p_session->s_sid != (pid_t)name[0]) {
 					PROC_UNLOCK(p);
 					continue;
 				}
 				break;
 
 			case KERN_PROC_TTY:
 				if ((p->p_flag & P_CONTROLT) == 0 ||
 				    p->p_session == NULL) {
 					PROC_UNLOCK(p);
 					continue;
 				}
 				/* XXX proctree_lock */
 				SESS_LOCK(p->p_session);
 				if (p->p_session->s_ttyp == NULL ||
 				    tty_udev(p->p_session->s_ttyp) !=
 				    (dev_t)name[0]) {
 					SESS_UNLOCK(p->p_session);
 					PROC_UNLOCK(p);
 					continue;
 				}
 				SESS_UNLOCK(p->p_session);
 				break;
 
 			case KERN_PROC_UID:
 				if (p->p_ucred->cr_uid != (uid_t)name[0]) {
 					PROC_UNLOCK(p);
 					continue;
 				}
 				break;
 
 			case KERN_PROC_RUID:
 				if (p->p_ucred->cr_ruid != (uid_t)name[0]) {
 					PROC_UNLOCK(p);
 					continue;
 				}
 				break;
 
 			case KERN_PROC_PROC:
 				break;
 
 			default:
 				break;
 
 			}
 
 			/*
 			 * The process lock taken above is released inside
 			 * sysctl_out_proc() (via kern_proc_out()).
 			 */
 			error = sysctl_out_proc(p, req, flags, doingzomb);
 			if (error) {
 				sx_sunlock(&allproc_lock);
 				sx_sunlock(&proctree_lock);
 				return (error);
 			}
 		}
 	}
 	sx_sunlock(&allproc_lock);
 	sx_sunlock(&proctree_lock);
 	return (0);
 }
 
 /*
  * Allocate a pargs structure with room for 'len' bytes of argument
  * data.  The new object starts with a reference count of one and
  * ar_length set to 'len'.  The allocation uses M_WAITOK.
  */
 struct pargs *
 pargs_alloc(int len)
 {
 	struct pargs *newpa;
 
 	newpa = malloc(sizeof(*newpa) + len, M_PARGS, M_WAITOK);
 	newpa->ar_length = len;
 	refcount_init(&newpa->ar_ref, 1);
 
 	return (newpa);
 }
 
 /*
  * Release a pargs structure back to the M_PARGS malloc type.  The
  * reference count is not examined here.
  */
 static void
 pargs_free(struct pargs *pa)
 {
 
 	free(pa, M_PARGS);
 }
 
 /*
  * Take an additional reference on 'pa'.  A NULL pointer is accepted
  * and ignored.
  */
 void
 pargs_hold(struct pargs *pa)
 {
 
 	if (pa != NULL)
 		refcount_acquire(&pa->ar_ref);
 }
 
 /*
  * Drop one reference on 'pa' and free it once refcount_release()
  * reports that the last reference is gone.  A NULL pointer is accepted
  * and ignored.
  */
 void
 pargs_drop(struct pargs *pa)
 {
 
 	if (pa != NULL && refcount_release(&pa->ar_ref))
 		pargs_free(pa);
 }
 
 /*
  * Read up to 'len' bytes at 'sptr' in the address space of 'p' into
  * 'buf'.  Returns 0 if at least one byte was read and ENOMEM otherwise.
  *
  * A short read is deliberately treated as success: the string may be
  * shorter than the chunk and end right at a page boundary, with the
  * following page not mapped.
  */
 static int
 proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf,
     size_t len)
 {
 	ssize_t nread;
 
 	nread = proc_readmem(td, p, (vm_offset_t)sptr, buf, len);
 	return (nread > 0 ? 0 : ENOMEM);
 }
 
 #define PROC_AUXV_MAX	256	/* Safety limit on auxv size. */
 
 /*
  * Selects which vector of the target process is fetched by
  * get_proc_vector() and get_proc_vector32().
  */
 enum proc_vector_type {
 	PROC_ARG,	/* argument vector (ps_argvstr) */
 	PROC_ENV,	/* environment vector (ps_envstr) */
 	PROC_AUX,	/* ELF auxiliary vector located after the env array */
 };
 
 #ifdef COMPAT_FREEBSD32
 /*
  * 32-bit counterpart of get_proc_vector(): fetch the argv, envv or auxv
  * array of a process whose ps_strings use the freebsd32 layout.
  *
  * On success *proc_vectorp points to an M_TEMP allocation and *vsizep
  * holds the number of entries.  For PROC_ARG and PROC_ENV the 32-bit
  * values are widened into a native 'char *' array; for PROC_AUX the raw
  * Elf32_Auxinfo array is handed back as-is (cast to 'char **').
  *
  * Returns ENOMEM when reading the process memory fails, ENOEXEC when
  * the vector looks invalid (too many entries, misaligned auxv, or no
  * AT_NULL within PROC_AUXV_MAX entries) and EINVAL for an unknown type.
  */
 static int
 get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp,
     size_t *vsizep, enum proc_vector_type type)
 {
 	struct freebsd32_ps_strings pss;
 	Elf32_Auxinfo aux;
 	vm_offset_t vptr, ptr;
 	uint32_t *proc_vector32;
 	char **proc_vector;
 	size_t vsize, size;
 	int i, error;
 
 	error = 0;
 	if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss,
 	    sizeof(pss)) != sizeof(pss))
 		return (ENOMEM);
 	switch (type) {
 	case PROC_ARG:
 		vptr = (vm_offset_t)PTRIN(pss.ps_argvstr);
 		vsize = pss.ps_nargvstr;
 		if (vsize > ARG_MAX)
 			return (ENOEXEC);
 		size = vsize * sizeof(int32_t);
 		break;
 	case PROC_ENV:
 		vptr = (vm_offset_t)PTRIN(pss.ps_envstr);
 		vsize = pss.ps_nenvstr;
 		if (vsize > ARG_MAX)
 			return (ENOEXEC);
 		size = vsize * sizeof(int32_t);
 		break;
 	case PROC_AUX:
 		/* The aux array follows the env array and its terminator. */
 		vptr = (vm_offset_t)PTRIN(pss.ps_envstr) +
 		    (pss.ps_nenvstr + 1) * sizeof(int32_t);
 		if (vptr % 4 != 0)
 			return (ENOEXEC);
 		/* First pass: count entries up to and including AT_NULL. */
 		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
 			if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) !=
 			    sizeof(aux))
 				return (ENOMEM);
 			if (aux.a_type == AT_NULL)
 				break;
 			ptr += sizeof(aux);
 		}
 		if (aux.a_type != AT_NULL)
 			return (ENOEXEC);
 		vsize = i + 1;
 		size = vsize * sizeof(aux);
 		break;
 	default:
 		KASSERT(0, ("Wrong proc vector type: %d", type));
 		return (EINVAL);
 	}
 	proc_vector32 = malloc(size, M_TEMP, M_WAITOK);
 	if (proc_readmem(td, p, vptr, proc_vector32, size) != size) {
 		error = ENOMEM;
 		goto done;
 	}
 	if (type == PROC_AUX) {
 		/* Ownership of the raw buffer passes to the caller. */
 		*proc_vectorp = (char **)proc_vector32;
 		*vsizep = vsize;
 		return (0);
 	}
 	/* Widen each 32-bit pointer into a native pointer. */
 	proc_vector = malloc(vsize * sizeof(char *), M_TEMP, M_WAITOK);
 	for (i = 0; i < (int)vsize; i++)
 		proc_vector[i] = PTRIN(proc_vector32[i]);
 	*proc_vectorp = proc_vector;
 	*vsizep = vsize;
 done:
 	free(proc_vector32, M_TEMP);
 	return (error);
 }
 #endif
 
 /*
  * Fetch the argv, envv or auxv array of process 'p' as selected by
  * 'type', locating it through the ps_strings structure read from the
  * process at sv_psstrings.
  *
  * On success *proc_vectorp points to an M_TEMP allocation holding the
  * array copied out of the process and *vsizep holds the number of
  * entries; for PROC_AUX the buffer actually contains Elf_Auxinfo
  * entries, terminator included.  ILP32 processes are delegated to
  * get_proc_vector32() when COMPAT_FREEBSD32 is configured.
  *
  * Returns ENOMEM when reading the process memory fails, ENOEXEC when
  * the vector looks invalid and EINVAL for an unknown type.
  */
 static int
 get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp,
     size_t *vsizep, enum proc_vector_type type)
 {
 	struct ps_strings pss;
 	Elf_Auxinfo aux;
 	vm_offset_t vptr, ptr;
 	char **proc_vector;
 	size_t vsize, size;
 	int i;
 
 #ifdef COMPAT_FREEBSD32
 	if (SV_PROC_FLAG(p, SV_ILP32) != 0)
 		return (get_proc_vector32(td, p, proc_vectorp, vsizep, type));
 #endif
 	if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss,
 	    sizeof(pss)) != sizeof(pss))
 		return (ENOMEM);
 	switch (type) {
 	case PROC_ARG:
 		vptr = (vm_offset_t)pss.ps_argvstr;
 		vsize = pss.ps_nargvstr;
 		if (vsize > ARG_MAX)
 			return (ENOEXEC);
 		size = vsize * sizeof(char *);
 		break;
 	case PROC_ENV:
 		vptr = (vm_offset_t)pss.ps_envstr;
 		vsize = pss.ps_nenvstr;
 		if (vsize > ARG_MAX)
 			return (ENOEXEC);
 		size = vsize * sizeof(char *);
 		break;
 	case PROC_AUX:
 		/*
 		 * The aux array is just above env array on the stack. Check
 		 * that the address is naturally aligned.
 		 */
 		vptr = (vm_offset_t)pss.ps_envstr + (pss.ps_nenvstr + 1)
 		    * sizeof(char *);
 #if __ELF_WORD_SIZE == 64
 		if (vptr % sizeof(uint64_t) != 0)
 #else
 		if (vptr % sizeof(uint32_t) != 0)
 #endif
 			return (ENOEXEC);
 		/*
 		 * We count the array size reading the aux vectors from the
 		 * stack until AT_NULL vector is returned.  So (to keep the code
 		 * simple) we read the process stack twice: the first time here
 		 * to find the size and the second time when copying the vectors
 		 * to the allocated proc_vector.
 		 */
 		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
 			if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) !=
 			    sizeof(aux))
 				return (ENOMEM);
 			if (aux.a_type == AT_NULL)
 				break;
 			ptr += sizeof(aux);
 		}
 		/*
 		 * If the PROC_AUXV_MAX entries are iterated over, and we have
 		 * not reached AT_NULL, it is most likely we are reading wrong
 		 * data: either the process doesn't have auxv array or data has
 		 * been modified. Return the error in this case.
 		 */
 		if (aux.a_type != AT_NULL)
 			return (ENOEXEC);
 		/* Count the terminating AT_NULL entry as well. */
 		vsize = i + 1;
 		size = vsize * sizeof(aux);
 		break;
 	default:
 		KASSERT(0, ("Wrong proc vector type: %d", type));
 		return (EINVAL); /* In case we are built without INVARIANTS. */
 	}
 	proc_vector = malloc(size, M_TEMP, M_WAITOK);
 	if (proc_readmem(td, p, vptr, proc_vector, size) != size) {
 		free(proc_vector, M_TEMP);
 		return (ENOMEM);
 	}
 	*proc_vectorp = proc_vector;
 	*vsizep = vsize;
 
 	return (0);
 }
 
 #define GET_PS_STRINGS_CHUNK_SZ	256	/* Chunk size (bytes) for ps_strings operations. */
 
 /*
  * Copy the strings referenced by the argv or envv array of process 'p'
  * into 'sb', each followed by a NUL byte.
  *
  * The pointer array is obtained with get_proc_vector() and each string
  * is read from the process in GET_PS_STRINGS_CHUNK_SZ pieces.  Output
  * stops at the first NULL entry of the array, or once about
  * 2 * (PATH_MAX + ARG_MAX) bytes have been produced.  The process must
  * be held by the caller (asserted).
  *
  * Returns 0 on success, or the error from get_proc_vector() or
  * proc_read_string().  The results of the sbuf_bcat() calls are not
  * checked here.
  */
 static int
 get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb,
     enum proc_vector_type type)
 {
 	size_t done, len, nchr, vsize;
 	int error, i;
 	char **proc_vector, *sptr;
 	char pss_string[GET_PS_STRINGS_CHUNK_SZ];
 
 	PROC_ASSERT_HELD(p);
 
 	/*
 	 * We are not going to read more than 2 * (PATH_MAX + ARG_MAX) bytes.
 	 */
 	nchr = 2 * (PATH_MAX + ARG_MAX);
 
 	error = get_proc_vector(td, p, &proc_vector, &vsize, type);
 	if (error != 0)
 		return (error);
 	for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) {
 		/*
 		 * The program may have scribbled into its argv array, e.g. to
 		 * remove some arguments.  If that has happened, break out
 		 * before trying to read from NULL.
 		 */
 		if (proc_vector[i] == NULL)
 			break;
 		for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) {
 			error = proc_read_string(td, p, sptr, pss_string,
 			    sizeof(pss_string));
 			if (error != 0)
 				goto done;
 			len = strnlen(pss_string, GET_PS_STRINGS_CHUNK_SZ);
 			/* Clip so that the total stays below the budget. */
 			if (done + len >= nchr)
 				len = nchr - done - 1;
 			sbuf_bcat(sb, pss_string, len);
 			/*
 			 * A piece shorter than a full chunk means the end of
 			 * the string (or the budget) has been reached.
 			 */
 			if (len != GET_PS_STRINGS_CHUNK_SZ)
 				break;
 			done += GET_PS_STRINGS_CHUNK_SZ;
 		}
 		/* Terminate the string and account for the last piece. */
 		sbuf_bcat(sb, "", 1);
 		done += len + 1;
 	}
 done:
 	free(proc_vector, M_TEMP);
 	return (error);
 }
 
 /*
  * Append the argument strings of process 'p' to 'sb'; see
  * get_ps_strings() for the output format and error values.
  *
  * NOTE(review): the 'td' argument is not used; curthread is passed to
  * get_ps_strings() instead.  Confirm whether that is intentional.
  */
 int
 proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb)
 {
 
 	return (get_ps_strings(curthread, p, sb, PROC_ARG));
 }
 
 /*
  * Append the environment strings of process 'p' to 'sb'; see
  * get_ps_strings() for the output format and error values.
  *
  * NOTE(review): the 'td' argument is not used; curthread is passed to
  * get_ps_strings() instead.  Confirm whether that is intentional.
  */
 int
 proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb)
 {
 
 	return (get_ps_strings(curthread, p, sb, PROC_ENV));
 }
 
 int
 proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb)
 {
 	size_t vsize, size;
 	char **auxv;
 	int error;
 
 	error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX);
 	if (error == 0) {
 #ifdef COMPAT_FREEBSD32
 		if (SV_PROC_FLAG(p, SV_ILP32) != 0)
 			size = vsize * sizeof(Elf32_Auxinfo);
 		else
 #endif
 			size = vsize * sizeof(Elf_Auxinfo);
 		if (sbuf_bcat(sb, auxv, size) != 0)
 			error = ENOMEM;
 		free(auxv, M_TEMP);
 	}
 	return (error);
 }
 
 /*
  * This sysctl allows a process to retrieve the argument list or process
  * title for another process without groping around in the address space
  * of the other process.  It also allow a process to set its own "process 
  * title to a string of its own choice.
  */
 static int
 sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct pargs *newpa, *pa;
 	struct proc *p;
 	struct sbuf sb;
 	int flags, error = 0, error2;
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	flags = PGET_CANSEE;
 	if (req->newptr != NULL)
 		flags |= PGET_ISCURRENT;
 	error = pget((pid_t)name[0], flags, &p);
 	if (error)
 		return (error);
 
 	pa = p->p_args;
 	if (pa != NULL) {
 		pargs_hold(pa);
 		PROC_UNLOCK(p);
 		error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
 		pargs_drop(pa);
 	} else if ((p->p_flag & (P_WEXIT | P_SYSTEM)) == 0) {
 		_PHOLD(p);
 		PROC_UNLOCK(p);
 		sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
 		sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 		error = proc_getargv(curthread, p, &sb);
 		error2 = sbuf_finish(&sb);
 		PRELE(p);
 		sbuf_delete(&sb);
 		if (error == 0 && error2 != 0)
 			error = error2;
 	} else {
 		PROC_UNLOCK(p);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	if (req->newlen + sizeof(struct pargs) > ps_arg_cache_limit)
 		return (ENOMEM);
 	newpa = pargs_alloc(req->newlen);
 	error = SYSCTL_IN(req, newpa->ar_args, req->newlen);
 	if (error != 0) {
 		pargs_free(newpa);
 		return (error);
 	}
 	PROC_LOCK(p);
 	pa = p->p_args;
 	p->p_args = newpa;
 	PROC_UNLOCK(p);
 	pargs_drop(pa);
 	return (0);
 }
 
 /*
  * Sysctl handler returning the environment strings of another process.
  *
  * Exactly one name component (the pid) is required.  System processes
  * produce empty output.  The hold on the process obtained from pget()
  * is released with PRELE() on every path.  An error from reading the
  * environment takes precedence over one from finishing the sbuf.
  */
 static int
 sysctl_kern_proc_env(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sb;
 	struct proc *p;
 	int *name;
 	u_int namelen;
 	int error, error2;
 
 	name = (int *)arg1;
 	namelen = arg2;
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 	if ((p->p_flag & P_SYSTEM) != 0) {
 		PRELE(p);
 		return (0);
 	}
 
 	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 	error = proc_getenvv(curthread, p, &sb);
 	error2 = sbuf_finish(&sb);
 	PRELE(p);
 	sbuf_delete(&sb);
 	if (error == 0)
 		error = error2;
 	return (error);
 }
 
 /*
  * Sysctl handler returning the ELF auxiliary vector of another process.
  *
  * Exactly one name component (the pid) is required.  System processes
  * produce empty output.  The hold on the process obtained from pget()
  * is released with PRELE() on every path.  An error from reading the
  * vector takes precedence over one from finishing the sbuf.
  */
 static int
 sysctl_kern_proc_auxv(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sb;
 	struct proc *p;
 	int *name;
 	u_int namelen;
 	int error, error2;
 
 	name = (int *)arg1;
 	namelen = arg2;
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 	if ((p->p_flag & P_SYSTEM) != 0) {
 		PRELE(p);
 		return (0);
 	}
 
 	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 	error = proc_getauxv(curthread, p, &sb);
 	error2 = sbuf_finish(&sb);
 	PRELE(p);
 	sbuf_delete(&sb);
 	if (error == 0)
 		error = error2;
 	return (error);
 }
 
 /*
  * Sysctl handler returning the path of the executable of the calling
  * process (pid -1) or of another process.
  *
  * For another process, pget() returns it locked and the lock is
  * dropped once a reference on its text vnode has been taken.  A
  * process without a text vnode produces empty output.  The path is
  * resolved with vn_fullpath() and copied out including its NUL.
  */
 static int
 sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS)
 {
 	pid_t *pidp = (pid_t *)arg1;
 	unsigned int arglen = arg2;
 	struct vnode *vp;
 	struct proc *p;
 	char *retbuf, *freebuf;
 	int error, self;
 
 	if (arglen != 1)
 		return (EINVAL);
 
 	self = (*pidp == -1);	/* -1 means this process */
 	if (self) {
 		p = req->td->td_proc;
 	} else {
 		error = pget(*pidp, PGET_CANSEE, &p);
 		if (error != 0)
 			return (error);
 	}
 
 	vp = p->p_textvp;
 	if (vp != NULL)
 		vref(vp);
 	/* Only a process obtained from pget() is locked here. */
 	if (!self)
 		PROC_UNLOCK(p);
 	if (vp == NULL)
 		return (0);
 
 	error = vn_fullpath(req->td, vp, &retbuf, &freebuf);
 	vrele(vp);
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1);
 	free(freebuf, M_TEMP);
 	return (error);
 }
 
 /*
  * Sysctl handler returning the sv_name string of the sysentvec of the
  * process named by the single pid argument.  The process lock taken
  * by pget() is dropped before the string is handed to
  * sysctl_handle_string().
  */
 static int
 sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 	char *sv_name;
 	int *name = (int *)arg1;
 	int namelen = arg2;
 	int error;
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_CANSEE, &p);
 	if (error != 0)
 		return (error);
 
 	sv_name = p->p_sysent->sv_name;
 	PROC_UNLOCK(p);
 
 	return (sysctl_handle_string(oidp, sv_name, 0, req));
 }
 
 #ifdef KINFO_OVMENTRY_SIZE
 CTASSERT(sizeof(struct kinfo_ovmentry) == KINFO_OVMENTRY_SIZE);
 #endif
 
 #ifdef COMPAT_FREEBSD7
 /*
  * Sysctl handler exporting the VM map of a process as a sequence of
  * struct kinfo_ovmentry records (compiled only with COMPAT_FREEBSD7).
  *
  * One record is copied out per map entry; submap entries are skipped.
  * The map read lock is dropped while the backing vnode is resolved and
  * the record is copied out, and the walk is re-synchronized afterwards
  * if the map timestamp changed in the meantime.
  *
  * Returns the error from pget(), ESRCH if the vmspace cannot be
  * referenced, or the first error from SYSCTL_OUT().
  */
 static int
 sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS)
 {
 	vm_map_entry_t entry, tmp_entry;
 	unsigned int last_timestamp;
 	char *fullpath, *freepath;
 	struct kinfo_ovmentry *kve;
 	struct vattr va;
 	struct ucred *cred;
 	int error, *name;
 	struct vnode *vp;
 	struct proc *p;
 	vm_map_t map;
 	struct vmspace *vm;
 
 	name = (int *)arg1;
 	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 	vm = vmspace_acquire_ref(p);
 	if (vm == NULL) {
 		PRELE(p);
 		return (ESRCH);
 	}
 	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
 
 	map = &vm->vm_map;
 	vm_map_lock_read(map);
 	for (entry = map->header.next; entry != &map->header;
 	    entry = entry->next) {
 		vm_object_t obj, tobj, lobj;
 		vm_offset_t addr;
 
 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 			continue;
 
 		bzero(kve, sizeof(*kve));
 		kve->kve_structsize = sizeof(*kve);
 
 		kve->kve_private_resident = 0;
 		obj = entry->object.vm_object;
 		if (obj != NULL) {
 			VM_OBJECT_RLOCK(obj);
 			if (obj->shadow_count == 1)
 				kve->kve_private_resident =
 				    obj->resident_page_count;
 		}
 		/* Count the pages of the entry that the pmap has mapped. */
 		kve->kve_resident = 0;
 		addr = entry->start;
 		while (addr < entry->end) {
 			if (pmap_extract(map->pmap, addr))
 				kve->kve_resident++;
 			addr += PAGE_SIZE;
 		}
 
 		/*
 		 * Walk down the backing object chain, locking each object
 		 * before unlocking its predecessor.  'obj' itself stays
 		 * locked; 'lobj' ends up as the last object in the chain.
 		 */
 		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
 			if (tobj != obj)
 				VM_OBJECT_RLOCK(tobj);
 			if (lobj != obj)
 				VM_OBJECT_RUNLOCK(lobj);
 			lobj = tobj;
 		}
 
 		kve->kve_start = (void*)entry->start;
 		kve->kve_end = (void*)entry->end;
 		kve->kve_offset = (off_t)entry->offset;
 
 		if (entry->protection & VM_PROT_READ)
 			kve->kve_protection |= KVME_PROT_READ;
 		if (entry->protection & VM_PROT_WRITE)
 			kve->kve_protection |= KVME_PROT_WRITE;
 		if (entry->protection & VM_PROT_EXECUTE)
 			kve->kve_protection |= KVME_PROT_EXEC;
 
 		if (entry->eflags & MAP_ENTRY_COW)
 			kve->kve_flags |= KVME_FLAG_COW;
 		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
 			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
 		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
 			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
 
 		/*
 		 * Remember the map generation and drop the map lock for the
 		 * vnode work and the copyout below.
 		 */
 		last_timestamp = map->timestamp;
 		vm_map_unlock_read(map);
 
 		kve->kve_fileid = 0;
 		kve->kve_fsid = 0;
 		freepath = NULL;
 		fullpath = "";
 		if (lobj) {
 			vp = NULL;
 			switch (lobj->type) {
 			case OBJT_DEFAULT:
 				kve->kve_type = KVME_TYPE_DEFAULT;
 				break;
 			case OBJT_VNODE:
 				kve->kve_type = KVME_TYPE_VNODE;
 				vp = lobj->handle;
 				vref(vp);
 				break;
 			case OBJT_SWAP:
 				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
 					kve->kve_type = KVME_TYPE_VNODE;
 					if ((lobj->flags & OBJ_TMPFS) != 0) {
 						vp = lobj->un_pager.swp.swp_tmpfs;
 						vref(vp);
 					}
 				} else {
 					kve->kve_type = KVME_TYPE_SWAP;
 				}
 				break;
 			case OBJT_DEVICE:
 				kve->kve_type = KVME_TYPE_DEVICE;
 				break;
 			case OBJT_PHYS:
 				kve->kve_type = KVME_TYPE_PHYS;
 				break;
 			case OBJT_DEAD:
 				kve->kve_type = KVME_TYPE_DEAD;
 				break;
 			case OBJT_SG:
 				kve->kve_type = KVME_TYPE_SG;
 				break;
 			default:
 				kve->kve_type = KVME_TYPE_UNKNOWN;
 				break;
 			}
 			if (lobj != obj)
 				VM_OBJECT_RUNLOCK(lobj);
 
 			kve->kve_ref_count = obj->ref_count;
 			kve->kve_shadow_count = obj->shadow_count;
 			VM_OBJECT_RUNLOCK(obj);
 			/* Resolve the path and file identity of the vnode. */
 			if (vp != NULL) {
 				vn_fullpath(curthread, vp, &fullpath,
 				    &freepath);
 				cred = curthread->td_ucred;
 				vn_lock(vp, LK_SHARED | LK_RETRY);
 				if (VOP_GETATTR(vp, &va, cred) == 0) {
 					kve->kve_fileid = va.va_fileid;
+					/* truncate */
 					kve->kve_fsid = va.va_fsid;
 				}
 				vput(vp);
 			}
 		} else {
 			kve->kve_type = KVME_TYPE_NONE;
 			kve->kve_ref_count = 0;
 			kve->kve_shadow_count = 0;
 		}
 
 		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
 		if (freepath != NULL)
 			free(freepath, M_TEMP);
 
 		error = SYSCTL_OUT(req, kve, sizeof(*kve));
 		vm_map_lock_read(map);
 		if (error)
 			break;
 		/*
 		 * The map changed while unlocked: look the position up
 		 * again from the last address of the entry just reported.
 		 */
 		if (last_timestamp != map->timestamp) {
 			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
 			entry = tmp_entry;
 		}
 	}
 	vm_map_unlock_read(map);
 	vmspace_free(vm);
 	PRELE(p);
 	free(kve, M_TEMP);
 	return (error);
 }
 #endif	/* COMPAT_FREEBSD7 */
 
 #ifdef KINFO_VMENTRY_SIZE
 CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
 #endif
 
 /*
  * Add the number of resident pages backing map entry 'entry' to
  * kve->kve_resident, and set KVME_FLAG_SUPER in kve->kve_flags when a
  * superpage mapping is found.
  *
  * For each position the object and its chain of backing objects are
  * searched for the page.  When no page exists at the current index the
  * walk jumps ahead to the nearest following page found in any object
  * of the chain, or to the end of the entry.
  *
  * NOTE(review): the caller visible in this file read-locks the object
  * and every backing object before calling; confirm that all callers
  * do so.
  */
 static void
 kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry,
     struct kinfo_vmentry *kve)
 {
 	vm_object_t obj, tobj;
 	vm_page_t m, m_adv;
 	vm_offset_t addr;
 	vm_paddr_t locked_pa;
 	vm_pindex_t pi, pi_adv, pindex;
 
 	locked_pa = 0;
 	obj = entry->object.vm_object;
 	addr = entry->start;
 	m_adv = NULL;
 	pi = OFF_TO_IDX(entry->offset);
 	for (; addr < entry->end; addr += IDX_TO_OFF(pi_adv), pi += pi_adv) {
 		if (m_adv != NULL) {
 			/* Reuse the page located by the previous look-ahead. */
 			m = m_adv;
 		} else {
 			/* Default advance: skip to the end of the entry. */
 			pi_adv = atop(entry->end - addr);
 			pindex = pi;
 			for (tobj = obj;; tobj = tobj->backing_object) {
 				m = vm_page_find_least(tobj, pindex);
 				if (m != NULL) {
 					if (m->pindex == pindex)
 						break;
 					/* Remember the closest later page. */
 					if (pi_adv > m->pindex - pindex) {
 						pi_adv = m->pindex - pindex;
 						m_adv = m;
 					}
 				}
 				if (tobj->backing_object == NULL)
 					goto next;
 				pindex += OFF_TO_IDX(tobj->
 				    backing_object_offset);
 			}
 		}
 		m_adv = NULL;
 		if (m->psind != 0 && addr + pagesizes[1] <= entry->end &&
 		    (addr & (pagesizes[1] - 1)) == 0 &&
 		    (pmap_mincore(map->pmap, addr, &locked_pa) &
 		    MINCORE_SUPER) != 0) {
 			kve->kve_flags |= KVME_FLAG_SUPER;
 			pi_adv = atop(pagesizes[1]);
 		} else {
 			/*
 			 * We do not test the found page on validity.
 			 * Either the page is busy and being paged in,
 			 * or it was invalidated.  The first case
 			 * should be counted as resident, the second
 			 * is not so clear; we do account both.
 			 */
 			pi_adv = 1;
 		}
 		kve->kve_resident += pi_adv;
 next:;
 	}
 	PA_UNLOCK_COND(locked_pa);
 }
 
 /*
  * Append one struct kinfo_vmentry record to sb for each entry in the
  * address-space map of process p, skipping submap entries.
  *
  * Must be called with the process locked and will return unlocked.
  *
  * maxlen caps the number of bytes appended; -1 disables the cap.  When the
  * next record would exceed the remaining budget the walk stops and 0 is
  * returned.  With KERN_VMMAP_PACK_KINFO set in flags each record is cut
  * off after the NUL terminating kve_path; in either case kve_structsize is
  * rounded up to a multiple of sizeof(uint64_t).
  *
  * Returns 0 on success, ESRCH when vmspace_acquire_ref() returns NULL, or
  * ENOMEM when sbuf_bcat() fails.
  */
 int
 kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags)
 {
 	vm_map_entry_t entry, tmp_entry;
 	struct vattr va;
 	vm_map_t map;
 	vm_object_t obj, tobj, lobj;
 	char *fullpath, *freepath;
 	struct kinfo_vmentry *kve;
 	struct ucred *cred;
 	struct vnode *vp;
 	struct vmspace *vm;
 	vm_offset_t addr;
 	unsigned int last_timestamp;
 	int error;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/*
 	 * Take a hold on the process before dropping its lock; the hold is
 	 * released with PRELE() on every return path below.
 	 */
 	_PHOLD(p);
 	PROC_UNLOCK(p);
 	vm = vmspace_acquire_ref(p);
 	if (vm == NULL) {
 		PRELE(p);
 		return (ESRCH);
 	}
 	/* A single scratch record, reused for every map entry. */
 	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO);
 
 	error = 0;
 	map = &vm->vm_map;
 	vm_map_lock_read(map);
 	for (entry = map->header.next; entry != &map->header;
 	    entry = entry->next) {
 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 			continue;
 
 		/* Remembered to re-find the position if the map changes. */
 		addr = entry->end;
 		bzero(kve, sizeof(*kve));
 		obj = entry->object.vm_object;
 		if (obj != NULL) {
 			/*
 			 * Read-lock the whole backing-object chain; lobj
 			 * is left pointing at the last object in it.
 			 */
 			for (tobj = obj; tobj != NULL;
 			    tobj = tobj->backing_object) {
 				VM_OBJECT_RLOCK(tobj);
 				lobj = tobj;
 			}
 			if (obj->backing_object == NULL)
 				kve->kve_private_resident =
 				    obj->resident_page_count;
 			if (!vmmap_skip_res_cnt)
 				kern_proc_vmmap_resident(map, entry, kve);
 			/*
 			 * Unlock the intermediate objects.  obj and lobj
 			 * stay locked until they have been inspected below.
 			 */
 			for (tobj = obj; tobj != NULL;
 			    tobj = tobj->backing_object) {
 				if (tobj != obj && tobj != lobj)
 					VM_OBJECT_RUNLOCK(tobj);
 			}
 		} else {
 			lobj = NULL;
 		}
 
 		kve->kve_start = entry->start;
 		kve->kve_end = entry->end;
 		kve->kve_offset = entry->offset;
 
 		/* Translate VM_PROT_* bits into their KVME_PROT_* names. */
 		if (entry->protection & VM_PROT_READ)
 			kve->kve_protection |= KVME_PROT_READ;
 		if (entry->protection & VM_PROT_WRITE)
 			kve->kve_protection |= KVME_PROT_WRITE;
 		if (entry->protection & VM_PROT_EXECUTE)
 			kve->kve_protection |= KVME_PROT_EXEC;
 
 		/* Translate MAP_ENTRY_* flags into their KVME_FLAG_* names. */
 		if (entry->eflags & MAP_ENTRY_COW)
 			kve->kve_flags |= KVME_FLAG_COW;
 		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
 			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
 		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
 			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
 		if (entry->eflags & MAP_ENTRY_GROWS_UP)
 			kve->kve_flags |= KVME_FLAG_GROWS_UP;
 		if (entry->eflags & MAP_ENTRY_GROWS_DOWN)
 			kve->kve_flags |= KVME_FLAG_GROWS_DOWN;
 
 		/*
 		 * The map lock is dropped for the rest of the iteration;
 		 * the timestamp is compared after relocking to notice
 		 * changes made in the meantime.
 		 */
 		last_timestamp = map->timestamp;
 		vm_map_unlock_read(map);
 
 		freepath = NULL;
 		fullpath = "";
 		if (lobj != NULL) {
 			vp = NULL;
 			/* Classify the entry by the type of the last object. */
 			switch (lobj->type) {
 			case OBJT_DEFAULT:
 				kve->kve_type = KVME_TYPE_DEFAULT;
 				break;
 			case OBJT_VNODE:
 				kve->kve_type = KVME_TYPE_VNODE;
 				vp = lobj->handle;
 				/* Released by the vput() further down. */
 				vref(vp);
 				break;
 			case OBJT_SWAP:
 				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
 					kve->kve_type = KVME_TYPE_VNODE;
 					if ((lobj->flags & OBJ_TMPFS) != 0) {
 						vp = lobj->un_pager.swp.swp_tmpfs;
 						vref(vp);
 					}
 				} else {
 					kve->kve_type = KVME_TYPE_SWAP;
 				}
 				break;
 			case OBJT_DEVICE:
 				kve->kve_type = KVME_TYPE_DEVICE;
 				break;
 			case OBJT_PHYS:
 				kve->kve_type = KVME_TYPE_PHYS;
 				break;
 			case OBJT_DEAD:
 				kve->kve_type = KVME_TYPE_DEAD;
 				break;
 			case OBJT_SG:
 				kve->kve_type = KVME_TYPE_SG;
 				break;
 			case OBJT_MGTDEVICE:
 				kve->kve_type = KVME_TYPE_MGTDEVICE;
 				break;
 			default:
 				kve->kve_type = KVME_TYPE_UNKNOWN;
 				break;
 			}
 			if (lobj != obj)
 				VM_OBJECT_RUNLOCK(lobj);
 
 			/* Counts come from the top object, not from lobj. */
 			kve->kve_ref_count = obj->ref_count;
 			kve->kve_shadow_count = obj->shadow_count;
 			VM_OBJECT_RUNLOCK(obj);
 			if (vp != NULL) {
 				/*
 				 * The return value is not checked: fullpath
 				 * and freepath keep their defaults ("" and
 				 * NULL) unless vn_fullpath() replaces them.
 				 */
 				vn_fullpath(curthread, vp, &fullpath,
 				    &freepath);
 				kve->kve_vn_type = vntype_to_kinfo(vp->v_type);
 				cred = curthread->td_ucred;
 				vn_lock(vp, LK_SHARED | LK_RETRY);
 				/*
 				 * The vnode attributes are only reported,
 				 * and kve_status only marked valid, when
 				 * VOP_GETATTR() succeeds.
 				 */
 				if (VOP_GETATTR(vp, &va, cred) == 0) {
 					kve->kve_vn_fileid = va.va_fileid;
 					kve->kve_vn_fsid = va.va_fsid;
+					kve->kve_vn_fsid_freebsd11 =
+					    kve->kve_vn_fsid; /* truncate */
 					kve->kve_vn_mode =
 					    MAKEIMODE(va.va_type, va.va_mode);
 					kve->kve_vn_size = va.va_size;
 					kve->kve_vn_rdev = va.va_rdev;
+					kve->kve_vn_rdev_freebsd11 =
+					    kve->kve_vn_rdev; /* truncate */
 					kve->kve_status = KF_ATTR_VALID;
 				}
 				vput(vp);
 			}
 		} else {
 			kve->kve_type = KVME_TYPE_NONE;
 			kve->kve_ref_count = 0;
 			kve->kve_shadow_count = 0;
 		}
 
 		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
 		if (freepath != NULL)
 			free(freepath, M_TEMP);
 
 		/* Pack record size down */
 		if ((flags & KERN_VMMAP_PACK_KINFO) != 0)
 			kve->kve_structsize =
 			    offsetof(struct kinfo_vmentry, kve_path) +
 			    strlen(kve->kve_path) + 1;
 		else
 			kve->kve_structsize = sizeof(*kve);
 		kve->kve_structsize = roundup(kve->kve_structsize,
 		    sizeof(uint64_t));
 
 		/*
 		 * Halt filling and truncate rather than exceeding maxlen.
 		 * The map lock is retaken before leaving the loop because
 		 * the code after the loop unconditionally unlocks it.
 		 */
 		if (maxlen != -1 && maxlen < kve->kve_structsize) {
 			error = 0;
 			vm_map_lock_read(map);
 			break;
 		} else if (maxlen != -1)
 			maxlen -= kve->kve_structsize;
 
 		if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0)
 			error = ENOMEM;
 		vm_map_lock_read(map);
 		if (error != 0)
 			break;
 		/*
 		 * The map changed while it was unlocked: look up the last
 		 * byte of the range just reported and continue the walk
 		 * from the entry the lookup returned.
 		 */
 		if (last_timestamp != map->timestamp) {
 			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
 			entry = tmp_entry;
 		}
 	}
 	vm_map_unlock_read(map);
 	vmspace_free(vm);
 	PRELE(p);
 	free(kve, M_TEMP);
 	return (error);
 }
 
 static int
 sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 	struct sbuf sb;
 	int error, error2, *name;
 
 	name = (int *)arg1;
 	sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_vmentry), req);
 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 	error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
 	if (error != 0) {
 		sbuf_delete(&sb);
 		return (error);
 	}
 	error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO);
 	error2 = sbuf_finish(&sb);
 	sbuf_delete(&sb);
 	return (error != 0 ? error : error2);
 }
 
 #if defined(STACK) || defined(DDB)
 /*
  * Sysctl handler for kern.proc.kstack: writes one struct kinfo_kstack for
  * each thread of the process whose pid is the single remaining name
  * component.
  *
  * Returns EINVAL unless exactly one name component (the pid) was
  * supplied, the pget() error if the process cannot be obtained, and
  * otherwise the result of the last SYSCTL_OUT() (0 if none was issued).
  */
 static int
 sysctl_kern_proc_kstack(SYSCTL_HANDLER_ARGS)
 {
 	struct kinfo_kstack *kkstp;
 	int error, i, *name, numthreads;
 	lwpid_t *lwpidarray;
 	struct thread *td;
 	struct stack *st;
 	struct sbuf sb;
 	struct proc *p;
 	u_int namelen;
 
 	/*
 	 * name[0] is read below, so insist on exactly one name component
 	 * as the other kern.proc handlers in this file do.
 	 */
 	namelen = arg2;
 	if (namelen != 1)
 		return (EINVAL);
 
 	name = (int *)arg1;
 	error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 
 	kkstp = malloc(sizeof(*kkstp), M_TEMP, M_WAITOK);
 	st = stack_create();
 
 	/*
 	 * Size the tid array.  The process lock is dropped around the
 	 * allocation, so the thread count is rechecked afterwards and the
 	 * array reallocated if the process has gained threads meanwhile.
 	 */
 	lwpidarray = NULL;
 	PROC_LOCK(p);
 	do {
 		if (lwpidarray != NULL) {
 			free(lwpidarray, M_TEMP);
 			lwpidarray = NULL;
 		}
 		numthreads = p->p_numthreads;
 		PROC_UNLOCK(p);
 		lwpidarray = malloc(sizeof(*lwpidarray) * numthreads, M_TEMP,
 		    M_WAITOK | M_ZERO);
 		PROC_LOCK(p);
 	} while (numthreads < p->p_numthreads);
 
 	/*
 	 * XXXRW: During the below loop, execve(2) and countless other sorts
 	 * of changes could have taken place.  Should we check to see if the
 	 * vmspace has been replaced, or the like, in order to prevent
 	 * giving a snapshot that spans, say, execve(2), with some threads
 	 * before and some after?  Among other things, the credentials could
 	 * have changed, in which case the right to extract debug info might
 	 * no longer be assured.
 	 */
 	/* Snapshot the thread ids while the process lock is held. */
 	i = 0;
 	FOREACH_THREAD_IN_PROC(p, td) {
 		KASSERT(i < numthreads,
 		    ("sysctl_kern_proc_kstack: numthreads"));
 		lwpidarray[i] = td->td_tid;
 		i++;
 	}
 	numthreads = i;
 	for (i = 0; i < numthreads; i++) {
 		/*
 		 * The process lock is dropped in every iteration, so each
 		 * thread is looked up again by id; ids that can no longer
 		 * be found are skipped.
 		 */
 		td = thread_find(p, lwpidarray[i]);
 		if (td == NULL) {
 			continue;
 		}
 		bzero(kkstp, sizeof(*kkstp));
 		/* The trace text is formatted straight into the record. */
 		(void)sbuf_new(&sb, kkstp->kkst_trace,
 		    sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN);
 		thread_lock(td);
 		kkstp->kkst_tid = td->td_tid;
 		if (TD_IS_SWAPPED(td)) {
 			kkstp->kkst_state = KKST_STATE_SWAPPED;
 		} else if (TD_IS_RUNNING(td)) {
 			if (stack_save_td_running(st, td) == 0)
 				kkstp->kkst_state = KKST_STATE_STACKOK;
 			else
 				kkstp->kkst_state = KKST_STATE_RUNNING;
 		} else {
 			kkstp->kkst_state = KKST_STATE_STACKOK;
 			stack_save_td(st, td);
 		}
 		thread_unlock(td);
 		/* Format and copy out with the process unlocked. */
 		PROC_UNLOCK(p);
 		stack_sbuf_print(&sb, st);
 		sbuf_finish(&sb);
 		sbuf_delete(&sb);
 		error = SYSCTL_OUT(req, kkstp, sizeof(*kkstp));
 		PROC_LOCK(p);
 		if (error)
 			break;
 	}
 	_PRELE(p);
 	PROC_UNLOCK(p);
 	if (lwpidarray != NULL)
 		free(lwpidarray, M_TEMP);
 	stack_destroy(st);
 	free(kkstp, M_TEMP);
 	return (error);
 }
 #endif
 
 /*
  * This sysctl allows a process to retrieve the full list of groups from
  * itself or another process.
  */
 static int
 sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS)
 {
 	pid_t *pidp = (pid_t *)arg1;
 	unsigned int arglen = arg2;
 	struct proc *p;
 	struct ucred *cred;
 	int error;
 
 	if (arglen != 1)
 		return (EINVAL);
 	if (*pidp == -1) {	/* -1 means this process */
 		p = req->td->td_proc;
 		PROC_LOCK(p);
 	} else {
 		error = pget(*pidp, PGET_CANSEE, &p);
 		if (error != 0)
 			return (error);
 	}
 
 	cred = crhold(p->p_ucred);
 	PROC_UNLOCK(p);
 
 	error = SYSCTL_OUT(req, cred->cr_groups,
 	    cred->cr_ngroups * sizeof(gid_t));
 	crfree(cred);
 	return (error);
 }
 
 /*
  * This sysctl allows a process to retrieve or/and set the resource limit for
  * another process.
  */
 static int
 sysctl_kern_proc_rlimit(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct rlimit rlim;
 	struct proc *p;
 	u_int which;
 	int flags, error;
 
 	if (namelen != 2)
 		return (EINVAL);
 
 	which = (u_int)name[1];
 	if (which >= RLIM_NLIMITS)
 		return (EINVAL);
 
 	if (req->newptr != NULL && req->newlen != sizeof(rlim))
 		return (EINVAL);
 
 	flags = PGET_HOLD | PGET_NOTWEXIT;
 	if (req->newptr != NULL)
 		flags |= PGET_CANDEBUG;
 	else
 		flags |= PGET_CANSEE;
 	error = pget((pid_t)name[0], flags, &p);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Retrieve limit.
 	 */
 	if (req->oldptr != NULL) {
 		PROC_LOCK(p);
 		lim_rlimit_proc(p, which, &rlim);
 		PROC_UNLOCK(p);
 	}
 	error = SYSCTL_OUT(req, &rlim, sizeof(rlim));
 	if (error != 0)
 		goto errout;
 
 	/*
 	 * Set limit.
 	 */
 	if (req->newptr != NULL) {
 		error = SYSCTL_IN(req, &rlim, sizeof(rlim));
 		if (error == 0)
 			error = kern_proc_setrlimit(curthread, p, which, &rlim);
 	}
 
 errout:
 	PRELE(p);
 	return (error);
 }
 
 /*
  * This sysctl allows a process to retrieve ps_strings structure location of
  * another process.
  */
 static int
 sysctl_kern_proc_ps_strings(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct proc *p;
 	vm_offset_t ps_strings;
 	int error;
 #ifdef COMPAT_FREEBSD32
 	uint32_t ps_strings32;
 #endif
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
 	if (error != 0)
 		return (error);
 #ifdef COMPAT_FREEBSD32
 	if ((req->flags & SCTL_MASK32) != 0) {
 		/*
 		 * We return 0 if the 32 bit emulation request is for a 64 bit
 		 * process.
 		 */
 		ps_strings32 = SV_PROC_FLAG(p, SV_ILP32) != 0 ?
 		    PTROUT(p->p_sysent->sv_psstrings) : 0;
 		PROC_UNLOCK(p);
 		error = SYSCTL_OUT(req, &ps_strings32, sizeof(ps_strings32));
 		return (error);
 	}
 #endif
 	ps_strings = p->p_sysent->sv_psstrings;
 	PROC_UNLOCK(p);
 	error = SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings));
 	return (error);
 }
 
 /*
  * This sysctl allows a process to retrieve umask of another process.
  */
 static int
 sysctl_kern_proc_umask(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct proc *p;
 	int error;
 	u_short fd_cmask;
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 
 	FILEDESC_SLOCK(p->p_fd);
 	fd_cmask = p->p_fd->fd_cmask;
 	FILEDESC_SUNLOCK(p->p_fd);
 	PRELE(p);
 	error = SYSCTL_OUT(req, &fd_cmask, sizeof(fd_cmask));
 	return (error);
 }
 
 /*
  * This sysctl allows a process to set and retrieve binary osreldate of
  * another process.
  */
 static int
 sysctl_kern_proc_osrel(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct proc *p;
 	int flags, error, osrel;
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	if (req->newptr != NULL && req->newlen != sizeof(osrel))
 		return (EINVAL);
 
 	flags = PGET_HOLD | PGET_NOTWEXIT;
 	if (req->newptr != NULL)
 		flags |= PGET_CANDEBUG;
 	else
 		flags |= PGET_CANSEE;
 	error = pget((pid_t)name[0], flags, &p);
 	if (error != 0)
 		return (error);
 
 	error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel));
 	if (error != 0)
 		goto errout;
 
 	if (req->newptr != NULL) {
 		error = SYSCTL_IN(req, &osrel, sizeof(osrel));
 		if (error != 0)
 			goto errout;
 		if (osrel < 0) {
 			error = EINVAL;
 			goto errout;
 		}
 		p->p_osrel = osrel;
 	}
 errout:
 	PRELE(p);
 	return (error);
 }
 
 static int
 sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct proc *p;
 	struct kinfo_sigtramp kst;
 	const struct sysentvec *sv;
 	int error;
 #ifdef COMPAT_FREEBSD32
 	struct kinfo_sigtramp32 kst32;
 #endif
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
 	if (error != 0)
 		return (error);
 	sv = p->p_sysent;
 #ifdef COMPAT_FREEBSD32
 	if ((req->flags & SCTL_MASK32) != 0) {
 		bzero(&kst32, sizeof(kst32));
 		if (SV_PROC_FLAG(p, SV_ILP32)) {
 			if (sv->sv_sigcode_base != 0) {
 				kst32.ksigtramp_start = sv->sv_sigcode_base;
 				kst32.ksigtramp_end = sv->sv_sigcode_base +
 				    *sv->sv_szsigcode;
 			} else {
 				kst32.ksigtramp_start = sv->sv_psstrings -
 				    *sv->sv_szsigcode;
 				kst32.ksigtramp_end = sv->sv_psstrings;
 			}
 		}
 		PROC_UNLOCK(p);
 		error = SYSCTL_OUT(req, &kst32, sizeof(kst32));
 		return (error);
 	}
 #endif
 	bzero(&kst, sizeof(kst));
 	if (sv->sv_sigcode_base != 0) {
 		kst.ksigtramp_start = (char *)sv->sv_sigcode_base;
 		kst.ksigtramp_end = (char *)sv->sv_sigcode_base +
 		    *sv->sv_szsigcode;
 	} else {
 		kst.ksigtramp_start = (char *)sv->sv_psstrings -
 		    *sv->sv_szsigcode;
 		kst.ksigtramp_end = (char *)sv->sv_psstrings;
 	}
 	PROC_UNLOCK(p);
 	error = SYSCTL_OUT(req, &kst, sizeof(kst));
 	return (error);
 }
 
 /*
  * Registration of the kern.proc sysctl subtree.  Each entry below binds
  * one KERN_PROC_* selector to the handler that serves it.
  */
 SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD,  0, "Process table");
 
 SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT|
 	CTLFLAG_MPSAFE, 0, 0, sysctl_kern_proc, "S,proc",
 	"Return entire process table");
 
 /* Process-table selectors, all served by sysctl_kern_proc(). */
 static SYSCTL_NODE(_kern_proc, KERN_PROC_GID, gid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_RGID, rgid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_SESSION, sid, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Return process table, no threads");
 
 /* Per-process data served by dedicated handlers. */
 static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args,
 	CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE,
 	sysctl_kern_proc_args, "Process argument list");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc_env, "Process environment");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_AUXV, auxv, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_auxv, "Process ELF auxiliary vector");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_pathname, "Process executable path");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_SV_NAME, sv_name, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_sv_name,
 	"Process syscall vector name (ABI type)");
 
 /*
  * The process-table selectors again, with KERN_PROC_INC_THREAD or-ed into
  * the selector value; they share sysctl_kern_proc() with the plain ones.
  */
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_GID | KERN_PROC_INC_THREAD), gid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_INC_THREAD), pgrp_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_RGID | KERN_PROC_INC_THREAD), rgid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_SESSION | KERN_PROC_INC_THREAD),
 	sid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_INC_THREAD), tty_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_INC_THREAD), uid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_INC_THREAD), ruid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc,
 	"Return process table, no threads");
 
 /* The old vm map format is only registered under COMPAT_FREEBSD7. */
 #ifdef COMPAT_FREEBSD7
 static SYSCTL_NODE(_kern_proc, KERN_PROC_OVMMAP, ovmmap, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_ovmmap, "Old Process vm map entries");
 #endif
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_vmmap, "Process vm map entries");
 
 /* Guarded by the same condition as the sysctl_kern_proc_kstack() handler. */
 #if defined(STACK) || defined(DDB)
 static SYSCTL_NODE(_kern_proc, KERN_PROC_KSTACK, kstack, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_kstack, "Process kernel stacks");
 #endif
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_GROUPS, groups, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_groups, "Process groups");
 
 /* Registered read-write: the handler also accepts a new limit. */
 static SYSCTL_NODE(_kern_proc, KERN_PROC_RLIMIT, rlimit, CTLFLAG_RW |
 	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_rlimit,
 	"Process resource limits");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PS_STRINGS, ps_strings, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_ps_strings,
 	"Process ps_strings location");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_UMASK, umask, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_umask, "Process umask");
 
 /* Registered read-write: the handler also accepts a new osreldate. */
 static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW |
 	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_osrel,
 	"Process binary osreldate");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp,
 	"Process signal trampoline location");
 
 /*
  * Generation counter sampled by stop_all_proc() under allproc_lock at the
  * start of a pass and compared at the end; a difference forces another
  * pass.  NOTE(review): presumably incremented whenever a process is added
  * to allproc -- the writer is not in this part of the file; confirm.
  */
 int allproc_gen;
 
 /*
  * The purpose of stop_all_proc() is to stop all processes which have
  * usermode, except the current process for obvious reasons.  This makes
  * it somewhat unreliable when invoked from a multithreaded process.  The
  * service must not be user-callable anyway.
  *
  * Passes over allproc are repeated until one completes without stopping,
  * skipping or failing on any process and without allproc_gen changing.
  */
 void
 stop_all_proc(void)
 {
 	struct proc *cp, *p;
 	int r, gen;
 	bool restart, seen_stopped, seen_exiting, stopped_some;
 
 	cp = curproc;
 allproc_loop:
 	sx_xlock(&allproc_lock);
 	gen = allproc_gen;
 	seen_exiting = seen_stopped = stopped_some = restart = false;
 	/*
 	 * The current process is moved to the head of allproc and then
 	 * used as a cursor: each iteration takes the process following it
 	 * and moves the cursor behind that process.  The position in the
 	 * list therefore survives dropping allproc_lock inside the loop.
 	 */
 	LIST_REMOVE(cp, p_list);
 	LIST_INSERT_HEAD(&allproc, cp, p_list);
 	for (;;) {
 		p = LIST_NEXT(cp, p_list);
 		if (p == NULL)
 			break;
 		LIST_REMOVE(cp, p_list);
 		LIST_INSERT_AFTER(p, cp, p_list);
 		PROC_LOCK(p);
 		/* Kernel, system and already totally stopped: skip. */
 		if ((p->p_flag & (P_KPROC | P_SYSTEM | P_TOTAL_STOP)) != 0) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		/* Exiting processes are skipped but force another pass. */
 		if ((p->p_flag & P_WEXIT) != 0) {
 			seen_exiting = true;
 			PROC_UNLOCK(p);
 			continue;
 		}
 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 			/*
 			 * Stopped processes are tolerated when there
 			 * are no other processes which might continue
 			 * them.  P_STOPPED_SINGLE but not
 			 * P_TOTAL_STOP process still has at least one
 			 * thread running.
 			 */
 			seen_stopped = true;
 			PROC_UNLOCK(p);
 			continue;
 		}
 		/*
 		 * Hold the process and release allproc_lock around
 		 * thread_single(); a non-zero result requests a retry.
 		 */
 		_PHOLD(p);
 		sx_xunlock(&allproc_lock);
 		r = thread_single(p, SINGLE_ALLPROC);
 		if (r != 0)
 			restart = true;
 		else
 			stopped_some = true;
 		_PRELE(p);
 		PROC_UNLOCK(p);
 		sx_xlock(&allproc_lock);
 	}
 	/* Catch forked children we did not see in iteration. */
 	if (gen != allproc_gen)
 		restart = true;
 	sx_xunlock(&allproc_lock);
 	/* Any activity in this pass means the list must be rescanned. */
 	if (restart || stopped_some || seen_exiting || seen_stopped) {
 		kern_yield(PRI_USER);
 		goto allproc_loop;
 	}
 }
 
 /*
  * Walk allproc once and call thread_single_end(p, SINGLE_ALLPROC) on
  * every process that has P_TOTAL_STOP set.
  */
 void
 resume_all_proc(void)
 {
 	struct proc *cp, *p;
 
 	cp = curproc;
 	sx_xlock(&allproc_lock);
 	/*
 	 * The current process is moved to the head of allproc and then
 	 * used as a cursor: each iteration takes the process following it
 	 * and moves the cursor behind that process, so the position in the
 	 * list survives dropping allproc_lock inside the loop.
 	 */
 	LIST_REMOVE(cp, p_list);
 	LIST_INSERT_HEAD(&allproc, cp, p_list);
 	for (;;) {
 		p = LIST_NEXT(cp, p_list);
 		if (p == NULL)
 			break;
 		LIST_REMOVE(cp, p_list);
 		LIST_INSERT_AFTER(p, cp, p_list);
 		PROC_LOCK(p);
 		if ((p->p_flag & P_TOTAL_STOP) != 0) {
 			/*
 			 * allproc_lock is released and the process held
 			 * for the duration of thread_single_end().
 			 */
 			sx_xunlock(&allproc_lock);
 			_PHOLD(p);
 			thread_single_end(p, SINGLE_ALLPROC);
 			_PRELE(p);
 			PROC_UNLOCK(p);
 			sx_xlock(&allproc_lock);
 		} else {
 			PROC_UNLOCK(p);
 		}
 	}
 	sx_xunlock(&allproc_lock);
 }
 
 /* #define	TOTAL_STOP_DEBUG	1 */
 #ifdef TOTAL_STOP_DEBUG
 /* Spun on by the handler below after everything has been stopped. */
 volatile static int ap_resume;
 #include <sys/mount.h>
 
 /*
  * Debug-only sysctl handler.  Writing a non-zero integer stops all
  * processes and suspends the syncer, busy-waits until ap_resume becomes
  * non-zero, then resumes the syncer and all processes.  Reads, and writes
  * of zero, only reset ap_resume to 0.
  */
 static int
 sysctl_debug_stop_all_proc(SYSCTL_HANDLER_ARGS)
 {
 	int request, rv;
 
 	ap_resume = 0;
 	request = 0;
 	rv = sysctl_handle_int(oidp, &request, 0, req);
 	if (rv != 0 || req->newptr == NULL)
 		return (rv);
 	if (request == 0)
 		return (0);
 
 	stop_all_proc();
 	syncer_suspend();
 	while (ap_resume == 0)
 		;
 	syncer_resume();
 	resume_all_proc();
 	return (0);
 }
 
 SYSCTL_PROC(_debug, OID_AUTO, stop_all_proc, CTLTYPE_INT | CTLFLAG_RW |
     CTLFLAG_MPSAFE, __DEVOLATILE(int *, &ap_resume), 0,
     sysctl_debug_stop_all_proc, "I",
     "");
 #endif
Index: head/sys/kern/makesyscalls.sh
===================================================================
--- head/sys/kern/makesyscalls.sh	(revision 318735)
+++ head/sys/kern/makesyscalls.sh	(revision 318736)
@@ -1,683 +1,712 @@
 #! /bin/sh -
 #	@(#)makesyscalls.sh	8.1 (Berkeley) 6/10/93
 # $FreeBSD$
 
 set -e
 
 # name of compat options:
 compat=COMPAT_43
 compat4=COMPAT_FREEBSD4
 compat6=COMPAT_FREEBSD6
 compat7=COMPAT_FREEBSD7
 compat10=COMPAT_FREEBSD10
+compat11=COMPAT_FREEBSD11
 
 # output files:
 sysnames="syscalls.c"
 sysproto="../sys/sysproto.h"
 sysproto_h=_SYS_SYSPROTO_H_
 syshdr="../sys/syscall.h"
 sysmk="../sys/syscall.mk"
 syssw="init_sysent.c"
 syscallprefix="SYS_"
 switchname="sysent"
 namesname="syscallnames"
 systrace="systrace_args.c"
 
 # tmp files:
 sysaue="sysent.aue.$$"
 sysdcl="sysent.dcl.$$"
 syscompat="sysent.compat.$$"
 syscompatdcl="sysent.compatdcl.$$"
 syscompat4="sysent.compat4.$$"
 syscompat4dcl="sysent.compat4dcl.$$"
 syscompat6="sysent.compat6.$$"
 syscompat6dcl="sysent.compat6dcl.$$"
 syscompat7="sysent.compat7.$$"
 syscompat7dcl="sysent.compat7dcl.$$"
 syscompat10="sysent.compat10.$$"
 syscompat10dcl="sysent.compat10dcl.$$"
+syscompat11="sysent.compat11.$$"
+syscompat11dcl="sysent.compat11dcl.$$"
 sysent="sysent.switch.$$"
 sysinc="sysinc.switch.$$"
 sysarg="sysarg.switch.$$"
 sysprotoend="sysprotoend.$$"
 systracetmp="systrace.$$"
 systraceret="systraceret.$$"
 
 if [ -r capabilities.conf ]; then
 	capenabled=`cat capabilities.conf | grep -v "^#" | grep -v "^$"`
 	capenabled=`echo $capenabled | sed 's/ /,/g'`
 else
 	capenabled=""
 fi
 
-trap "rm $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $syscompat10 $syscompat10dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp $systraceret" 0
+trap "rm $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $syscompat10 $syscompat10dcl $syscompat11 $syscompat11dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp $systraceret" 0
 
-touch $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $syscompat10 $syscompat10dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp $systraceret
+touch $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $syscompat10 $syscompat10dcl $syscompat11 $syscompat11dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp $systraceret
 
 case $# in
     0)	echo "usage: $0 input-file <config-file>" 1>&2
 	exit 1
 	;;
 esac
 
 if [ -n "$2" ]; then
 	. $2
 fi
 
 sed -e '
 :join
 	/\\$/{a\
 
 	N
 	s/\\\n//
 	b join
 	}
 2,${
 	/^#/!s/\([{}()*,]\)/ \1 /g
 }
 ' < $1 | awk "
 	BEGIN {
 		sysaue = \"$sysaue\"
 		sysdcl = \"$sysdcl\"
 		sysproto = \"$sysproto\"
 		sysprotoend = \"$sysprotoend\"
 		sysproto_h = \"$sysproto_h\"
 		syscompat = \"$syscompat\"
 		syscompatdcl = \"$syscompatdcl\"
 		syscompat4 = \"$syscompat4\"
 		syscompat4dcl = \"$syscompat4dcl\"
 		syscompat6 = \"$syscompat6\"
 		syscompat6dcl = \"$syscompat6dcl\"
 		syscompat7 = \"$syscompat7\"
 		syscompat7dcl = \"$syscompat7dcl\"
 		syscompat10 = \"$syscompat10\"
 		syscompat10dcl = \"$syscompat10dcl\"
+		syscompat11 = \"$syscompat11\"
+		syscompat11dcl = \"$syscompat11dcl\"
 		sysent = \"$sysent\"
 		syssw = \"$syssw\"
 		sysinc = \"$sysinc\"
 		sysarg = \"$sysarg\"
 		sysnames = \"$sysnames\"
 		syshdr = \"$syshdr\"
 		sysmk = \"$sysmk\"
 		systrace = \"$systrace\"
 		systracetmp = \"$systracetmp\"
 		systraceret = \"$systraceret\"
 		compat = \"$compat\"
 		compat4 = \"$compat4\"
 		compat6 = \"$compat6\"
 		compat7 = \"$compat7\"
 		compat10 = \"$compat10\"
+		compat11 = \"$compat11\"
 		syscallprefix = \"$syscallprefix\"
 		switchname = \"$switchname\"
 		namesname = \"$namesname\"
 		infile = \"$1\"
 		capenabled_string = \"$capenabled\"
 		"'
 
 		split(capenabled_string, capenabled, ",");
 
 		printf "\n/* The casts are bogus but will do for now. */\n" > sysent
 		printf "struct sysent %s[] = {\n",switchname > sysent
 
 		printf "/*\n * System call switch table.\n *\n" > syssw
 		printf " * DO NOT EDIT-- this file is automatically generated.\n" > syssw
 		printf " * $%s$\n", "FreeBSD" > syssw
 		printf " */\n\n" > syssw
 
 		printf "/*\n * System call prototypes.\n *\n" > sysarg
 		printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysarg
 		printf " * $%s$\n", "FreeBSD" > sysarg
 		printf " */\n\n" > sysarg
 		printf "#ifndef %s\n", sysproto_h > sysarg
 		printf "#define\t%s\n\n", sysproto_h > sysarg
 		printf "#include <sys/signal.h>\n" > sysarg
 		printf "#include <sys/acl.h>\n" > sysarg
 		printf "#include <sys/cpuset.h>\n" > sysarg
 		printf "#include <sys/_ffcounter.h>\n" > sysarg
 		printf "#include <sys/_semaphore.h>\n" > sysarg
 		printf "#include <sys/ucontext.h>\n" > sysarg
 		printf "#include <sys/wait.h>\n\n" > sysarg
 		printf "#include <bsm/audit_kevents.h>\n\n" > sysarg
 		printf "struct proc;\n\n" > sysarg
 		printf "struct thread;\n\n" > sysarg
 		printf "#define\tPAD_(t)\t(sizeof(register_t) <= sizeof(t) ? \\\n" > sysarg
 		printf "\t\t0 : sizeof(register_t) - sizeof(t))\n\n" > sysarg
 		printf "#if BYTE_ORDER == LITTLE_ENDIAN\n"> sysarg
 		printf "#define\tPADL_(t)\t0\n" > sysarg
 		printf "#define\tPADR_(t)\tPAD_(t)\n" > sysarg
 		printf "#else\n" > sysarg
 		printf "#define\tPADL_(t)\tPAD_(t)\n" > sysarg
 		printf "#define\tPADR_(t)\t0\n" > sysarg
 		printf "#endif\n\n" > sysarg
 
 		printf "\n#ifdef %s\n\n", compat > syscompat
 		printf "\n#ifdef %s\n\n", compat4 > syscompat4
 		printf "\n#ifdef %s\n\n", compat6 > syscompat6
 		printf "\n#ifdef %s\n\n", compat7 > syscompat7
 		printf "\n#ifdef %s\n\n", compat10 > syscompat10
+		printf "\n#ifdef %s\n\n", compat11 > syscompat11
 
 		printf "/*\n * System call names.\n *\n" > sysnames
 		printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysnames
 		printf " * $%s$\n", "FreeBSD" > sysnames
 		printf " */\n\n" > sysnames
 		printf "const char *%s[] = {\n", namesname > sysnames
 
 		printf "/*\n * System call numbers.\n *\n" > syshdr
 		printf " * DO NOT EDIT-- this file is automatically generated.\n" > syshdr
 		printf " * $%s$\n", "FreeBSD" > syshdr
 		printf " */\n\n" > syshdr
 
 		printf "# FreeBSD system call object files.\n" > sysmk
 		printf "# DO NOT EDIT-- this file is automatically generated.\n" > sysmk
 		printf "# $%s$\n", "FreeBSD" > sysmk
 		printf "MIASM = " > sysmk
 
 		printf "/*\n * System call argument to DTrace register array converstion.\n *\n" > systrace
 		printf " * DO NOT EDIT-- this file is automatically generated.\n" > systrace
 		printf " * $%s$\n", "FreeBSD" > systrace
 		printf " * This file is part of the DTrace syscall provider.\n */\n\n" > systrace
 		printf "static void\nsystrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)\n{\n" > systrace
 		printf "\tint64_t *iarg  = (int64_t *) uarg;\n" > systrace
 		printf "\tswitch (sysnum) {\n" > systrace
 
 		printf "static void\nsystrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)\n{\n\tconst char *p = NULL;\n" > systracetmp
 		printf "\tswitch (sysnum) {\n" > systracetmp
 
 		printf "static void\nsystrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)\n{\n\tconst char *p = NULL;\n" > systraceret
 		printf "\tswitch (sysnum) {\n" > systraceret
 	}
 	NR == 1 {
 		next
 	}
 	NF == 0 || $1 ~ /^;/ {
 		next
 	}
 	$1 ~ /^#[ 	]*include/ {
 		print > sysinc
 		next
 	}
 	$1 ~ /^#[ 	]*if/ {
 		print > sysent
 		print > sysdcl
 		print > sysarg
 		print > syscompat
 		print > syscompat4
 		print > syscompat6
 		print > syscompat7
 		print > syscompat10
+		print > syscompat11
 		print > sysnames
 		print > systrace
 		print > systracetmp
 		print > systraceret
 		savesyscall = syscall
 		next
 	}
 	$1 ~ /^#[ 	]*else/ {
 		print > sysent
 		print > sysdcl
 		print > sysarg
 		print > syscompat
 		print > syscompat4
 		print > syscompat6
 		print > syscompat7
 		print > syscompat10
+		print > syscompat11
 		print > sysnames
 		print > systrace
 		print > systracetmp
 		print > systraceret
 		syscall = savesyscall
 		next
 	}
 	$1 ~ /^#/ {
 		print > sysent
 		print > sysdcl
 		print > sysarg
 		print > syscompat
 		print > syscompat4
 		print > syscompat6
 		print > syscompat7
 		print > syscompat10
+		print > syscompat11
 		print > sysnames
 		print > systrace
 		print > systracetmp
 		print > systraceret
 		next
 	}
 	syscall != $1 {
 		printf "%s: line %d: syscall number out of sync at %d\n",
 		    infile, NR, syscall
 		printf "line is:\n"
 		print
 		exit 1
 	}
 	# True when the first "|"-separated word of the type field ($3) is
 	# "name".  "words" and "count" are locals, not arguments.
 	function type(name, words, count) {
 		count = split($3, words, /\|/)
 		if (count < 1)
 			return 0
 		return (words[1] == name)
 	}
 	# True when any "|"-separated word of the type field ($3) is "name".
 	# "words", "idx" and "count" are locals, not arguments.
 	function flag(name, words, idx, count) {
 		count = split($3, words, /\|/)
 		idx = 1
 		while (idx <= count) {
 			if (words[idx] == name)
 				return 1
 			idx++
 		}
 		return 0
 	}
 	# Write tabs to the sysent file: one tab to advance "column" to the
 	# next multiple of 8, then one more per 8 columns until 56 is reached.
 	function align_sysent_comment(column) {
 		printf("\t") > sysent
 		column += 8 - column % 8
 		for (; column < 56; column += 8)
 			printf("\t") > sysent
 	}
 	# Report an unexpected token ("was") on the current input line,
 	# naming the token that was "wanted", and exit with status 1.
 	function parserr(was, wanted) {
 		printf("%s: line %d: unexpected %s (expected %s)\n",
 		    infile, NR, was, wanted)
 		exit 1
 	}
 	# Parse the current input record into the globals used by the output
 	# rules: funcname, funcalias, argalias, rettype, syscallret, thr_flag,
 	# argc, argtype[], argname[] and argssize.  flags is set to
 	# SYF_CAPENABLED when funcname is listed in capenabled.  Malformed
 	# records are reported through parserr(), which exits.
 	function parseline() {
 		f=4			# toss number, type, audit event
 		argc= 0;
 		argssize = "0"
 		thr_flag = "SY_THR_STATIC"
 		if (flag("NOTSTATIC")) {
 			thr_flag = "SY_THR_ABSENT"
 		}
 		# Fields after the closing brace, when present, are the
 		# function alias, the argument struct alias and the return type.
 		if ($NF != "}") {
 			funcalias=$(NF-2)
 			argalias=$(NF-1)
 			rettype=$NF
 			end=NF-3
 		} else {
 			funcalias=""
 			argalias=""
 			rettype="int"
 			end=NF
 		}
 		# NODEF records are not parsed any further: the name comes
 		# from field 4 and the argument size from field 6.
 		if (flag("NODEF")) {
 			auditev="AUE_NULL"
 			funcname=$4
 			argssize = "AS(" $6 ")"
 			return
 		}
 		# Check the opening brace, then peel the trailing "}", ";"
 		# and ")" tokens off the end of the prototype.
 		if ($f != "{")
 			parserr($f, "{")
 		f++
 		if ($end != "}")
 			parserr($end, "}")
 		end--
 		if ($end != ";")
 			parserr($end, ";")
 		end--
 		if ($end != ")")
 			parserr($end, ")")
 		end--
 
 		syscallret=$f
 		f++
 
 		funcname=$f
 
 		#
 		# We now know the func name, so define a flags field for it.
 		# Do this before any other processing as we may return early
 		# from it.
 		#
 		for (cap in capenabled) {
 			if (funcname == capenabled[cap]) {
 				flags = "SYF_CAPENABLED";
 				break;
 			}
 		}
 
 		if (funcalias == "")
 			funcalias = funcname
 		# Without an explicit alias the argument struct is named
 		# after the function, with a prefix per compat flag.
 		if (argalias == "") {
 			argalias = funcname "_args"
 			if (flag("COMPAT"))
 				argalias = "o" argalias
 			if (flag("COMPAT4"))
 				argalias = "freebsd4_" argalias
 			if (flag("COMPAT6"))
 				argalias = "freebsd6_" argalias
 			if (flag("COMPAT7"))
 				argalias = "freebsd7_" argalias
 			if (flag("COMPAT10"))
 				argalias = "freebsd10_" argalias
+			if (flag("COMPAT11"))
+				argalias = "freebsd11_" argalias
 		}
 		f++
 
 		# The token after the function name must open the argument
 		# list, so "(" is the token to name in the error message.
 		if ($f != "(")
 			parserr($f, "(")
 		f++
 
 		# A single remaining token must be "void": no arguments.
 		if (f == end) {
 			if ($f != "void")
 				parserr($f, "argument definition")
 			return
 		}
 
 		# Each argument is a run of type tokens followed by the
 		# name; the name is the token just before a "," or the end.
 		while (f <= end) {
 			argc++
 			argtype[argc]=""
 			oldf=""
 			while (f < end && $(f+1) != ",") {
 				if (argtype[argc] != "" && oldf != "*")
 					argtype[argc] = argtype[argc]" ";
 				argtype[argc] = argtype[argc]$f;
 				oldf = $f;
 				f++
 			}
 			if (argtype[argc] == "")
 				parserr($f, "argument definition")
 			argname[argc]=$f;
 			f += 2;			# skip name, and any comma
 		}
 		if (argc != 0)
 			argssize = "AS(" argalias ")"
 	}
 	#
 	# Collect the free-form description printed by the OBSOL and UNIMPL
 	# rules.  It starts as field 4; on lines with fewer than 7 fields
 	# every remaining field is appended, separated by single spaces.
 	#
 	{	comment = $4
 		if (NF < 7)
 			for (i = 5; i <= NF; i++)
 				comment = comment " " $i
 	}
 
 	#
 	# The AUE_ audit event identifier.
 	# Taken from the second column of every input line; parseline()
 	# replaces it with AUE_NULL for NODEF entries.
 	#
 	{
 		auditev = $2;
 	}
 
 	#
 	# The flags, if any.
 	# Reset to "0" for each input line; parseline() switches this to
 	# SYF_CAPENABLED when the function name is listed in capenabled[].
 	#
 	{
 		flags = "0";
 	}
 
 	#
 	# Regular system calls (STD, NODEF, NOARGS, NOPROTO, NOSTD).
 	# After parseline() has split the prototype this rule emits, in order:
 	#   - the systrace argument, argument-description and return-type
 	#     switch cases,
 	#   - the argument structure (sysarg) and the prototype plus AUE_
 	#     define (sysdcl/sysaue), unless suppressed by NOARGS, NOPROTO
 	#     or NODEF,
 	#   - the sysent table row and the syscall name table entry,
 	#   - the syscall number define and makefile object, unless NODEF.
 	# The running syscall number is then advanced and the line is done.
 	#
 	type("STD") || type("NODEF") || type("NOARGS") || type("NOPROTO") \
 	    || type("NOSTD") {
 		parseline()
 		printf("\t/* %s */\n\tcase %d: {\n", funcname, syscall) > systrace
 		printf("\t/* %s */\n\tcase %d:\n", funcname, syscall) > systracetmp
 		printf("\t/* %s */\n\tcase %d:\n", funcname, syscall) > systraceret
 		if (argc > 0) {
 			printf("\t\tswitch(ndx) {\n") > systracetmp
 			printf("\t\tstruct %s *p = params;\n", argalias) > systrace
 			for (i = 1; i <= argc; i++) {
 				arg = argtype[i]
 				sub("__restrict$", "", arg)
 				# Pointer arguments are described as userland pointers.
 				if (index(arg, "*") > 0)
 					printf("\t\tcase %d:\n\t\t\tp = \"userland %s\";\n\t\t\tbreak;\n", i - 1, arg) > systracetmp
 				else
 					printf("\t\tcase %d:\n\t\t\tp = \"%s\";\n\t\t\tbreak;\n", i - 1, arg) > systracetmp
 				# Pick uarg[] or iarg[] and any cast from the argument type.
 				if (index(arg, "*") > 0 || arg == "caddr_t")
 					printf("\t\tuarg[%d] = (intptr_t) p->%s; /* %s */\n", \
 					     i - 1, \
 					     argname[i], arg) > systrace
 				else if (arg == "union l_semun")
 					printf("\t\tuarg[%d] = p->%s.buf; /* %s */\n", \
 					     i - 1, \
 					     argname[i], arg) > systrace
 				else if (substr(arg, 1, 1) == "u" || arg == "size_t")
 					printf("\t\tuarg[%d] = p->%s; /* %s */\n", \
 					     i - 1, \
 					     argname[i], arg) > systrace
 				else
 					printf("\t\tiarg[%d] = p->%s; /* %s */\n", \
 					     i - 1, \
 					     argname[i], arg) > systrace
 			}
 			printf("\t\tdefault:\n\t\t\tbreak;\n\t\t};\n") > systracetmp
 
 			printf("\t\tif (ndx == 0 || ndx == 1)\n") > systraceret
 			printf("\t\t\tp = \"%s\";\n", syscallret) > systraceret
 			printf("\t\tbreak;\n") > systraceret
 		}
 		printf("\t\t*n_args = %d;\n\t\tbreak;\n\t}\n", argc) > systrace
 		printf("\t\tbreak;\n") > systracetmp
 		# Argument structure: real members when there are arguments,
 		# otherwise a single dummy register_t member.
 		if (argc != 0 && !flag("NOARGS") && !flag("NOPROTO") && \
 		    !flag("NODEF")) {
 			printf("struct %s {\n", argalias) > sysarg
 			for (i = 1; i <= argc; i++)
 				printf("\tchar %s_l_[PADL_(%s)]; " \
 				    "%s %s; char %s_r_[PADR_(%s)];\n",
 				    argname[i], argtype[i],
 				    argtype[i], argname[i],
 				    argname[i], argtype[i]) > sysarg
 			printf("};\n") > sysarg
 		}
 		else if (!flag("NOARGS") && !flag("NOPROTO") && !flag("NODEF"))
 			printf("struct %s {\n\tregister_t dummy;\n};\n",
 			    argalias) > sysarg
 		# Prototype: the listed names and prefixes are emitted as-is,
 		# every other function gets a sys_ prefix.
 		if (!flag("NOPROTO") && !flag("NODEF")) {
 			if (funcname == "nosys" || funcname == "lkmnosys" ||
 			    funcname == "sysarch" || funcname ~ /^freebsd/ || 
 			    funcname ~ /^linux/ || funcname ~ /^ibcs2/ ||
 			    funcname ~ /^xenix/ || funcname ~ /^cloudabi/) {
 				printf("%s\t%s(struct thread *, struct %s *)",
 				    rettype, funcname, argalias) > sysdcl
 			} else {
 				printf("%s\tsys_%s(struct thread *, struct %s *)",
 				    rettype, funcname, argalias) > sysdcl
 			} 
 			printf(";\n") > sysdcl
 			printf("#define\t%sAUE_%s\t%s\n", syscallprefix,
 			    funcalias, auditev) > sysaue
 		}
 		# sysent row; column tracks the output width so that the
 		# trailing comment can be aligned by align_sysent_comment().
 		printf("\t{ %s, (sy_call_t *)", argssize) > sysent
 		column = 8 + 2 + length(argssize) + 15
 		if (flag("NOSTD")) {
 			printf("lkmressys, AUE_NULL, NULL, 0, 0, %s, SY_THR_ABSENT },", flags) > sysent
 			column = column + length("lkmressys") + length("AUE_NULL") + 3
 		} else {
 			if (funcname == "nosys" || funcname == "sysarch" || 
 			    funcname == "lkmnosys" || funcname ~ /^freebsd/ ||
 			    funcname ~ /^linux/ || funcname ~ /^ibcs2/ ||
 			    funcname ~ /^xenix/ || funcname ~ /^cloudabi/) {
 				printf("%s, %s, NULL, 0, 0, %s, %s },", funcname, auditev, flags, thr_flag) > sysent
 				column = column + length(funcname) + length(auditev) + length(flags) + 3 
 			} else {
 				printf("sys_%s, %s, NULL, 0, 0, %s, %s },", funcname, auditev, flags, thr_flag) > sysent
 				column = column + length(funcname) + length(auditev) + length(flags) + 3 + 4
 			} 
 		} 
 		align_sysent_comment(column)
 		printf("/* %d = %s */\n", syscall, funcalias) > sysent
 		printf("\t\"%s\",\t\t\t/* %d = %s */\n",
 		    funcalias, syscall, funcalias) > sysnames
 		if (!flag("NODEF")) {
 			printf("#define\t%s%s\t%d\n", syscallprefix,
 		    	    funcalias, syscall) > syshdr
 			printf(" \\\n\t%s.o", funcalias) > sysmk
 		}
 		syscall++
 		next
 	}
 	#
 	# Compatibility system calls (COMPAT, COMPAT4, COMPAT6, COMPAT7,
 	# COMPAT10, COMPAT11).  The first matching flag selects the per-version
 	# counter, output files, sysent wrapper macro name, symbol prefix and
 	# description; the rest of the rule then emits the argument structure,
 	# prototype, sysent row, name table entry and (for COMPAT7 and newer)
 	# the syscall number define and makefile object.
 	#
 	type("COMPAT") || type("COMPAT4") || type("COMPAT6") || \
-	    type("COMPAT7") || type("COMPAT10") {
+	    type("COMPAT7") || type("COMPAT10") || type("COMPAT11") {
 		if (flag("COMPAT")) {
 			ncompat++
 			out = syscompat
 			outdcl = syscompatdcl
 			wrap = "compat"
 			prefix = "o"
 			descr = "old"
 		} else if (flag("COMPAT4")) {
 			ncompat4++
 			out = syscompat4
 			outdcl = syscompat4dcl
 			wrap = "compat4"
 			prefix = "freebsd4_"
 			descr = "freebsd4"
 		} else if (flag("COMPAT6")) {
 			ncompat6++
 			out = syscompat6
 			outdcl = syscompat6dcl
 			wrap = "compat6"
 			prefix = "freebsd6_"
 			descr = "freebsd6"
 		} else if (flag("COMPAT7")) {
 			ncompat7++
 			out = syscompat7
 			outdcl = syscompat7dcl
 			wrap = "compat7"
 			prefix = "freebsd7_"
 			descr = "freebsd7"
 		} else if (flag("COMPAT10")) {
 			ncompat10++
 			out = syscompat10
 			outdcl = syscompat10dcl
 			wrap = "compat10"
 			prefix = "freebsd10_"
 			descr = "freebsd10"
+		} else if (flag("COMPAT11")) {
+			ncompat11++
+			out = syscompat11
+			outdcl = syscompat11dcl
+			wrap = "compat11"
+			prefix = "freebsd11_"
+			descr = "freebsd11"
 		}
 		parseline()
 		if (argc != 0 && !flag("NOARGS") && !flag("NOPROTO") && \
 		    !flag("NODEF")) {
 			printf("struct %s {\n", argalias) > out
 			for (i = 1; i <= argc; i++)
 				printf("\tchar %s_l_[PADL_(%s)]; %s %s; " \
 				    "char %s_r_[PADR_(%s)];\n",
 				    argname[i], argtype[i],
 				    argtype[i], argname[i],
 				    argname[i], argtype[i]) > out
 			printf("};\n") > out
 		}
 		# NOTE(review): the empty-argument structure is written to
 		# sysarg rather than to out, unlike the structure above -
 		# confirm that this difference is intended.
 		else if (!flag("NOARGS") && !flag("NOPROTO") && !flag("NODEF"))
 			printf("struct %s {\n\tregister_t dummy;\n};\n",
 			    argalias) > sysarg
 		if (!flag("NOPROTO") && !flag("NODEF")) {
 			printf("%s\t%s%s(struct thread *, struct %s *);\n",
 			    rettype, prefix, funcname, argalias) > outdcl
 			printf("#define\t%sAUE_%s%s\t%s\n", syscallprefix,
 			    prefix, funcname, auditev) > sysaue
 		}
 		# sysent row: NOSTD entries point at lkmressys, everything
 		# else goes through the per-version wrapper macro.
 		if (flag("NOSTD")) {
 			printf("\t{ %s, (sy_call_t *)%s, %s, NULL, 0, 0, 0, SY_THR_ABSENT },",
 			    "0", "lkmressys", "AUE_NULL") > sysent
 			align_sysent_comment(8 + 2 + length("0") + 15 + \
 			    length("lkmressys") + length("AUE_NULL") + 3)
 		} else {
 			printf("\t{ %s(%s,%s), %s, NULL, 0, 0, %s, %s },",
 			    wrap, argssize, funcname, auditev, flags, thr_flag) > sysent
 			align_sysent_comment(8 + 9 + length(argssize) + 1 + \
 			    length(funcname) + length(auditev) + \
 			    length(flags) + 4)
 		}
 		printf("/* %d = %s %s */\n", syscall, descr, funcalias) > sysent
 		printf("\t\"%s.%s\",\t\t/* %d = %s %s */\n",
 		    wrap, funcalias, syscall, descr, funcalias) > sysnames
 		# Do not provide freebsdN_* symbols in libc for < FreeBSD 7
 		if (flag("COMPAT") || flag("COMPAT4") || flag("COMPAT6")) {
 			printf("\t\t\t\t/* %d is %s %s */\n",
 			    syscall, descr, funcalias) > syshdr
 		} else if (!flag("NODEF")) {
 			printf("#define\t%s%s%s\t%d\n", syscallprefix,
 			    prefix, funcalias, syscall) > syshdr
 			printf(" \\\n\t%s%s.o", prefix, funcalias) > sysmk
 		}
 		syscall++
 		next
 	}
 	#
 	# Obsolete system calls: the slot is filled with a nosys sysent row,
 	# the name table gets an "obs_" entry built from field 4, and the
 	# header only receives a comment.  No prototype or structure is made.
 	#
 	type("OBSOL") {
 		printf("\t{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT },") > sysent
 		align_sysent_comment(34)
 		printf("/* %d = obsolete %s */\n", syscall, comment) > sysent
 		printf("\t\"obs_%s\",\t\t\t/* %d = obsolete %s */\n",
 		    $4, syscall, comment) > sysnames
 		printf("\t\t\t\t/* %d is obsolete %s */\n",
 		    syscall, comment) > syshdr
 		syscall++
 		next
 	}
 	#
 	# Unimplemented placeholders: a nosys sysent row and a "#<number>"
 	# entry in the name table; nothing is written to the header.
 	#
 	type("UNIMPL") {
 		printf("\t{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT },\t\t\t/* %d = %s */\n",
 		    syscall, comment) > sysent
 		printf("\t\"#%d\",\t\t\t/* %d = %s */\n",
 		    syscall, syscall, comment) > sysnames
 		syscall++
 		next
 	}
 	#
 	# Catch-all: every rule above ends with "next", so reaching this
 	# point means the type column (field 3) was not recognized.  Report
 	# the offending line and abort with a non-zero exit status.
 	#
 	{
 		printf "%s: line %d: unrecognized keyword %s\n", infile, NR, $3
 		exit 1
 	}
 	#
 	# Finalization: emit the AS() size macro, the per-version compatN()
 	# wrapper macros (only for versions that were actually used), the
 	# closing #endif lines of the compat declaration files, the PAD
 	# macro cleanup, the table terminators, the MAXSYSCALL define and
 	# the default cases closing the generated systrace functions.
 	#
 	END {
 		printf "\n#define AS(name) (sizeof(struct name) / sizeof(register_t))\n" > sysinc
 
-		if (ncompat != 0 || ncompat4 != 0 || ncompat6 != 0 || ncompat7 != 0 || ncompat10 != 0)
+		if (ncompat != 0 || ncompat4 != 0 || ncompat6 != 0 || ncompat7 != 0 || ncompat10 != 0 || ncompat11 != 0)
 			printf "#include \"opt_compat.h\"\n\n" > syssw
 
 		# Each wrapper expands to the real handler when the matching
 		# compat option is defined and to a nosys entry otherwise.
 		if (ncompat != 0) {
 			printf "\n#ifdef %s\n", compat > sysinc
 			printf "#define compat(n, name) n, (sy_call_t *)__CONCAT(o,name)\n" > sysinc
 			printf "#else\n" > sysinc
 			printf "#define compat(n, name) 0, (sy_call_t *)nosys\n" > sysinc
 			printf "#endif\n" > sysinc
 		}
 
 		if (ncompat4 != 0) {
 			printf "\n#ifdef %s\n", compat4 > sysinc
 			printf "#define compat4(n, name) n, (sy_call_t *)__CONCAT(freebsd4_,name)\n" > sysinc
 			printf "#else\n" > sysinc
 			printf "#define compat4(n, name) 0, (sy_call_t *)nosys\n" > sysinc
 			printf "#endif\n" > sysinc
 		}
 
 		if (ncompat6 != 0) {
 			printf "\n#ifdef %s\n", compat6 > sysinc
 			printf "#define compat6(n, name) n, (sy_call_t *)__CONCAT(freebsd6_,name)\n" > sysinc
 			printf "#else\n" > sysinc
 			printf "#define compat6(n, name) 0, (sy_call_t *)nosys\n" > sysinc
 			printf "#endif\n" > sysinc
 		}
 
 		if (ncompat7 != 0) {
 			printf "\n#ifdef %s\n", compat7 > sysinc
 			printf "#define compat7(n, name) n, (sy_call_t *)__CONCAT(freebsd7_,name)\n" > sysinc
 			printf "#else\n" > sysinc
 			printf "#define compat7(n, name) 0, (sy_call_t *)nosys\n" > sysinc
 			printf "#endif\n" > sysinc
 		}
 		if (ncompat10 != 0) {
 			printf "\n#ifdef %s\n", compat10 > sysinc
 			printf "#define compat10(n, name) n, (sy_call_t *)__CONCAT(freebsd10_,name)\n" > sysinc
 			printf "#else\n" > sysinc
 			printf "#define compat10(n, name) 0, (sy_call_t *)nosys\n" > sysinc
 			printf "#endif\n" > sysinc
 		}
+		if (ncompat11 != 0) {
+			printf "\n#ifdef %s\n", compat11 > sysinc
+			printf "#define compat11(n, name) n, (sy_call_t *)__CONCAT(freebsd11_,name)\n" > sysinc
+			printf "#else\n" > sysinc
+			printf "#define compat11(n, name) 0, (sy_call_t *)nosys\n" > sysinc
+			printf "#endif\n" > sysinc
+		}
+
 		printf("\n#endif /* %s */\n\n", compat) > syscompatdcl
 		printf("\n#endif /* %s */\n\n", compat4) > syscompat4dcl
 		printf("\n#endif /* %s */\n\n", compat6) > syscompat6dcl
 		printf("\n#endif /* %s */\n\n", compat7) > syscompat7dcl
 		printf("\n#endif /* %s */\n\n", compat10) > syscompat10dcl
+		printf("\n#endif /* %s */\n\n", compat11) > syscompat11dcl
 
 		printf("\n#undef PAD_\n") > sysprotoend
 		printf("#undef PADL_\n") > sysprotoend
 		printf("#undef PADR_\n") > sysprotoend
 		printf("\n#endif /* !%s */\n", sysproto_h) > sysprotoend
 
 		printf("\n") > sysmk
 		printf("};\n") > sysent
 		printf("};\n") > sysnames
 		printf("#define\t%sMAXSYSCALL\t%d\n", syscallprefix, syscall) \
 		    > syshdr
 		printf "\tdefault:\n\t\t*n_args = 0;\n\t\tbreak;\n\t};\n}\n" > systrace
 		printf "\tdefault:\n\t\tbreak;\n\t};\n\tif (p != NULL)\n\t\tstrlcpy(desc, p, descsz);\n}\n" > systracetmp
 		printf "\tdefault:\n\t\tbreak;\n\t};\n\tif (p != NULL)\n\t\tstrlcpy(desc, p, descsz);\n}\n" > systraceret
 	} '
 
 # Assemble the final files from the temporary pieces written by awk:
 # the switch file gets the include/macro section followed by the sysent
 # table, the prototype header gets the argument structures, declarations
 # and every compat section followed by the audit defines and trailer, and
 # the two helper systrace pieces are appended to the systrace file.
 cat $sysinc $sysent >> $syssw
 cat $sysarg $sysdcl \
 	$syscompat $syscompatdcl \
 	$syscompat4 $syscompat4dcl \
 	$syscompat6 $syscompat6dcl \
 	$syscompat7 $syscompat7dcl \
 	$syscompat10 $syscompat10dcl \
+	$syscompat11 $syscompat11dcl \
 	$sysaue $sysprotoend > $sysproto
 cat $systracetmp >> $systrace
 cat $systraceret >> $systrace
 
Index: head/sys/kern/sys_socket.c
===================================================================
--- head/sys/kern/sys_socket.c	(revision 318735)
+++ head/sys/kern/sys_socket.c	(revision 318736)
@@ -1,804 +1,807 @@
 /*-
  * Copyright (c) 1982, 1986, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)sys_socket.c	8.1 (Berkeley) 6/10/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/aio.h>
 #include <sys/domain.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/sigio.h>
 #include <sys/signal.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/filio.h>			/* XXX */
 #include <sys/sockio.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/taskqueue.h>
 #include <sys/uio.h>
 #include <sys/ucred.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
 #include <sys/user.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 
 /* Parent sysctl node (under _kern_ipc) for the socket AIO counters/knobs. */
 static SYSCTL_NODE(_kern_ipc, OID_AUTO, aio, CTLFLAG_RD, NULL,
     "socket AIO stats");
 
 /*
  * Bumped by soaio_process_job() when a job comes back with EWOULDBLOCK
  * and cannot simply be completed with a partial transfer.
  */
 static int empty_results;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, empty_results, CTLFLAG_RD, &empty_results,
     0, "socket operation returned EAGAIN");
 
 /*
  * Bumped by soaio_process_job() when such a job is retried immediately
  * because the socket buffer turned out to be ready again.
  */
 static int empty_retries;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, empty_retries, CTLFLAG_RD, &empty_retries,
     0, "socket operation retries");
 
 static fo_rdwr_t soo_read;
 static fo_rdwr_t soo_write;
 static fo_ioctl_t soo_ioctl;
 static fo_poll_t soo_poll;
 extern fo_kqfilter_t soo_kqfilter;
 static fo_stat_t soo_stat;
 static fo_close_t soo_close;
 static fo_fill_kinfo_t soo_fill_kinfo;
 static fo_aio_queue_t soo_aio_queue;
 
 static void	soo_aio_cancel(struct kaiocb *job);
 
 /*
  * File operations vector for socket-backed files.  Read, write, ioctl,
  * poll, kqfilter, stat, close, kinfo and AIO queueing are implemented by
  * the soo_* handlers; truncate, chmod, chown and sendfile are routed to
  * the generic invfo_* handlers.
  */
 struct fileops	socketops = {
 	.fo_read = soo_read,
 	.fo_write = soo_write,
 	.fo_truncate = invfo_truncate,
 	.fo_ioctl = soo_ioctl,
 	.fo_poll = soo_poll,
 	.fo_kqfilter = soo_kqfilter,
 	.fo_stat = soo_stat,
 	.fo_close = soo_close,
 	.fo_chmod = invfo_chmod,
 	.fo_chown = invfo_chown,
 	.fo_sendfile = invfo_sendfile,
 	.fo_fill_kinfo = soo_fill_kinfo,
 	.fo_aio_queue = soo_aio_queue,
 	.fo_flags = DFLAG_PASSABLE
 };
 
 /*
  * fo_read handler: receive data from the socket behind fp into uio.
  * With MAC compiled in, the receive is first authorized against
  * active_cred and a denial is returned unchanged.  The flags and td
  * arguments are not used.
  */
 static int
 soo_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	struct socket *so;
 #ifdef MAC
 	int error;
 #endif
 
 	so = fp->f_data;
 #ifdef MAC
 	error = mac_socket_check_receive(active_cred, so);
 	if (error != 0)
 		return (error);
 #endif
 	return (soreceive(so, 0, uio, 0, 0, 0));
 }
 
 static int
 soo_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	struct socket *so = fp->f_data;
 	int error;
 
 #ifdef MAC
 	error = mac_socket_check_send(active_cred, so);
 	if (error)
 		return (error);
 #endif
 	error = sosend(so, 0, uio, 0, 0, 0, uio->uio_td);
 	if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) {
 		PROC_LOCK(uio->uio_td->td_proc);
 		tdsignal(uio->uio_td, SIGPIPE);
 		PROC_UNLOCK(uio->uio_td->td_proc);
 	}
 	return (error);
 }
 
 /*
  * fo_ioctl handler for sockets.
  *
  * Generic socket requests are handled here directly:
  *   FIONBIO     set/clear SS_NBIO in so_state
  *   FIOASYNC    set/clear SS_ASYNC and SB_ASYNC on both socket buffers
  *   FIONREAD    bytes available in the receive buffer
  *   FIONWRITE   bytes available in the send buffer
  *   FIONSPACE   free space in the send buffer (0 when over its limits)
  *   FIOSETOWN / FIOGETOWN, SIOCSPGRP / SIOCGPGRP
  *               set/get the SIGIO owner; the *PGRP forms negate the id
  *   SIOCATMARK  whether SBS_RCVATMARK is set on the receive buffer
  *
  * Everything else is dispatched on the ioctl group: 'i' goes to
  * ifioctl(), 'r' to rtioctl_fib() and the rest to the protocol's
  * pru_control method.
  *
  * Returns 0 or the error reported by the selected handler.
  */
 static int
 soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
     struct thread *td)
 {
 	struct socket *so = fp->f_data;
 	int error = 0;
 
 	switch (cmd) {
 	case FIONBIO:
 		SOCK_LOCK(so);
 		if (*(int *)data)
 			so->so_state |= SS_NBIO;
 		else
 			so->so_state &= ~SS_NBIO;
 		SOCK_UNLOCK(so);
 		break;
 
 	case FIOASYNC:
 		/*
 		 * XXXRW: This code separately acquires SOCK_LOCK(so) and
 		 * SOCKBUF_LOCK(&so->so_rcv) even though they are the same
 		 * mutex to avoid introducing the assumption that they are
 		 * the same.
 		 */
 		if (*(int *)data) {
 			SOCK_LOCK(so);
 			so->so_state |= SS_ASYNC;
 			SOCK_UNLOCK(so);
 			SOCKBUF_LOCK(&so->so_rcv);
 			so->so_rcv.sb_flags |= SB_ASYNC;
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			SOCKBUF_LOCK(&so->so_snd);
 			so->so_snd.sb_flags |= SB_ASYNC;
 			SOCKBUF_UNLOCK(&so->so_snd);
 		} else {
 			SOCK_LOCK(so);
 			so->so_state &= ~SS_ASYNC;
 			SOCK_UNLOCK(so);
 			SOCKBUF_LOCK(&so->so_rcv);
 			so->so_rcv.sb_flags &= ~SB_ASYNC;
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			SOCKBUF_LOCK(&so->so_snd);
 			so->so_snd.sb_flags &= ~SB_ASYNC;
 			SOCKBUF_UNLOCK(&so->so_snd);
 		}
 		break;
 
 	case FIONREAD:
 		/* Unlocked read. */
 		*(int *)data = sbavail(&so->so_rcv);
 		break;
 
 	case FIONWRITE:
 		/* Unlocked read. */
 		*(int *)data = sbavail(&so->so_snd);
 		break;
 
 	case FIONSPACE:
 		/* Unlocked read. */
 		/* Report 0 rather than a bogus value when over a limit. */
 		if ((so->so_snd.sb_hiwat < sbused(&so->so_snd)) ||
 		    (so->so_snd.sb_mbmax < so->so_snd.sb_mbcnt))
 			*(int *)data = 0;
 		else
 			*(int *)data = sbspace(&so->so_snd);
 		break;
 
 	case FIOSETOWN:
 		error = fsetown(*(int *)data, &so->so_sigio);
 		break;
 
 	case FIOGETOWN:
 		*(int *)data = fgetown(&so->so_sigio);
 		break;
 
 	case SIOCSPGRP:
 		/* Same as FIOSETOWN, but with the sign of the id flipped. */
 		error = fsetown(-(*(int *)data), &so->so_sigio);
 		break;
 
 	case SIOCGPGRP:
 		*(int *)data = -fgetown(&so->so_sigio);
 		break;
 
 	case SIOCATMARK:
 		/* Unlocked read. */
 		*(int *)data = (so->so_rcv.sb_state & SBS_RCVATMARK) != 0;
 		break;
 	default:
 		/*
 		 * Interface/routing/protocol specific ioctls: interface and
 		 * routing ioctls should have a different entry since a
 		 * socket is unnecessary.
 		 */
 		if (IOCGROUP(cmd) == 'i')
 			error = ifioctl(so, cmd, data, td);
 		else if (IOCGROUP(cmd) == 'r') {
 			/* Routing requests run in the socket's vnet and FIB. */
 			CURVNET_SET(so->so_vnet);
 			error = rtioctl_fib(cmd, data, so->so_fibnum);
 			CURVNET_RESTORE();
 		} else {
 			CURVNET_SET(so->so_vnet);
 			error = ((*so->so_proto->pr_usrreqs->pru_control)
 			    (so, cmd, data, 0, td));
 			CURVNET_RESTORE();
 		}
 		break;
 	}
 	return (error);
 }
 
 /*
  * fo_poll handler: poll the socket behind fp for the requested events.
  * With MAC compiled in, the poll is first authorized against
  * active_cred; sopoll() itself is given the file's credential.
  */
 static int
 soo_poll(struct file *fp, int events, struct ucred *active_cred,
     struct thread *td)
 {
 	struct socket *so;
 #ifdef MAC
 	int error;
 #endif
 
 	so = fp->f_data;
 #ifdef MAC
 	error = mac_socket_check_poll(active_cred, so);
 	if (error != 0)
 		return (error);
 #endif
 	return (sopoll(so, events, fp->f_cred, td));
 }
 
 /*
  * fo_stat handler: fill *ub for the socket behind fp.
  *
  * The structure is zeroed and marked S_IFSOCK before the optional MAC
  * check, so a denied request still leaves a cleared buffer behind.
  * Read permission bits and st_size come from the receive buffer, write
  * permission bits from the send buffer, owner and group from the
  * socket's credential; the protocol's pru_sense method fills in the
  * rest and supplies the return value.
  */
 static int
 soo_stat(struct file *fp, struct stat *ub, struct ucred *active_cred,
     struct thread *td)
 {
 	struct socket *so;
 	struct sockbuf *rcv, *snd;
 #ifdef MAC
 	int error;
 #endif
 
 	so = fp->f_data;
 	bzero((caddr_t)ub, sizeof (*ub));
 	ub->st_mode = S_IFSOCK;
 #ifdef MAC
 	error = mac_socket_check_stat(active_cred, so);
 	if (error != 0)
 		return (error);
 #endif
 	rcv = &so->so_rcv;
 	snd = &so->so_snd;
 
 	/*
 	 * A socket that can no longer receive is still readable while
 	 * data remains queued in its receive buffer.
 	 */
 	SOCKBUF_LOCK(rcv);
 	if ((rcv->sb_state & SBS_CANTRCVMORE) == 0 || sbavail(rcv))
 		ub->st_mode |= S_IRUSR | S_IRGRP | S_IROTH;
 	ub->st_size = sbavail(rcv) - rcv->sb_ctl;
 	SOCKBUF_UNLOCK(rcv);
 
 	SOCKBUF_LOCK(snd);
 	if ((snd->sb_state & SBS_CANTSENDMORE) == 0)
 		ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH;
 	SOCKBUF_UNLOCK(snd);
 
 	ub->st_uid = so->so_cred->cr_uid;
 	ub->st_gid = so->so_cred->cr_gid;
 	return (*so->so_proto->pr_usrreqs->pru_sense)(so, ub);
 }
 
 /*
  * fo_close handler.  The file is detached from the socket first (its ops
  * are pointed at badfileops and f_data is cleared), then soclose() is
  * called to close the socket, including initiating closing protocols.
  * soclose() will sorele() the file reference, but the socket itself does
  * not go away until its reference count reaches 0.  A file without a
  * socket attached closes successfully.
  */
 static int
 soo_close(struct file *fp, struct thread *td)
 {
 	struct socket *so;
 
 	so = fp->f_data;
 	fp->f_ops = &badfileops;
 	fp->f_data = NULL;
 
 	if (so == NULL)
 		return (0);
 	return (soclose(so));
 }
 
 /*
  * fo_fill_kinfo handler: describe the socket behind fp in *kif.
  *
  * Records the socket's domain, type, protocol and PCB address, adds the
  * TCP control block address for AF_INET/AF_INET6 TCP sockets and the
  * peer/buffer state for connected AF_UNIX sockets, then copies the local
  * and peer addresses when they are available and fit, and finally stores
  * the domain name in kf_path.
  *
  * Failures of the address lookups are ignored (the corresponding field
  * is simply left untouched); the function always returns 0.  The fdp
  * argument is not used.
  */
 static int
 soo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
 {
 	struct sockaddr *sa;
 	struct inpcb *inpcb;
 	struct unpcb *unpcb;
 	struct socket *so;
 	int error;
 
 	kif->kf_type = KF_TYPE_SOCKET;
 	so = fp->f_data;
-	kif->kf_sock_domain = so->so_proto->pr_domain->dom_family;
-	kif->kf_sock_type = so->so_type;
-	kif->kf_sock_protocol = so->so_proto->pr_protocol;
+	kif->kf_un.kf_sock.kf_sock_domain0 =
+	    so->so_proto->pr_domain->dom_family;
+	kif->kf_un.kf_sock.kf_sock_type0 = so->so_type;
+	kif->kf_un.kf_sock.kf_sock_protocol0 = so->so_proto->pr_protocol;
 	kif->kf_un.kf_sock.kf_sock_pcb = (uintptr_t)so->so_pcb;
-	switch (kif->kf_sock_domain) {
+	switch (kif->kf_un.kf_sock.kf_sock_domain0) {
 	case AF_INET:
 	case AF_INET6:
-		if (kif->kf_sock_protocol == IPPROTO_TCP) {
+		if (kif->kf_un.kf_sock.kf_sock_protocol0 == IPPROTO_TCP) {
 			if (so->so_pcb != NULL) {
 				inpcb = (struct inpcb *)(so->so_pcb);
 				kif->kf_un.kf_sock.kf_sock_inpcb =
 				    (uintptr_t)inpcb->inp_ppcb;
 			}
 		}
 		break;
 	case AF_UNIX:
 		if (so->so_pcb != NULL) {
 			unpcb = (struct unpcb *)(so->so_pcb);
 			if (unpcb->unp_conn) {
 				kif->kf_un.kf_sock.kf_sock_unpconn =
 				    (uintptr_t)unpcb->unp_conn;
 				kif->kf_un.kf_sock.kf_sock_rcv_sb_state =
 				    so->so_rcv.sb_state;
 				kif->kf_un.kf_sock.kf_sock_snd_sb_state =
 				    so->so_snd.sb_state;
 			}
 		}
 		break;
 	}
 	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
-	if (error == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) {
-		bcopy(sa, &kif->kf_sa_local, sa->sa_len);
+	if (error == 0) {
+		/*
+		 * A successful lookup hands us an allocated sockaddr.  Copy
+		 * it out only when it fits, but always release it so that an
+		 * oversized address is not leaked.
+		 */
+		if (sa->sa_len <= sizeof(kif->kf_un.kf_sock.kf_sa_local))
+			bcopy(sa, &kif->kf_un.kf_sock.kf_sa_local,
+			    sa->sa_len);
 		free(sa, M_SONAME);
 	}
 	error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa);
-	if (error == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) {
-		bcopy(sa, &kif->kf_sa_peer, sa->sa_len);
+	if (error == 0) {
+		/* Same ownership rule as for the local address above. */
+		if (sa->sa_len <= sizeof(kif->kf_un.kf_sock.kf_sa_peer))
+			bcopy(sa, &kif->kf_un.kf_sock.kf_sa_peer,
+			    sa->sa_len);
 		free(sa, M_SONAME);
 	}
 	strncpy(kif->kf_path, so->so_proto->pr_domain->dom_name,
 	    sizeof(kif->kf_path));
 	return (0);	
 }
 
 /*
  * Use the 'backend3' field in AIO jobs to store the amount of data
  * completed by the AIO job so far.
  */
 #define	aio_done	backend3
 
 /*
  * State of the socket AIO daemon pool.  soaio_jobs is the queue of tasks
  * waiting for a daemon; it and the counters below are read and updated
  * with soaio_jobs_lock held by soaio_enqueue(), soaio_kproc_create() and
  * soaio_kproc_loop().
  *
  *   soaio_starting  daemons requested but not yet running their loop
  *   soaio_idle      daemons sleeping while waiting for work
  *   soaio_queued    tasks currently sitting on soaio_jobs
  *
  * soaio_kproc_task is the taskqueue task that runs soaio_kproc_create(),
  * and soaio_kproc_unr supplies the numeric ids used in daemon names.
  */
 static STAILQ_HEAD(, task) soaio_jobs;
 static struct mtx soaio_jobs_lock;
 static struct task soaio_kproc_task;
 static int soaio_starting, soaio_idle, soaio_queued;
 static struct unrhdr *soaio_kproc_unr;
 
 /* Upper bound on the number of daemons; writable at run time. */
 static int soaio_max_procs = MAX_AIO_PROCS;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, max_procs, CTLFLAG_RW, &soaio_max_procs, 0,
     "Maximum number of kernel processes to use for async socket IO");
 
 /* Current number of daemons; read-only. */
 static int soaio_num_procs;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, num_procs, CTLFLAG_RD, &soaio_num_procs, 0,
     "Number of active kernel processes for async socket IO");
 
 /* Daemons are always created while fewer than this many exist. */
 static int soaio_target_procs = TARGET_AIO_PROCS;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, target_procs, CTLFLAG_RD,
     &soaio_target_procs, 0,
     "Preferred number of ready kernel processes for async socket IO");
 
 /* Sleep timeout for an idle daemon; set by soaio_init(). */
 static int soaio_lifetime;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, lifetime, CTLFLAG_RW, &soaio_lifetime, 0,
     "Maximum lifetime for idle aiod");
 
 /*
  * Main loop of a socket AIO daemon process.
  *
  * arg carries the daemon's numeric id.  The daemon repeatedly drains the
  * soaio_jobs queue, running each task's function with the jobs lock
  * dropped, then switches back to its own vmspace if a job left it in
  * another one, and finally sleeps on soaio_idle for at most
  * soaio_lifetime.  When the sleep times out with no work queued and more
  * than soaio_target_procs daemons exist, the daemon gives back its id
  * and exits.
  */
 static void
 soaio_kproc_loop(void *arg)
 {
 	struct proc *p;
 	struct vmspace *myvm;
 	struct task *task;
 	int error, id, pending;
 
 	id = (intptr_t)arg;
 
 	/*
 	 * Grab an extra reference on the daemon's vmspace so that it
 	 * doesn't get freed by jobs that switch to a different
 	 * vmspace.
 	 */
 	p = curproc;
 	myvm = vmspace_acquire_ref(p);
 
 	mtx_lock(&soaio_jobs_lock);
 	/* This daemon is now running, so it no longer counts as starting. */
 	MPASS(soaio_starting > 0);
 	soaio_starting--;
 	for (;;) {
 		while (!STAILQ_EMPTY(&soaio_jobs)) {
 			task = STAILQ_FIRST(&soaio_jobs);
 			STAILQ_REMOVE_HEAD(&soaio_jobs, ta_link);
 			soaio_queued--;
 			/* Snapshot and reset the count before dropping the lock. */
 			pending = task->ta_pending;
 			task->ta_pending = 0;
 			mtx_unlock(&soaio_jobs_lock);
 
 			task->ta_func(task->ta_context, pending);
 
 			mtx_lock(&soaio_jobs_lock);
 		}
 		MPASS(soaio_queued == 0);
 
 		/*
 		 * Return to our own vmspace before idling.  The lock is
 		 * dropped for the switch, so recheck the queue afterwards.
 		 */
 		if (p->p_vmspace != myvm) {
 			mtx_unlock(&soaio_jobs_lock);
 			vmspace_switch_aio(myvm);
 			mtx_lock(&soaio_jobs_lock);
 			continue;
 		}
 
 		soaio_idle++;
 		error = mtx_sleep(&soaio_idle, &soaio_jobs_lock, 0, "-",
 		    soaio_lifetime);
 		soaio_idle--;
 		/* Retire only on a timeout, with no work and a surplus of daemons. */
 		if (error == EWOULDBLOCK && STAILQ_EMPTY(&soaio_jobs) &&
 		    soaio_num_procs > soaio_target_procs)
 			break;
 	}
 	soaio_num_procs--;
 	mtx_unlock(&soaio_jobs_lock);
 	free_unr(soaio_kproc_unr, id);
 	kproc_exit(0);
 }
 
 /*
  * Taskqueue handler that creates socket AIO daemons on demand.
  *
  * Daemons are created one at a time until one of the stop conditions in
  * the loop is met or a creation attempt fails.  The jobs lock is dropped
  * around id allocation and kproc_create(); soaio_starting accounts for
  * daemons that have been requested but are not running yet.  The context
  * and pending arguments are not used.
  */
 static void
 soaio_kproc_create(void *context, int pending)
 {
 	struct proc *p;
 	int error, id;
 
 	mtx_lock(&soaio_jobs_lock);
 	for (;;) {
 		if (soaio_num_procs < soaio_target_procs) {
 			/* Must create */
 		} else if (soaio_num_procs >= soaio_max_procs) {
 			/*
 			 * Hit the limit on kernel processes, don't
 			 * create another one.
 			 */
 			break;
 		} else if (soaio_queued <= soaio_idle + soaio_starting) {
 			/*
 			 * No more AIO jobs waiting for a process to be
 			 * created, so stop.
 			 */
 			break;
 		}
 		soaio_starting++;
 		mtx_unlock(&soaio_jobs_lock);
 
 		/* NOTE(review): the result of alloc_unr() is not checked. */
 		id = alloc_unr(soaio_kproc_unr);
 		error = kproc_create(soaio_kproc_loop, (void *)(intptr_t)id,
 		    &p, 0, 0, "soaiod%d", id);
 		if (error != 0) {
 			/* Undo the bookkeeping for the daemon that never started. */
 			free_unr(soaio_kproc_unr, id);
 			mtx_lock(&soaio_jobs_lock);
 			soaio_starting--;
 			break;
 		}
 
 		mtx_lock(&soaio_jobs_lock);
 		soaio_num_procs++;
 	}
 	mtx_unlock(&soaio_jobs_lock);
 }
 
 /*
  * Queue a task for the socket AIO daemons.
  *
  * The task must not already be pending.  It is appended to soaio_jobs;
  * an idle daemon is woken when enough of them are sleeping to cover the
  * queue, otherwise the daemon-creation task is scheduled as long as the
  * process limit has not been reached.
  */
 void
 soaio_enqueue(struct task *task)
 {
 
 	mtx_lock(&soaio_jobs_lock);
 	MPASS(task->ta_pending == 0);
 	task->ta_pending++;
 	STAILQ_INSERT_TAIL(&soaio_jobs, task, ta_link);
 	soaio_queued++;
 	if (soaio_queued <= soaio_idle)
 		wakeup_one(&soaio_idle);
 	else if (soaio_num_procs < soaio_max_procs)
 		taskqueue_enqueue(taskqueue_thread, &soaio_kproc_task);
 	mtx_unlock(&soaio_jobs_lock);
 }
 
 /*
  * One-time setup of the socket AIO daemon pool, registered through
  * SYSINIT below: sets the default idle lifetime, initializes the job
  * queue, its mutex, the daemon id allocator and the creation task, and
  * schedules the creation task right away when a non-zero number of
  * daemons is targeted.
  */
 static void
 soaio_init(void)
 {
 
 	soaio_lifetime = AIOD_LIFETIME_DEFAULT;
 	STAILQ_INIT(&soaio_jobs);
 	mtx_init(&soaio_jobs_lock, "soaio jobs", NULL, MTX_DEF);
 	soaio_kproc_unr = new_unrhdr(1, INT_MAX, NULL);
 	TASK_INIT(&soaio_kproc_task, 0, soaio_kproc_create, NULL);
 	if (soaio_target_procs > 0)
 		taskqueue_enqueue(taskqueue_thread, &soaio_kproc_task);
 }
 SYSINIT(soaio, SI_SUB_VFS, SI_ORDER_ANY, soaio_init, NULL);
 
 /*
  * Report whether sb, one of so's two socket buffers, can make progress:
  * readability is tested for the receive buffer, writability for any
  * other buffer.
  */
 static __inline int
 soaio_ready(struct socket *so, struct sockbuf *sb)
 {
 	if (sb == &so->so_rcv)
 		return (soreadable(so));
 	return (sowriteable(so));
 }
 
 /*
  * Run one AIO job against a socket buffer.
  *
  * Entered with sb locked; the lock is dropped for the duration of the
  * transfer and is held again on every return path.  The transfer is
  * performed with MSG_NBIO under the job's credential, resuming at the
  * offset recorded in job->aio_done.
  *
  * On EWOULDBLOCK a job that has made no progress, or that belongs to a
  * blocking socket, is either retried immediately (if the buffer became
  * ready again) or put back at the head of the buffer's job queue.  If
  * the job was cancelled in the meantime it is completed with whatever
  * was transferred, or cancelled if nothing was.  In every other case the
  * job is completed here, either with the byte count or with the error.
  */
 static void
 soaio_process_job(struct socket *so, struct sockbuf *sb, struct kaiocb *job)
 {
 	struct ucred *td_savedcred;
 	struct thread *td;
 	struct file *fp;
 	struct uio uio;
 	struct iovec iov;
 	size_t cnt, done;
 	long ru_before;
 	int error, flags;
 
 	SOCKBUF_UNLOCK(sb);
 	aio_switch_vmspace(job);
 	td = curthread;
 	fp = job->fd_file;
 retry:
 	/* Borrow the job's credential for the duration of the transfer. */
 	td_savedcred = td->td_ucred;
 	td->td_ucred = job->cred;
 
 	/* Describe the part of the user buffer that is still outstanding. */
 	done = job->aio_done;
 	cnt = job->uaiocb.aio_nbytes - done;
 	iov.iov_base = (void *)((uintptr_t)job->uaiocb.aio_buf + done);
 	iov.iov_len = cnt;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = 0;
 	uio.uio_resid = cnt;
 	uio.uio_segflg = UIO_USERSPACE;
 	uio.uio_td = td;
 	flags = MSG_NBIO;
 
 	/*
 	 * For resource usage accounting, only count a completed request
 	 * as a single message to avoid counting multiple calls to
 	 * sosend/soreceive on a blocking socket.
 	 */
 
 	if (sb == &so->so_rcv) {
 		uio.uio_rw = UIO_READ;
 		ru_before = td->td_ru.ru_msgrcv;
 #ifdef MAC
 		error = mac_socket_check_receive(fp->f_cred, so);
 		if (error == 0)
 
 #endif
 			error = soreceive(so, NULL, &uio, NULL, NULL, &flags);
 		if (td->td_ru.ru_msgrcv != ru_before)
 			job->msgrcv = 1;
 	} else {
 		/* More writes are queued behind this one. */
 		if (!TAILQ_EMPTY(&sb->sb_aiojobq))
 			flags |= MSG_MORETOCOME;
 		uio.uio_rw = UIO_WRITE;
 		ru_before = td->td_ru.ru_msgsnd;
 #ifdef MAC
 		error = mac_socket_check_send(fp->f_cred, so);
 		if (error == 0)
 #endif
 			error = sosend(so, NULL, &uio, NULL, NULL, flags, td);
 		if (td->td_ru.ru_msgsnd != ru_before)
 			job->msgsnd = 1;
 		/* The signal goes to the process that submitted the job. */
 		if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) {
 			PROC_LOCK(job->userproc);
 			kern_psignal(job->userproc, SIGPIPE);
 			PROC_UNLOCK(job->userproc);
 		}
 	}
 
 	/* Record progress and give the thread its own credential back. */
 	done += cnt - uio.uio_resid;
 	job->aio_done = done;
 	td->td_ucred = td_savedcred;
 
 	if (error == EWOULDBLOCK) {
 		/*
 		 * The request was either partially completed or not
 		 * completed at all due to racing with a read() or
 		 * write() on the socket.  If the socket is
 		 * non-blocking, return with any partial completion.
 		 * If the socket is blocking or if no progress has
 		 * been made, requeue this request at the head of the
 		 * queue to try again when the socket is ready.
 		 */
 		MPASS(done != job->uaiocb.aio_nbytes);
 		SOCKBUF_LOCK(sb);
 		if (done == 0 || !(so->so_state & SS_NBIO)) {
 			empty_results++;
 			if (soaio_ready(so, sb)) {
 				empty_retries++;
 				SOCKBUF_UNLOCK(sb);
 				goto retry;
 			}
 			
 			/*
 			 * Re-arming the cancel handler fails if the job was
 			 * cancelled while it was running; finish it here
 			 * instead of requeueing it.
 			 */
 			if (!aio_set_cancel_function(job, soo_aio_cancel)) {
 				SOCKBUF_UNLOCK(sb);
 				if (done != 0)
 					aio_complete(job, done, 0);
 				else
 					aio_cancel(job);
 				SOCKBUF_LOCK(sb);
 			} else {
 				TAILQ_INSERT_HEAD(&sb->sb_aiojobq, job, list);
 			}
 			return;
 		}
 		SOCKBUF_UNLOCK(sb);
 	}		
 	/* A partial transfer hides an interruption or would-block error. */
 	if (done != 0 && (error == ERESTART || error == EINTR ||
 	    error == EWOULDBLOCK))
 		error = 0;
 	if (error)
 		aio_complete(job, -1, error);
 	else
 		aio_complete(job, done, 0);
 	SOCKBUF_LOCK(sb);
 }
 
 /*
  * Process the AIO jobs queued on one socket buffer.
  *
  * Jobs are taken from the head of the queue for as long as the buffer is
  * ready; jobs whose cancel handler can no longer be cleared are skipped.
  * Afterwards SB_AIO is set if jobs remain, SB_AIO_RUNNING is cleared, and
  * a socket reference is released through sorele(), matching the soref()
  * in sowakeup_aio().
  */
 static void
 soaio_process_sb(struct socket *so, struct sockbuf *sb)
 {
 	struct kaiocb *job;
 
 	SOCKBUF_LOCK(sb);
 	while (!TAILQ_EMPTY(&sb->sb_aiojobq) && soaio_ready(so, sb)) {
 		job = TAILQ_FIRST(&sb->sb_aiojobq);
 		TAILQ_REMOVE(&sb->sb_aiojobq, job, list);
 		/* Leave a job alone once its cancellation is under way. */
 		if (!aio_clear_cancel_function(job))
 			continue;
 
 		soaio_process_job(so, sb, job);
 	}
 
 	/*
 	 * If there are still pending requests, the socket must not be
 	 * ready so set SB_AIO to request a wakeup when the socket
 	 * becomes ready.
 	 */
 	if (!TAILQ_EMPTY(&sb->sb_aiojobq))
 		sb->sb_flags |= SB_AIO;
 	sb->sb_flags &= ~SB_AIO_RUNNING;
 	SOCKBUF_UNLOCK(sb);
 
 	/* NOTE(review): sorele() presumably drops both locks - confirm. */
 	ACCEPT_LOCK();
 	SOCK_LOCK(so);
 	sorele(so);
 }
 
 /*
  * Task handler for receive-side AIO: context is the socket whose receive
  * buffer has queued jobs to process.  The pending count is not used.
  */
 void
 soaio_rcv(void *context, int pending)
 {
 	struct socket *so = context;
 
 	soaio_process_sb(so, &so->so_rcv);
 }
 
 /*
  * Task handler for send-side AIO: context is the socket whose send
  * buffer has queued jobs to process.  The pending count is not used.
  */
 void
 soaio_snd(void *context, int pending)
 {
 	struct socket *so = context;
 
 	soaio_process_sb(so, &so->so_snd);
 }
 
 /*
  * Kick off AIO processing for a socket buffer that has become ready.
  *
  * Must be called with sb locked.  SB_AIO is cleared; if processing is
  * not already in progress, SB_AIO_RUNNING is set, a socket reference is
  * taken for the worker and the buffer's AIO task is handed to the
  * daemon pool.
  */
 void
 sowakeup_aio(struct socket *so, struct sockbuf *sb)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	sb->sb_flags &= ~SB_AIO;
 	if (sb->sb_flags & SB_AIO_RUNNING)
 		return;
 	sb->sb_flags |= SB_AIO_RUNNING;
 	/*
 	 * The socket lock is taken explicitly only for the send buffer.
 	 * NOTE(review): presumably the receive buffer lock already covers
 	 * it (see the XXXRW remark in soo_ioctl()) - confirm.
 	 */
 	if (sb == &so->so_snd)
 		SOCK_LOCK(so);
 	soref(so);
 	if (sb == &so->so_snd)
 		SOCK_UNLOCK(so);
 	soaio_enqueue(&sb->sb_aiotask);
 }
 
 /*
  * Cancel handler installed on queued socket AIO jobs.
  *
  * The job's opcode selects the receive or send buffer.  Unless the
  * cancel handler had already been cleared, the job is unlinked from that
  * buffer's queue; SB_AIO is dropped when the queue ends up empty.  The
  * job is then completed with the bytes transferred so far, or cancelled
  * if nothing was transferred.
  */
 static void
 soo_aio_cancel(struct kaiocb *job)
 {
 	struct socket *so;
 	struct sockbuf *sb;
 	long done;
 	int opcode;
 
 	so = job->fd_file->f_data;
 	opcode = job->uaiocb.aio_lio_opcode;
 	if (opcode == LIO_READ)
 		sb = &so->so_rcv;
 	else {
 		MPASS(opcode == LIO_WRITE);
 		sb = &so->so_snd;
 	}
 
 	SOCKBUF_LOCK(sb);
 	if (!aio_cancel_cleared(job))
 		TAILQ_REMOVE(&sb->sb_aiojobq, job, list);
 	if (TAILQ_EMPTY(&sb->sb_aiojobq))
 		sb->sb_flags &= ~SB_AIO;
 	SOCKBUF_UNLOCK(sb);
 
 	done = job->aio_done;
 	if (done != 0)
 		aio_complete(job, done, 0);
 	else
 		aio_cancel(job);
 }
 
 /*
  * fo_aio_queue handler: queue an AIO job on a socket.
  *
  * The protocol's pru_aio_queue method gets the first chance; if it
  * returns 0 nothing more is done here.  Otherwise the job is appended to
  * the receive (LIO_READ) or send (LIO_WRITE) buffer's job queue with
  * soo_aio_cancel() as its cancel handler.  Unless processing is already
  * running, it is started right away when the buffer is ready, or
  * requested for later through SB_AIO.
  *
  * Returns 0 on success, or EINVAL for any other opcode.
  */
 static int
 soo_aio_queue(struct file *fp, struct kaiocb *job)
 {
 	struct socket *so;
 	struct sockbuf *sb;
 	int error;
 
 	so = fp->f_data;
 	error = (*so->so_proto->pr_usrreqs->pru_aio_queue)(so, job);
 	if (error == 0)
 		return (0);
 
 	switch (job->uaiocb.aio_lio_opcode) {
 	case LIO_READ:
 		sb = &so->so_rcv;
 		break;
 	case LIO_WRITE:
 		sb = &so->so_snd;
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	SOCKBUF_LOCK(sb);
 	if (!aio_set_cancel_function(job, soo_aio_cancel))
 		panic("new job was cancelled");
 	TAILQ_INSERT_TAIL(&sb->sb_aiojobq, job, list);
 	if (!(sb->sb_flags & SB_AIO_RUNNING)) {
 		if (soaio_ready(so, sb))
 			sowakeup_aio(so, sb);
 		else
 			sb->sb_flags |= SB_AIO;
 	}
 	SOCKBUF_UNLOCK(sb);
 	return (0);
 }
Index: head/sys/kern/syscalls.master
===================================================================
--- head/sys/kern/syscalls.master	(revision 318735)
+++ head/sys/kern/syscalls.master	(revision 318736)
@@ -1,1002 +1,1022 @@
  $FreeBSD$
 ;	from: @(#)syscalls.master	8.2 (Berkeley) 1/13/94
 ;
 ; System call name/number master file.
 ; Processed to created init_sysent.c, syscalls.c and syscall.h.
 
 ; Columns: number audit type name alt{name,tag,rtyp}/comments
 ;	number	system call number, must be in order
 ;	audit	the audit event associated with the system call
 ;		A value of AUE_NULL means no auditing, but it also means that
 ;		there is no audit event for the call at this time. For the
 ;		case where the event exists, but we don't want auditing, the
 ;		event should be #defined to AUE_NULL in audit_kevents.h.
 ;	type	one of STD, OBSOL, UNIMPL, COMPAT, COMPAT4, COMPAT6,
-;		COMPAT7, NODEF, NOARGS, NOPROTO, NOSTD
+;		COMPAT7, COMPAT10, COMPAT11, NODEF, NOARGS, NOPROTO, NOSTD
 ;		The COMPAT* options may be combined with one or more NO*
 ;		options separated by '|' with no spaces (e.g. COMPAT|NOARGS)
 ;	name	psuedo-prototype of syscall routine
 ;		If one of the following alts is different, then all appear:
 ;	altname	name of system call if different
 ;	alttag	name of args struct tag if different from [o]`name'"_args"
 ;	altrtyp	return type if not int (bogus - syscalls always return int)
 ;		for UNIMPL/OBSOL, name continues with comments
 
 ; types:
 ;	STD	always included
 ;	COMPAT	included on COMPAT #ifdef
 ;	COMPAT4	included on COMPAT_FREEBSD4 #ifdef (FreeBSD 4 compat)
 ;	COMPAT6	included on COMPAT_FREEBSD6 #ifdef (FreeBSD 6 compat)
 ;	COMPAT7	included on COMPAT_FREEBSD7 #ifdef (FreeBSD 7 compat)
 ;	COMPAT10 included on COMPAT_FREEBSD10 #ifdef (FreeBSD 10 compat)
+;	COMPAT11 included on COMPAT_FREEBSD11 #ifdef (FreeBSD 11 compat)
 ;	OBSOL	obsolete, not included in system, only specifies name
 ;	UNIMPL	not implemented, placeholder only
 ;	NOSTD	implemented but as a lkm that can be statically
 ;		compiled in; sysent entry will be filled with lkmressys
 ;		so the SYSCALL_MODULE macro works
 ;	NOARGS	same as STD except do not create structure in sys/sysproto.h
 ;	NODEF	same as STD except only have the entry in the syscall table
 ;		added.  Meaning - do not create structure or function
 ;		prototype in sys/sysproto.h
 ;	NOPROTO	same as STD except do not create structure or
 ;		function prototype in sys/sysproto.h.  Does add a
 ;		definition to syscall.h besides adding a sysent.
 ;	NOTSTATIC syscall is loadable
 ;
 ; Please copy any additions and changes to the following compatability tables:
 ; sys/compat/freebsd32/syscalls.master
 
 ; #ifdef's, etc. may be included, and are copied to the output files.
 
 #include <sys/param.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 
 ; Reserved/unimplemented system calls in the range 0-150 inclusive
 ; are reserved for use in future Berkeley releases.
 ; Additional system calls implemented in vendor and other
 ; redistributions should be placed in the reserved range at the end
 ; of the current calls.
 
 0	AUE_NULL	STD	{ int nosys(void); } syscall nosys_args int
 1	AUE_EXIT	STD	{ void sys_exit(int rval); } exit \
 				    sys_exit_args void
 2	AUE_FORK	STD	{ int fork(void); }
 3	AUE_READ	STD	{ ssize_t read(int fd, void *buf, \
 				    size_t nbyte); }
 4	AUE_WRITE	STD	{ ssize_t write(int fd, const void *buf, \
 				    size_t nbyte); }
 5	AUE_OPEN_RWTC	STD	{ int open(char *path, int flags, int mode); }
 ; XXX should be		{ int open(const char *path, int flags, ...); }
 ; but we're not ready for `const' or varargs.
 ; XXX man page says `mode_t mode'.
 6	AUE_CLOSE	STD	{ int close(int fd); }
 7	AUE_WAIT4	STD	{ int wait4(int pid, int *status, \
 				    int options, struct rusage *rusage); }
 8	AUE_CREAT	COMPAT	{ int creat(char *path, int mode); }
 9	AUE_LINK	STD	{ int link(char *path, char *link); }
 10	AUE_UNLINK	STD	{ int unlink(char *path); }
 11	AUE_NULL	OBSOL	execv
 12	AUE_CHDIR	STD	{ int chdir(char *path); }
 13	AUE_FCHDIR	STD	{ int fchdir(int fd); }
-14	AUE_MKNOD	STD	{ int mknod(char *path, int mode, int dev); }
+14	AUE_MKNOD	COMPAT11 { int mknod(char *path, int mode, int dev); }
 15	AUE_CHMOD	STD	{ int chmod(char *path, int mode); }
 16	AUE_CHOWN	STD	{ int chown(char *path, int uid, int gid); }
 17	AUE_NULL	STD	{ int obreak(char *nsize); } break \
 				    obreak_args int
 18	AUE_GETFSSTAT	COMPAT4	{ int getfsstat(struct ostatfs *buf, \
 				    long bufsize, int mode); }
 19	AUE_LSEEK	COMPAT	{ long lseek(int fd, long offset, \
 				    int whence); }
 20	AUE_GETPID	STD	{ pid_t getpid(void); }
 21	AUE_MOUNT	STD	{ int mount(char *type, char *path, \
 				    int flags, caddr_t data); }
 ; XXX `path' should have type `const char *' but we're not ready for that.
 22	AUE_UMOUNT	STD	{ int unmount(char *path, int flags); }
 23	AUE_SETUID	STD	{ int setuid(uid_t uid); }
 24	AUE_GETUID	STD	{ uid_t getuid(void); }
 25	AUE_GETEUID	STD	{ uid_t geteuid(void); }
 26	AUE_PTRACE	STD	{ int ptrace(int req, pid_t pid, \
 				    caddr_t addr, int data); }
 27	AUE_RECVMSG	STD	{ int recvmsg(int s, struct msghdr *msg, \
 				    int flags); }
 28	AUE_SENDMSG	STD	{ int sendmsg(int s, struct msghdr *msg, \
 				    int flags); }
 29	AUE_RECVFROM	STD	{ int recvfrom(int s, caddr_t buf, \
 				    size_t len, int flags, \
 				    struct sockaddr * __restrict from, \
 				    __socklen_t * __restrict fromlenaddr); }
 30	AUE_ACCEPT	STD	{ int accept(int s, \
 				    struct sockaddr * __restrict name, \
 				    __socklen_t * __restrict anamelen); }
 31	AUE_GETPEERNAME	STD	{ int getpeername(int fdes, \
 				    struct sockaddr * __restrict asa, \
 				    __socklen_t * __restrict alen); }
 32	AUE_GETSOCKNAME	STD	{ int getsockname(int fdes, \
 				    struct sockaddr * __restrict asa, \
 				    __socklen_t * __restrict alen); }
 33	AUE_ACCESS	STD	{ int access(char *path, int amode); }
 34	AUE_CHFLAGS	STD	{ int chflags(const char *path, u_long flags); }
 35	AUE_FCHFLAGS	STD	{ int fchflags(int fd, u_long flags); }
 36	AUE_SYNC	STD	{ int sync(void); }
 37	AUE_KILL	STD	{ int kill(int pid, int signum); }
 38	AUE_STAT	COMPAT	{ int stat(char *path, struct ostat *ub); }
 39	AUE_GETPPID	STD	{ pid_t getppid(void); }
 40	AUE_LSTAT	COMPAT	{ int lstat(char *path, struct ostat *ub); }
 41	AUE_DUP		STD	{ int dup(u_int fd); }
 42	AUE_PIPE	COMPAT10	{ int pipe(void); }
 43	AUE_GETEGID	STD	{ gid_t getegid(void); }
 44	AUE_PROFILE	STD	{ int profil(caddr_t samples, size_t size, \
 				    size_t offset, u_int scale); }
 45	AUE_KTRACE	STD	{ int ktrace(const char *fname, int ops, \
 				    int facs, int pid); }
 46	AUE_SIGACTION	COMPAT	{ int sigaction(int signum, \
 				    struct osigaction *nsa, \
 				    struct osigaction *osa); }
 47	AUE_GETGID	STD	{ gid_t getgid(void); }
 48	AUE_SIGPROCMASK	COMPAT	{ int sigprocmask(int how, osigset_t mask); }
 ; XXX note nonstandard (bogus) calling convention - the libc stub passes
 ; us the mask, not a pointer to it, and we return the old mask as the
 ; (int) return value.
 49	AUE_GETLOGIN	STD	{ int getlogin(char *namebuf, u_int \
 				    namelen); }
 50	AUE_SETLOGIN	STD	{ int setlogin(char *namebuf); }
 51	AUE_ACCT	STD	{ int acct(char *path); }
 52	AUE_SIGPENDING	COMPAT	{ int sigpending(void); }
 53	AUE_SIGALTSTACK	STD	{ int sigaltstack(stack_t *ss, \
 				    stack_t *oss); }
 54	AUE_IOCTL	STD	{ int ioctl(int fd, u_long com, \
 				    caddr_t data); }
 55	AUE_REBOOT	STD	{ int reboot(int opt); }
 56	AUE_REVOKE	STD	{ int revoke(char *path); }
 57	AUE_SYMLINK	STD	{ int symlink(char *path, char *link); }
 58	AUE_READLINK	STD	{ ssize_t readlink(char *path, char *buf, \
 				    size_t count); }
 59	AUE_EXECVE	STD	{ int execve(char *fname, char **argv, \
 				    char **envv); }
 60	AUE_UMASK	STD	{ int umask(int newmask); } umask umask_args \
 				    int
 61	AUE_CHROOT	STD	{ int chroot(char *path); }
 62	AUE_FSTAT	COMPAT	{ int fstat(int fd, struct ostat *sb); }
 63	AUE_NULL	COMPAT	{ int getkerninfo(int op, char *where, \
 				    size_t *size, int arg); } getkerninfo \
 				    getkerninfo_args int
 64	AUE_NULL	COMPAT	{ int getpagesize(void); } getpagesize \
 				    getpagesize_args int
 65	AUE_MSYNC	STD	{ int msync(void *addr, size_t len, \
 				    int flags); }
 66	AUE_VFORK	STD	{ int vfork(void); }
 67	AUE_NULL	OBSOL	vread
 68	AUE_NULL	OBSOL	vwrite
 69	AUE_SBRK	STD	{ int sbrk(int incr); }
 70	AUE_SSTK	STD	{ int sstk(int incr); }
 71	AUE_MMAP	COMPAT	{ int mmap(void *addr, int len, int prot, \
 				    int flags, int fd, long pos); }
 72	AUE_O_VADVISE	STD	{ int ovadvise(int anom); } vadvise \
 				    ovadvise_args int
 73	AUE_MUNMAP	STD	{ int munmap(void *addr, size_t len); }
 74	AUE_MPROTECT	STD	{ int mprotect(void *addr, size_t len, \
 				    int prot); }
 75	AUE_MADVISE	STD	{ int madvise(void *addr, size_t len, \
 				    int behav); }
 76	AUE_NULL	OBSOL	vhangup
 77	AUE_NULL	OBSOL	vlimit
 78	AUE_MINCORE	STD	{ int mincore(const void *addr, size_t len, \
 				    char *vec); }
 79	AUE_GETGROUPS	STD	{ int getgroups(u_int gidsetsize, \
 				    gid_t *gidset); }
 80	AUE_SETGROUPS	STD	{ int setgroups(u_int gidsetsize, \
 				    gid_t *gidset); }
 81	AUE_GETPGRP	STD	{ int getpgrp(void); }
 82	AUE_SETPGRP	STD	{ int setpgid(int pid, int pgid); }
 83	AUE_SETITIMER	STD	{ int setitimer(u_int which, struct \
 				    itimerval *itv, struct itimerval *oitv); }
 84	AUE_WAIT4	COMPAT	{ int wait(void); }
 85	AUE_SWAPON	STD	{ int swapon(char *name); }
 86	AUE_GETITIMER	STD	{ int getitimer(u_int which, \
 				    struct itimerval *itv); }
 87	AUE_SYSCTL	COMPAT	{ int gethostname(char *hostname, \
 				    u_int len); } gethostname \
 				    gethostname_args int
 88	AUE_SYSCTL	COMPAT	{ int sethostname(char *hostname, \
 				    u_int len); } sethostname \
 				    sethostname_args int
 89	AUE_GETDTABLESIZE	STD	{ int getdtablesize(void); }
 90	AUE_DUP2	STD	{ int dup2(u_int from, u_int to); }
 91	AUE_NULL	UNIMPL	getdopt
 92	AUE_FCNTL	STD	{ int fcntl(int fd, int cmd, long arg); }
 ; XXX should be	{ int fcntl(int fd, int cmd, ...); }
 ; but we're not ready for varargs.
 93	AUE_SELECT	STD	{ int select(int nd, fd_set *in, fd_set *ou, \
 				    fd_set *ex, struct timeval *tv); }
 94	AUE_NULL	UNIMPL	setdopt
 95	AUE_FSYNC	STD	{ int fsync(int fd); }
 96	AUE_SETPRIORITY	STD	{ int setpriority(int which, int who, \
 				    int prio); }
 97	AUE_SOCKET	STD	{ int socket(int domain, int type, \
 				    int protocol); }
 98	AUE_CONNECT	STD	{ int connect(int s, caddr_t name, \
 				    int namelen); }
 99	AUE_ACCEPT	COMPAT|NOARGS { int accept(int s, caddr_t name, \
 				    int *anamelen); } accept accept_args int
 100	AUE_GETPRIORITY	STD	{ int getpriority(int which, int who); }
 101	AUE_SEND	COMPAT	{ int send(int s, caddr_t buf, int len, \
 				    int flags); }
 102	AUE_RECV	COMPAT	{ int recv(int s, caddr_t buf, int len, \
 				    int flags); }
 103	AUE_SIGRETURN	COMPAT	{ int sigreturn( \
 				    struct osigcontext *sigcntxp); }
 104	AUE_BIND	STD	{ int bind(int s, caddr_t name, \
 				    int namelen); }
 105	AUE_SETSOCKOPT	STD	{ int setsockopt(int s, int level, int name, \
 				    caddr_t val, int valsize); }
 106	AUE_LISTEN	STD	{ int listen(int s, int backlog); }
 107	AUE_NULL	OBSOL	vtimes
 108	AUE_NULL	COMPAT	{ int sigvec(int signum, struct sigvec *nsv, \
 				    struct sigvec *osv); }
 109	AUE_NULL	COMPAT	{ int sigblock(int mask); }
 110	AUE_NULL	COMPAT	{ int sigsetmask(int mask); }
 111	AUE_NULL	COMPAT	{ int sigsuspend(osigset_t mask); }
 ; XXX note nonstandard (bogus) calling convention - the libc stub passes
 ; us the mask, not a pointer to it.
 112	AUE_NULL	COMPAT	{ int sigstack(struct sigstack *nss, \
 				    struct sigstack *oss); }
 113	AUE_RECVMSG	COMPAT	{ int recvmsg(int s, struct omsghdr *msg, \
 				    int flags); }
 114	AUE_SENDMSG	COMPAT	{ int sendmsg(int s, caddr_t msg, \
 				    int flags); }
 115	AUE_NULL	OBSOL	vtrace
 116	AUE_GETTIMEOFDAY	STD	{ int gettimeofday(struct timeval *tp, \
 				    struct timezone *tzp); }
 117	AUE_GETRUSAGE	STD	{ int getrusage(int who, \
 				    struct rusage *rusage); }
 118	AUE_GETSOCKOPT	STD	{ int getsockopt(int s, int level, int name, \
 				    caddr_t val, int *avalsize); }
 119	AUE_NULL	UNIMPL	resuba (BSD/OS 2.x)
 120	AUE_READV	STD	{ int readv(int fd, struct iovec *iovp, \
 				    u_int iovcnt); }
 121	AUE_WRITEV	STD	{ int writev(int fd, struct iovec *iovp, \
 				    u_int iovcnt); }
 122	AUE_SETTIMEOFDAY	STD	{ int settimeofday(struct timeval *tv, \
 				    struct timezone *tzp); }
 123	AUE_FCHOWN	STD	{ int fchown(int fd, int uid, int gid); }
 124	AUE_FCHMOD	STD	{ int fchmod(int fd, int mode); }
 125	AUE_RECVFROM	COMPAT|NOARGS { int recvfrom(int s, caddr_t buf, \
 				    size_t len, int flags, caddr_t from, int \
 				    *fromlenaddr); } recvfrom recvfrom_args \
 				    int
 126	AUE_SETREUID	STD	{ int setreuid(int ruid, int euid); }
 127	AUE_SETREGID	STD	{ int setregid(int rgid, int egid); }
 128	AUE_RENAME	STD	{ int rename(char *from, char *to); }
 129	AUE_TRUNCATE	COMPAT	{ int truncate(char *path, long length); }
 130	AUE_FTRUNCATE	COMPAT	{ int ftruncate(int fd, long length); }
 131	AUE_FLOCK	STD	{ int flock(int fd, int how); }
 132	AUE_MKFIFO	STD	{ int mkfifo(char *path, int mode); }
 133	AUE_SENDTO	STD	{ int sendto(int s, caddr_t buf, size_t len, \
 				    int flags, caddr_t to, int tolen); }
 134	AUE_SHUTDOWN	STD	{ int shutdown(int s, int how); }
 135	AUE_SOCKETPAIR	STD	{ int socketpair(int domain, int type, \
 				    int protocol, int *rsv); }
 136	AUE_MKDIR	STD	{ int mkdir(char *path, int mode); }
 137	AUE_RMDIR	STD	{ int rmdir(char *path); }
 138	AUE_UTIMES	STD	{ int utimes(char *path, \
 				    struct timeval *tptr); }
 139	AUE_NULL	OBSOL	4.2 sigreturn
 140	AUE_ADJTIME	STD	{ int adjtime(struct timeval *delta, \
 				    struct timeval *olddelta); }
 141	AUE_GETPEERNAME	COMPAT	{ int getpeername(int fdes, caddr_t asa, \
 				    int *alen); }
 142	AUE_SYSCTL	COMPAT	{ long gethostid(void); }
 143	AUE_SYSCTL	COMPAT	{ int sethostid(long hostid); }
 144	AUE_GETRLIMIT	COMPAT	{ int getrlimit(u_int which, struct \
 				    orlimit *rlp); }
 145	AUE_SETRLIMIT	COMPAT	{ int setrlimit(u_int which, \
 				    struct orlimit *rlp); }
 146	AUE_KILLPG	COMPAT	{ int killpg(int pgid, int signum); }
 147	AUE_SETSID	STD	{ int setsid(void); }
 148	AUE_QUOTACTL	STD	{ int quotactl(char *path, int cmd, int uid, \
 				    caddr_t arg); }
 149	AUE_O_QUOTA	COMPAT	{ int quota(void); }
 150	AUE_GETSOCKNAME	COMPAT|NOARGS { int getsockname(int fdec, \
 				    caddr_t asa, int *alen); } getsockname \
 				    getsockname_args int
 
 ; Syscalls 151-180 inclusive are reserved for vendor-specific
 ; system calls.  (This includes various calls added for compatibity
 ; with other Unix variants.)
 ; Some of these calls are now supported by BSD...
 151	AUE_NULL	UNIMPL	sem_lock (BSD/OS 2.x)
 152	AUE_NULL	UNIMPL	sem_wakeup (BSD/OS 2.x)
 153	AUE_NULL	UNIMPL	asyncdaemon (BSD/OS 2.x)
 ; 154 is initialised by the NLM code, if present.
 154	AUE_NULL	NOSTD	{ int nlm_syscall(int debug_level, int grace_period, int addr_count, char **addrs); }
 ; 155 is initialized by the NFS code, if present.
 155	AUE_NFS_SVC	NOSTD	{ int nfssvc(int flag, caddr_t argp); }
 156	AUE_GETDIRENTRIES	COMPAT	{ int getdirentries(int fd, char *buf, \
 				    u_int count, long *basep); }
 157	AUE_STATFS	COMPAT4	{ int statfs(char *path, \
 				    struct ostatfs *buf); }
 158	AUE_FSTATFS	COMPAT4	{ int fstatfs(int fd, \
 				    struct ostatfs *buf); }
 159	AUE_NULL	UNIMPL	nosys
 160	AUE_LGETFH	STD	{ int lgetfh(char *fname, \
 				    struct fhandle *fhp); }
 161	AUE_NFS_GETFH	STD	{ int getfh(char *fname, \
 				    struct fhandle *fhp); }
 162	AUE_SYSCTL	COMPAT4	{ int getdomainname(char *domainname, \
 				    int len); }
 163	AUE_SYSCTL	COMPAT4	{ int setdomainname(char *domainname, \
 				    int len); }
 164	AUE_NULL	COMPAT4	{ int uname(struct utsname *name); }
 165	AUE_SYSARCH	STD	{ int sysarch(int op, char *parms); }
 166	AUE_RTPRIO	STD	{ int rtprio(int function, pid_t pid, \
 				    struct rtprio *rtp); }
 167	AUE_NULL	UNIMPL	nosys
 168	AUE_NULL	UNIMPL	nosys
 169	AUE_SEMSYS	NOSTD	{ int semsys(int which, int a2, int a3, \
 				    int a4, int a5); }
 ; XXX should be	{ int semsys(int which, ...); }
 170	AUE_MSGSYS	NOSTD	{ int msgsys(int which, int a2, int a3, \
 				    int a4, int a5, int a6); }
 ; XXX should be	{ int msgsys(int which, ...); }
 171	AUE_SHMSYS	NOSTD	{ int shmsys(int which, int a2, int a3, \
 				    int a4); }
 ; XXX should be	{ int shmsys(int which, ...); }
 172	AUE_NULL	UNIMPL	nosys
 173	AUE_PREAD	COMPAT6	{ ssize_t pread(int fd, void *buf, \
 				    size_t nbyte, int pad, off_t offset); }
 174	AUE_PWRITE	COMPAT6	{ ssize_t pwrite(int fd, \
 				    const void *buf, \
 				    size_t nbyte, int pad, off_t offset); }
 175	AUE_SETFIB	STD	{ int setfib(int fibnum); }
 176	AUE_NTP_ADJTIME	STD	{ int ntp_adjtime(struct timex *tp); }
 177	AUE_NULL	UNIMPL	sfork (BSD/OS 2.x)
 178	AUE_NULL	UNIMPL	getdescriptor (BSD/OS 2.x)
 179	AUE_NULL	UNIMPL	setdescriptor (BSD/OS 2.x)
 180	AUE_NULL	UNIMPL	nosys
 
 ; Syscalls 181-199 are used by/reserved for BSD
 181	AUE_SETGID	STD	{ int setgid(gid_t gid); }
 182	AUE_SETEGID	STD	{ int setegid(gid_t egid); }
 183	AUE_SETEUID	STD	{ int seteuid(uid_t euid); }
 184	AUE_NULL	UNIMPL	lfs_bmapv
 185	AUE_NULL	UNIMPL	lfs_markv
 186	AUE_NULL	UNIMPL	lfs_segclean
 187	AUE_NULL	UNIMPL	lfs_segwait
-188	AUE_STAT	STD	{ int stat(char *path, struct stat *ub); }
-189	AUE_FSTAT	STD	{ int fstat(int fd, struct stat *sb); }
-190	AUE_LSTAT	STD	{ int lstat(char *path, struct stat *ub); }
+188	AUE_STAT	COMPAT11 { int stat(char *path, \
+				    struct freebsd11_stat *ub); }
+189	AUE_FSTAT	COMPAT11 { int fstat(int fd, \
+				    struct freebsd11_stat *sb); }
+190	AUE_LSTAT	COMPAT11 { int lstat(char *path, \
+				    struct freebsd11_stat *ub); }
 191	AUE_PATHCONF	STD	{ int pathconf(char *path, int name); }
 192	AUE_FPATHCONF	STD	{ int fpathconf(int fd, int name); }
 193	AUE_NULL	UNIMPL	nosys
 194	AUE_GETRLIMIT	STD	{ int getrlimit(u_int which, \
 				    struct rlimit *rlp); } getrlimit \
 				    __getrlimit_args int
 195	AUE_SETRLIMIT	STD	{ int setrlimit(u_int which, \
 				    struct rlimit *rlp); } setrlimit \
 				    __setrlimit_args int
-196	AUE_GETDIRENTRIES	STD	{ int getdirentries(int fd, char *buf, \
+196	AUE_GETDIRENTRIES	COMPAT11 { int getdirentries(int fd, char *buf, \
 				    u_int count, long *basep); }
 197	AUE_MMAP	COMPAT6	{ caddr_t mmap(caddr_t addr, \
 				    size_t len, int prot, int flags, int fd, \
 				    int pad, off_t pos); }
 198	AUE_NULL	NOPROTO	{ int nosys(void); } __syscall \
 				    __syscall_args int
 199	AUE_LSEEK	COMPAT6	{ off_t lseek(int fd, int pad, \
 				    off_t offset, int whence); }
 200	AUE_TRUNCATE	COMPAT6	{ int truncate(char *path, int pad, \
 				    off_t length); }
 201	AUE_FTRUNCATE	COMPAT6	{ int ftruncate(int fd, int pad, \
 				    off_t length); }
 202	AUE_SYSCTL	STD	{ int __sysctl(int *name, u_int namelen, \
 				    void *old, size_t *oldlenp, void *new, \
 				    size_t newlen); } __sysctl sysctl_args int
 203	AUE_MLOCK	STD	{ int mlock(const void *addr, size_t len); }
 204	AUE_MUNLOCK	STD	{ int munlock(const void *addr, size_t len); }
 205	AUE_UNDELETE	STD	{ int undelete(char *path); }
 206	AUE_FUTIMES	STD	{ int futimes(int fd, struct timeval *tptr); }
 207	AUE_GETPGID	STD	{ int getpgid(pid_t pid); }
 208	AUE_NULL	UNIMPL	newreboot (NetBSD)
 209	AUE_POLL	STD	{ int poll(struct pollfd *fds, u_int nfds, \
 				    int timeout); }
 
 ;
 ; The following are reserved for loadable syscalls
 ;
 210	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 211	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 212	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 213	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 214	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 215	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 216	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 217	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 218	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 219	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 
 ;
 ; The following were introduced with NetBSD/4.4Lite-2
 220	AUE_SEMCTL	COMPAT7|NOSTD { int __semctl(int semid, int semnum, \
 				    int cmd, union semun_old *arg); }
 221	AUE_SEMGET	NOSTD	{ int semget(key_t key, int nsems, \
 				    int semflg); }
 222	AUE_SEMOP	NOSTD	{ int semop(int semid, struct sembuf *sops, \
 				    size_t nsops); }
 223	AUE_NULL	UNIMPL	semconfig
 224	AUE_MSGCTL	COMPAT7|NOSTD { int msgctl(int msqid, int cmd, \
 				    struct msqid_ds_old *buf); }
 225	AUE_MSGGET	NOSTD	{ int msgget(key_t key, int msgflg); }
 226	AUE_MSGSND	NOSTD	{ int msgsnd(int msqid, const void *msgp, \
 				    size_t msgsz, int msgflg); }
 227	AUE_MSGRCV	NOSTD	{ ssize_t msgrcv(int msqid, void *msgp, \
 				    size_t msgsz, long msgtyp, int msgflg); }
 228	AUE_SHMAT	NOSTD	{ int shmat(int shmid, const void *shmaddr, \
 				    int shmflg); }
 229	AUE_SHMCTL	COMPAT7|NOSTD { int shmctl(int shmid, int cmd, \
 				    struct shmid_ds_old *buf); }
 230	AUE_SHMDT	NOSTD	{ int shmdt(const void *shmaddr); }
 231	AUE_SHMGET	NOSTD	{ int shmget(key_t key, size_t size, \
 				    int shmflg); }
 ;
 232	AUE_NULL	STD	{ int clock_gettime(clockid_t clock_id, \
 				    struct timespec *tp); }
 233	AUE_CLOCK_SETTIME	STD	{ int clock_settime( \
 				    clockid_t clock_id, \
 				    const struct timespec *tp); }
 234	AUE_NULL	STD	{ int clock_getres(clockid_t clock_id, \
 				    struct timespec *tp); }
 235	AUE_NULL	STD	{ int ktimer_create(clockid_t clock_id, \
 				    struct sigevent *evp, int *timerid); }
 236	AUE_NULL	STD	{ int ktimer_delete(int timerid); }
 237	AUE_NULL	STD	{ int ktimer_settime(int timerid, int flags, \
 				    const struct itimerspec *value, \
 				    struct itimerspec *ovalue); }
 238	AUE_NULL	STD	{ int ktimer_gettime(int timerid, struct \
 				    itimerspec *value); }
 239	AUE_NULL	STD	{ int ktimer_getoverrun(int timerid); }
 240	AUE_NULL	STD	{ int nanosleep(const struct timespec *rqtp, \
 				    struct timespec *rmtp); }
 241	AUE_NULL	STD	{ int ffclock_getcounter(ffcounter *ffcount); }
 242	AUE_NULL	STD	{ int ffclock_setestimate( \
 				    struct ffclock_estimate *cest); }
 243	AUE_NULL	STD	{ int ffclock_getestimate( \
 				    struct ffclock_estimate *cest); }
 244	AUE_NULL	STD	{ int clock_nanosleep(clockid_t clock_id, \
 				    int flags, const struct timespec *rqtp, \
 				    struct timespec *rmtp); }
 245	AUE_NULL	UNIMPL	nosys
 246	AUE_NULL	UNIMPL	nosys
 247	AUE_NULL	STD	{ int clock_getcpuclockid2(id_t id,\
 				    int which, clockid_t *clock_id); }
 248	AUE_NULL	STD	{ int ntp_gettime(struct ntptimeval *ntvp); }
 249	AUE_NULL	UNIMPL	nosys
 ; syscall numbers initially used in OpenBSD
 250	AUE_MINHERIT	STD	{ int minherit(void *addr, size_t len, \
 				    int inherit); }
 251	AUE_RFORK	STD	{ int rfork(int flags); }
 252	AUE_POLL	OBSOL	openbsd_poll
 253	AUE_ISSETUGID	STD	{ int issetugid(void); }
 254	AUE_LCHOWN	STD	{ int lchown(char *path, int uid, int gid); }
 255	AUE_AIO_READ	STD	{ int aio_read(struct aiocb *aiocbp); }
 256	AUE_AIO_WRITE	STD	{ int aio_write(struct aiocb *aiocbp); }
 257	AUE_LIO_LISTIO	STD	{ int lio_listio(int mode, \
 				    struct aiocb * const *acb_list, \
 				    int nent, struct sigevent *sig); }
 258	AUE_NULL	UNIMPL	nosys
 259	AUE_NULL	UNIMPL	nosys
 260	AUE_NULL	UNIMPL	nosys
 261	AUE_NULL	UNIMPL	nosys
 262	AUE_NULL	UNIMPL	nosys
 263	AUE_NULL	UNIMPL	nosys
 264	AUE_NULL	UNIMPL	nosys
 265	AUE_NULL	UNIMPL	nosys
 266	AUE_NULL	UNIMPL	nosys
 267	AUE_NULL	UNIMPL	nosys
 268	AUE_NULL	UNIMPL	nosys
 269	AUE_NULL	UNIMPL	nosys
 270	AUE_NULL	UNIMPL	nosys
 271	AUE_NULL	UNIMPL	nosys
-272	AUE_O_GETDENTS	STD	{ int getdents(int fd, char *buf, \
+272	AUE_O_GETDENTS	COMPAT11 { int getdents(int fd, char *buf, \
 				    size_t count); }
 273	AUE_NULL	UNIMPL	nosys
 274	AUE_LCHMOD	STD	{ int lchmod(char *path, mode_t mode); }
 275	AUE_LCHOWN	NOPROTO	{ int lchown(char *path, uid_t uid, \
 				    gid_t gid); } netbsd_lchown lchown_args \
 				    int
 276	AUE_LUTIMES	STD	{ int lutimes(char *path, \
 				    struct timeval *tptr); }
 277	AUE_MSYNC	NOPROTO	{ int msync(void *addr, size_t len, \
 				    int flags); } netbsd_msync msync_args int
-278	AUE_STAT	STD	{ int nstat(char *path, struct nstat *ub); }
-279	AUE_FSTAT	STD	{ int nfstat(int fd, struct nstat *sb); }
-280	AUE_LSTAT	STD	{ int nlstat(char *path, struct nstat *ub); }
+278	AUE_STAT	COMPAT11 { int nstat(char *path, struct nstat *ub); }
+279	AUE_FSTAT	COMPAT11 { int nfstat(int fd, struct nstat *sb); }
+280	AUE_LSTAT	COMPAT11 { int nlstat(char *path, struct nstat *ub); }
 281	AUE_NULL	UNIMPL	nosys
 282	AUE_NULL	UNIMPL	nosys
 283	AUE_NULL	UNIMPL	nosys
 284	AUE_NULL	UNIMPL	nosys
 285	AUE_NULL	UNIMPL	nosys
 286	AUE_NULL	UNIMPL	nosys
 287	AUE_NULL	UNIMPL	nosys
 288	AUE_NULL	UNIMPL	nosys
 ; 289 and 290 from NetBSD (OpenBSD: 267 and 268)
 289	AUE_PREADV	STD	{ ssize_t preadv(int fd, struct iovec *iovp, \
 					u_int iovcnt, off_t offset); }
 290	AUE_PWRITEV	STD	{ ssize_t pwritev(int fd, struct iovec *iovp, \
 					u_int iovcnt, off_t offset); }
 291	AUE_NULL	UNIMPL	nosys
 292	AUE_NULL	UNIMPL	nosys
 293	AUE_NULL	UNIMPL	nosys
 294	AUE_NULL	UNIMPL	nosys
 295	AUE_NULL	UNIMPL	nosys
 296	AUE_NULL	UNIMPL	nosys
 ; XXX 297 is 300 in NetBSD 
 297	AUE_FHSTATFS	COMPAT4	{ int fhstatfs( \
 				    const struct fhandle *u_fhp, \
 				    struct ostatfs *buf); }
 298	AUE_FHOPEN	STD	{ int fhopen(const struct fhandle *u_fhp, \
 				    int flags); }
-299	AUE_FHSTAT	STD	{ int fhstat(const struct fhandle *u_fhp, \
-				    struct stat *sb); }
+299	AUE_FHSTAT	COMPAT11 { int fhstat(const struct fhandle *u_fhp, \
+				    struct freebsd11_stat *sb); }
 ; syscall numbers for FreeBSD
 300	AUE_NULL	STD	{ int modnext(int modid); }
 301	AUE_NULL	STD	{ int modstat(int modid, \
 				    struct module_stat *stat); }
 302	AUE_NULL	STD	{ int modfnext(int modid); }
 303	AUE_NULL	STD	{ int modfind(const char *name); }
 304	AUE_MODLOAD	STD	{ int kldload(const char *file); }
 305	AUE_MODUNLOAD	STD	{ int kldunload(int fileid); }
 306	AUE_NULL	STD	{ int kldfind(const char *file); }
 307	AUE_NULL	STD	{ int kldnext(int fileid); }
 308	AUE_NULL	STD	{ int kldstat(int fileid, struct \
 				    kld_file_stat* stat); }
 309	AUE_NULL	STD	{ int kldfirstmod(int fileid); }
 310	AUE_GETSID	STD	{ int getsid(pid_t pid); }
 311	AUE_SETRESUID	STD	{ int setresuid(uid_t ruid, uid_t euid, \
 				    uid_t suid); }
 312	AUE_SETRESGID	STD	{ int setresgid(gid_t rgid, gid_t egid, \
 				    gid_t sgid); }
 313	AUE_NULL	OBSOL	signanosleep
 314	AUE_AIO_RETURN	STD	{ ssize_t aio_return(struct aiocb *aiocbp); }
 315	AUE_AIO_SUSPEND	STD	{ int aio_suspend( \
 				    struct aiocb * const * aiocbp, int nent, \
 				    const struct timespec *timeout); }
 316	AUE_AIO_CANCEL	STD	{ int aio_cancel(int fd, \
 				    struct aiocb *aiocbp); }
 317	AUE_AIO_ERROR	STD	{ int aio_error(struct aiocb *aiocbp); }
 318	AUE_AIO_READ	COMPAT6	{ int aio_read(struct oaiocb *aiocbp); }
 319	AUE_AIO_WRITE	COMPAT6	{ int aio_write(struct oaiocb *aiocbp); }
 320	AUE_LIO_LISTIO	COMPAT6	{ int lio_listio(int mode, \
 				    struct oaiocb * const *acb_list, \
 				    int nent, struct osigevent *sig); }
 321	AUE_NULL	STD	{ int yield(void); }
 322	AUE_NULL	OBSOL	thr_sleep
 323	AUE_NULL	OBSOL	thr_wakeup
 324	AUE_MLOCKALL	STD	{ int mlockall(int how); }
 325	AUE_MUNLOCKALL	STD	{ int munlockall(void); }
 326	AUE_GETCWD	STD	{ int __getcwd(char *buf, size_t buflen); }
 
 327	AUE_NULL	STD	{ int sched_setparam (pid_t pid, \
 				    const struct sched_param *param); }
 328	AUE_NULL	STD	{ int sched_getparam (pid_t pid, struct \
 				    sched_param *param); }
 
 329	AUE_NULL	STD	{ int sched_setscheduler (pid_t pid, int \
 				    policy, const struct sched_param \
 				    *param); }
 330	AUE_NULL	STD	{ int sched_getscheduler (pid_t pid); }
 
 331	AUE_NULL	STD	{ int sched_yield (void); }
 332	AUE_NULL	STD	{ int sched_get_priority_max (int policy); }
 333	AUE_NULL	STD	{ int sched_get_priority_min (int policy); }
 334	AUE_NULL	STD	{ int sched_rr_get_interval (pid_t pid, \
 				    struct timespec *interval); }
 335	AUE_NULL	STD	{ int utrace(const void *addr, size_t len); }
 336	AUE_SENDFILE	COMPAT4	{ int sendfile(int fd, int s, \
 				    off_t offset, size_t nbytes, \
 				    struct sf_hdtr *hdtr, off_t *sbytes, \
 				    int flags); }
 337	AUE_NULL	STD	{ int kldsym(int fileid, int cmd, \
 				    void *data); }
 338	AUE_JAIL	STD	{ int jail(struct jail *jail); }
 339	AUE_NULL	NOSTD|NOTSTATIC	{ int nnpfs_syscall(int operation, \
 				    char *a_pathP, int a_opcode, \
 				    void *a_paramsP, int a_followSymlinks); }
 340	AUE_SIGPROCMASK	STD	{ int sigprocmask(int how, \
 				    const sigset_t *set, sigset_t *oset); }
 341	AUE_SIGSUSPEND	STD	{ int sigsuspend(const sigset_t *sigmask); }
 342	AUE_SIGACTION	COMPAT4	{ int sigaction(int sig, const \
 				    struct sigaction *act, \
 				    struct sigaction *oact); }
 343	AUE_SIGPENDING	STD	{ int sigpending(sigset_t *set); }
 344	AUE_SIGRETURN	COMPAT4	{ int sigreturn( \
 				    const struct ucontext4 *sigcntxp); }
 345	AUE_SIGWAIT	STD	{ int sigtimedwait(const sigset_t *set, \
 				    siginfo_t *info, \
 				    const struct timespec *timeout); }
 346	AUE_NULL	STD	{ int sigwaitinfo(const sigset_t *set, \
 				    siginfo_t *info); }
 347	AUE_ACL_GET_FILE	STD	{ int __acl_get_file(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 348	AUE_ACL_SET_FILE	STD	{ int __acl_set_file(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 349	AUE_ACL_GET_FD	STD	{ int __acl_get_fd(int filedes, \
 				    acl_type_t type, struct acl *aclp); }
 350	AUE_ACL_SET_FD	STD	{ int __acl_set_fd(int filedes, \
 				    acl_type_t type, struct acl *aclp); }
 351	AUE_ACL_DELETE_FILE	STD	{ int __acl_delete_file(const char *path, \
 				    acl_type_t type); }
 352	AUE_ACL_DELETE_FD	STD	{ int __acl_delete_fd(int filedes, \
 				    acl_type_t type); }
 353	AUE_ACL_CHECK_FILE	STD	{ int __acl_aclcheck_file(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 354	AUE_ACL_CHECK_FD	STD	{ int __acl_aclcheck_fd(int filedes, \
 				    acl_type_t type, struct acl *aclp); }
 355	AUE_EXTATTRCTL	STD	{ int extattrctl(const char *path, int cmd, \
 				    const char *filename, int attrnamespace, \
 				    const char *attrname); }
 356	AUE_EXTATTR_SET_FILE	STD	{ ssize_t extattr_set_file( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 357	AUE_EXTATTR_GET_FILE	STD	{ ssize_t extattr_get_file( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 358	AUE_EXTATTR_DELETE_FILE	STD	{ int extattr_delete_file(const char *path, \
 				    int attrnamespace, \
 				    const char *attrname); }
 359	AUE_AIO_WAITCOMPLETE	STD	{ ssize_t aio_waitcomplete( \
 				    struct aiocb **aiocbp, \
 				    struct timespec *timeout); }
 360	AUE_GETRESUID	STD	{ int getresuid(uid_t *ruid, uid_t *euid, \
 				    uid_t *suid); }
 361	AUE_GETRESGID	STD	{ int getresgid(gid_t *rgid, gid_t *egid, \
 				    gid_t *sgid); }
 362	AUE_KQUEUE	STD	{ int kqueue(void); }
 363	AUE_KEVENT	STD	{ int kevent(int fd, \
 				    struct kevent *changelist, int nchanges, \
 				    struct kevent *eventlist, int nevents, \
 				    const struct timespec *timeout); }
 364	AUE_NULL	UNIMPL	__cap_get_proc
 365	AUE_NULL	UNIMPL	__cap_set_proc
 366	AUE_NULL	UNIMPL	__cap_get_fd
 367	AUE_NULL	UNIMPL	__cap_get_file
 368	AUE_NULL	UNIMPL	__cap_set_fd
 369	AUE_NULL	UNIMPL	__cap_set_file
 370	AUE_NULL	UNIMPL	nosys
 371	AUE_EXTATTR_SET_FD	STD	{ ssize_t extattr_set_fd(int fd, \
 				    int attrnamespace, const char *attrname, \
 				    void *data, size_t nbytes); }
 372	AUE_EXTATTR_GET_FD	STD	{ ssize_t extattr_get_fd(int fd, \
 				    int attrnamespace, const char *attrname, \
 				    void *data, size_t nbytes); }
 373	AUE_EXTATTR_DELETE_FD	STD	{ int extattr_delete_fd(int fd, \
 				    int attrnamespace, \
 				    const char *attrname); }
 374	AUE_SETUGID	STD	{ int __setugid(int flag); }
 375	AUE_NULL	UNIMPL	nfsclnt
 376	AUE_EACCESS	STD	{ int eaccess(char *path, int amode); }
 377	AUE_NULL	NOSTD|NOTSTATIC	{ int afs3_syscall(long syscall, \
 				    long parm1, long parm2, long parm3, \
 				    long parm4, long parm5, long parm6); }
 378	AUE_NMOUNT	STD	{ int nmount(struct iovec *iovp, \
 				    unsigned int iovcnt, int flags); }
 379	AUE_NULL	UNIMPL	kse_exit
 380	AUE_NULL	UNIMPL	kse_wakeup
 381	AUE_NULL	UNIMPL	kse_create
 382	AUE_NULL	UNIMPL	kse_thr_interrupt
 383	AUE_NULL	UNIMPL	kse_release
 384	AUE_NULL	STD	{ int __mac_get_proc(struct mac *mac_p); }
 385	AUE_NULL	STD	{ int __mac_set_proc(struct mac *mac_p); }
 386	AUE_NULL	STD	{ int __mac_get_fd(int fd, \
 				    struct mac *mac_p); }
 387	AUE_NULL	STD	{ int __mac_get_file(const char *path_p, \
 				    struct mac *mac_p); }
 388	AUE_NULL	STD	{ int __mac_set_fd(int fd, \
 				    struct mac *mac_p); }
 389	AUE_NULL	STD	{ int __mac_set_file(const char *path_p, \
 				    struct mac *mac_p); }
 390	AUE_NULL	STD	{ int kenv(int what, const char *name, \
 				    char *value, int len); }
 391	AUE_LCHFLAGS	STD	{ int lchflags(const char *path, \
 				    u_long flags); }
 392	AUE_NULL	STD	{ int uuidgen(struct uuid *store, \
 				    int count); }
 393	AUE_SENDFILE	STD	{ int sendfile(int fd, int s, off_t offset, \
 				    size_t nbytes, struct sf_hdtr *hdtr, \
 				    off_t *sbytes, int flags); }
 394	AUE_NULL	STD	{ int mac_syscall(const char *policy, \
 				    int call, void *arg); }
-395	AUE_GETFSSTAT	STD	{ int getfsstat(struct statfs *buf, \
+395	AUE_GETFSSTAT	COMPAT11 { int getfsstat(struct freebsd11_statfs *buf, \
 				    long bufsize, int mode); }
-396	AUE_STATFS	STD	{ int statfs(char *path, \
-				    struct statfs *buf); }
-397	AUE_FSTATFS	STD	{ int fstatfs(int fd, struct statfs *buf); }
-398	AUE_FHSTATFS	STD	{ int fhstatfs(const struct fhandle *u_fhp, \
-				    struct statfs *buf); }
+396	AUE_STATFS	COMPAT11 { int statfs(char *path, \
+				    struct freebsd11_statfs *buf); }
+397	AUE_FSTATFS	COMPAT11 { int fstatfs(int fd, \
+				    struct freebsd11_statfs *buf); }
+398	AUE_FHSTATFS	COMPAT11 { int fhstatfs(const struct fhandle *u_fhp, \
+				    struct freebsd11_statfs *buf); }
 399	AUE_NULL	UNIMPL	nosys
 400	AUE_SEMCLOSE	NOSTD	{ int ksem_close(semid_t id); }
 401	AUE_SEMPOST	NOSTD	{ int ksem_post(semid_t id); }
 402	AUE_SEMWAIT	NOSTD	{ int ksem_wait(semid_t id); }
 403	AUE_SEMTRYWAIT	NOSTD	{ int ksem_trywait(semid_t id); }
 404	AUE_SEMINIT	NOSTD	{ int ksem_init(semid_t *idp, \
 				    unsigned int value); }
 405	AUE_SEMOPEN	NOSTD	{ int ksem_open(semid_t *idp, \
 				    const char *name, int oflag, \
 				    mode_t mode, unsigned int value); }
 406	AUE_SEMUNLINK	NOSTD	{ int ksem_unlink(const char *name); }
 407	AUE_SEMGETVALUE	NOSTD	{ int ksem_getvalue(semid_t id, int *val); }
 408	AUE_SEMDESTROY	NOSTD	{ int ksem_destroy(semid_t id); }
 409	AUE_NULL	STD	{ int __mac_get_pid(pid_t pid, \
 				    struct mac *mac_p); }
 410	AUE_NULL	STD	{ int __mac_get_link(const char *path_p, \
 				    struct mac *mac_p); }
 411	AUE_NULL	STD	{ int __mac_set_link(const char *path_p, \
 				    struct mac *mac_p); }
 412	AUE_EXTATTR_SET_LINK	STD	{ ssize_t extattr_set_link( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 413	AUE_EXTATTR_GET_LINK	STD	{ ssize_t extattr_get_link( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 414	AUE_EXTATTR_DELETE_LINK	STD	{ int extattr_delete_link( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname); }
 415	AUE_NULL	STD	{ int __mac_execve(char *fname, char **argv, \
 				    char **envv, struct mac *mac_p); }
 416	AUE_SIGACTION	STD	{ int sigaction(int sig, \
 				    const struct sigaction *act, \
 				    struct sigaction *oact); }
 417	AUE_SIGRETURN	STD	{ int sigreturn( \
 				    const struct __ucontext *sigcntxp); }
 418	AUE_NULL	UNIMPL	__xstat
 419	AUE_NULL	UNIMPL	__xfstat
 420	AUE_NULL	UNIMPL	__xlstat
 421	AUE_NULL	STD	{ int getcontext(struct __ucontext *ucp); }
 422	AUE_NULL	STD	{ int setcontext( \
 				    const struct __ucontext *ucp); }
 423	AUE_NULL	STD	{ int swapcontext(struct __ucontext *oucp, \
 				    const struct __ucontext *ucp); }
 424	AUE_SWAPOFF	STD	{ int swapoff(const char *name); }
 425	AUE_ACL_GET_LINK	STD	{ int __acl_get_link(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 426	AUE_ACL_SET_LINK	STD	{ int __acl_set_link(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 427	AUE_ACL_DELETE_LINK	STD	{ int __acl_delete_link(const char *path, \
 				    acl_type_t type); }
 428	AUE_ACL_CHECK_LINK	STD	{ int __acl_aclcheck_link(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 429	AUE_SIGWAIT	STD	{ int sigwait(const sigset_t *set, \
 				    int *sig); }
 430	AUE_THR_CREATE	STD	{ int thr_create(ucontext_t *ctx, long *id, \
 				    int flags); }
 431	AUE_THR_EXIT	STD	{ void thr_exit(long *state); }
 432	AUE_NULL	STD	{ int thr_self(long *id); }
 433	AUE_THR_KILL	STD	{ int thr_kill(long id, int sig); }
 434	AUE_NULL	UNIMPL	nosys
 435	AUE_NULL	UNIMPL	nosys
 436	AUE_JAIL_ATTACH	STD	{ int jail_attach(int jid); }
 437	AUE_EXTATTR_LIST_FD	STD	{ ssize_t extattr_list_fd(int fd, \
 				    int attrnamespace, void *data, \
 				    size_t nbytes); }
 438	AUE_EXTATTR_LIST_FILE	STD	{ ssize_t extattr_list_file( \
 				    const char *path, int attrnamespace, \
 				    void *data, size_t nbytes); }
 439	AUE_EXTATTR_LIST_LINK	STD	{ ssize_t extattr_list_link( \
 				    const char *path, int attrnamespace, \
 				    void *data, size_t nbytes); }
 440	AUE_NULL	UNIMPL	kse_switchin
 441	AUE_SEMWAIT	NOSTD	{ int ksem_timedwait(semid_t id, \
 				    const struct timespec *abstime); }
 442	AUE_NULL	STD	{ int thr_suspend( \
 				    const struct timespec *timeout); }
 443	AUE_NULL	STD	{ int thr_wake(long id); }
 444	AUE_MODUNLOAD	STD	{ int kldunloadf(int fileid, int flags); }
 445	AUE_AUDIT	STD	{ int audit(const void *record, \
 				    u_int length); }
 446	AUE_AUDITON	STD	{ int auditon(int cmd, void *data, \
 				    u_int length); }
 447	AUE_GETAUID	STD	{ int getauid(uid_t *auid); }
 448	AUE_SETAUID	STD	{ int setauid(uid_t *auid); }
 449	AUE_GETAUDIT	STD	{ int getaudit(struct auditinfo *auditinfo); }
 450	AUE_SETAUDIT	STD	{ int setaudit(struct auditinfo *auditinfo); }
 451	AUE_GETAUDIT_ADDR	STD	{ int getaudit_addr( \
 				    struct auditinfo_addr *auditinfo_addr, \
 				    u_int length); }
 452	AUE_SETAUDIT_ADDR	STD	{ int setaudit_addr( \
 				    struct auditinfo_addr *auditinfo_addr, \
 				    u_int length); }
 453	AUE_AUDITCTL	STD	{ int auditctl(char *path); }
 454	AUE_NULL	STD	{ int _umtx_op(void *obj, int op, \
 				    u_long val, void *uaddr1, void *uaddr2); }
 455	AUE_THR_NEW	STD	{ int thr_new(struct thr_param *param, \
 				    int param_size); }
 456	AUE_NULL	STD	{ int sigqueue(pid_t pid, int signum, void *value); }
 457	AUE_MQ_OPEN	NOSTD	{ int kmq_open(const char *path, int flags, \
 				    mode_t mode, const struct mq_attr *attr); }
 458	AUE_MQ_SETATTR	NOSTD	{ int kmq_setattr(int mqd,		\
 				    const struct mq_attr *attr,		\
 				    struct mq_attr *oattr); }
 459	AUE_MQ_TIMEDRECEIVE	NOSTD	{ int kmq_timedreceive(int mqd,	\
 				    char *msg_ptr, size_t msg_len,	\
 				    unsigned *msg_prio,			\
 				    const struct timespec *abs_timeout); }
 460	AUE_MQ_TIMEDSEND	NOSTD	{ int kmq_timedsend(int mqd,		\
 				    const char *msg_ptr, size_t msg_len,\
 				    unsigned msg_prio,			\
 				    const struct timespec *abs_timeout);}
 461	AUE_MQ_NOTIFY	NOSTD	{ int kmq_notify(int mqd,		\
 				    const struct sigevent *sigev); }
 462	AUE_MQ_UNLINK	NOSTD	{ int kmq_unlink(const char *path); }
 463	AUE_NULL	STD	{ int abort2(const char *why, int nargs, void **args); }
 464	AUE_NULL	STD	{ int thr_set_name(long id, const char *name); }
 465	AUE_AIO_FSYNC	STD	{ int aio_fsync(int op, struct aiocb *aiocbp); }
 466	AUE_RTPRIO	STD	{ int rtprio_thread(int function, \
 				    lwpid_t lwpid, struct rtprio *rtp); }
 467	AUE_NULL	UNIMPL	nosys
 468	AUE_NULL	UNIMPL	nosys
 469	AUE_NULL	UNIMPL	__getpath_fromfd
 470	AUE_NULL	UNIMPL	__getpath_fromaddr
 471	AUE_SCTP_PEELOFF	NOSTD	{ int sctp_peeloff(int sd, uint32_t name); }
 472	AUE_SCTP_GENERIC_SENDMSG	NOSTD	{ int sctp_generic_sendmsg(int sd, caddr_t msg, int mlen, \
 				    caddr_t to, __socklen_t tolen, \
 				    struct sctp_sndrcvinfo *sinfo, int flags); }
 473	AUE_SCTP_GENERIC_SENDMSG_IOV	NOSTD	{ int sctp_generic_sendmsg_iov(int sd, struct iovec *iov, int iovlen, \
 				    caddr_t to, __socklen_t tolen, \
 				    struct sctp_sndrcvinfo *sinfo, int flags); }
 474	AUE_SCTP_GENERIC_RECVMSG	NOSTD	{ int sctp_generic_recvmsg(int sd, struct iovec *iov, int iovlen, \
 				    struct sockaddr * from, __socklen_t *fromlenaddr, \
 				    struct sctp_sndrcvinfo *sinfo, int *msg_flags); }
 475	AUE_PREAD	STD	{ ssize_t pread(int fd, void *buf, \
 				    size_t nbyte, off_t offset); }
 476	AUE_PWRITE	STD	{ ssize_t pwrite(int fd, const void *buf, \
 				    size_t nbyte, off_t offset); }
 477	AUE_MMAP	STD	{ caddr_t mmap(caddr_t addr, size_t len, \
 				    int prot, int flags, int fd, off_t pos); }
 478	AUE_LSEEK	STD	{ off_t lseek(int fd, off_t offset, \
 				    int whence); }
 479	AUE_TRUNCATE	STD	{ int truncate(char *path, off_t length); }
 480	AUE_FTRUNCATE	STD	{ int ftruncate(int fd, off_t length); }
 481	AUE_THR_KILL2	STD	{ int thr_kill2(pid_t pid, long id, int sig); }
 482	AUE_SHMOPEN	STD	{ int shm_open(const char *path, int flags, \
 				    mode_t mode); }
 483	AUE_SHMUNLINK	STD	{ int shm_unlink(const char *path); }
 484	AUE_NULL	STD	{ int cpuset(cpusetid_t *setid); }
 485	AUE_NULL	STD	{ int cpuset_setid(cpuwhich_t which, id_t id, \
 				    cpusetid_t setid); }
 486	AUE_NULL	STD	{ int cpuset_getid(cpulevel_t level, \
 				    cpuwhich_t which, id_t id, \
 				    cpusetid_t *setid); }
 487	AUE_NULL	STD	{ int cpuset_getaffinity(cpulevel_t level, \
 				    cpuwhich_t which, id_t id, size_t cpusetsize, \
 				    cpuset_t *mask); }
 488	AUE_NULL	STD	{ int cpuset_setaffinity(cpulevel_t level, \
 				    cpuwhich_t which, id_t id, size_t cpusetsize, \
 				    const cpuset_t *mask); }
 489	AUE_FACCESSAT	STD	{ int faccessat(int fd, char *path, int amode, \
 				    int flag); }
 490	AUE_FCHMODAT	STD	{ int fchmodat(int fd, char *path, mode_t mode, \
 				    int flag); }
 491	AUE_FCHOWNAT	STD	{ int fchownat(int fd, char *path, uid_t uid, \
 				    gid_t gid, int flag); }
 492	AUE_FEXECVE	STD	{ int fexecve(int fd, char **argv, \
 				    char **envv); }
-493	AUE_FSTATAT	STD	{ int fstatat(int fd, char *path, \
-				    struct stat *buf, int flag); }
+493	AUE_FSTATAT	COMPAT11 { int fstatat(int fd, char *path, \
+				    struct freebsd11_stat *buf, int flag); }
 494	AUE_FUTIMESAT	STD	{ int futimesat(int fd, char *path, \
 				    struct timeval *times); }
 495	AUE_LINKAT	STD	{ int linkat(int fd1, char *path1, int fd2, \
 				    char *path2, int flag); }
 496	AUE_MKDIRAT	STD	{ int mkdirat(int fd, char *path, mode_t mode); }
 497	AUE_MKFIFOAT	STD	{ int mkfifoat(int fd, char *path, mode_t mode); }
-498	AUE_MKNODAT	STD	{ int mknodat(int fd, char *path, mode_t mode, \
-				    dev_t dev); }
+498	AUE_MKNODAT	COMPAT11 { int mknodat(int fd, char *path, mode_t mode, \
+				    uint32_t dev); }
 ; XXX: see the comment for open
 499	AUE_OPENAT_RWTC	STD	{ int openat(int fd, char *path, int flag, \
 				    mode_t mode); }
 500	AUE_READLINKAT	STD	{ int readlinkat(int fd, char *path, char *buf, \
 				    size_t bufsize); }
 501	AUE_RENAMEAT	STD	{ int renameat(int oldfd, char *old, int newfd, \
 				     char *new); }
 502	AUE_SYMLINKAT	STD	{ int symlinkat(char *path1, int fd, \
 				     char *path2); }
 503	AUE_UNLINKAT	STD	{ int unlinkat(int fd, char *path, int flag); }
 504	AUE_POSIX_OPENPT	STD	{ int posix_openpt(int flags); }
 ; 505 is initialised by the kgssapi code, if present.
 505	AUE_NULL	NOSTD	{ int gssd_syscall(char *path); }
 506	AUE_JAIL_GET	STD	{ int jail_get(struct iovec *iovp, \
 				    unsigned int iovcnt, int flags); }
 507	AUE_JAIL_SET	STD	{ int jail_set(struct iovec *iovp, \
 				    unsigned int iovcnt, int flags); }
 508	AUE_JAIL_REMOVE	STD	{ int jail_remove(int jid); }
 509	AUE_CLOSEFROM	STD	{ int closefrom(int lowfd); }
 510	AUE_SEMCTL	NOSTD	{ int __semctl(int semid, int semnum, \
 				    int cmd, union semun *arg); }
 511	AUE_MSGCTL	NOSTD	{ int msgctl(int msqid, int cmd, \
 				    struct msqid_ds *buf); }
 512	AUE_SHMCTL	NOSTD	{ int shmctl(int shmid, int cmd, \
 				    struct shmid_ds *buf); }
 513	AUE_LPATHCONF	STD	{ int lpathconf(char *path, int name); }
 514	AUE_NULL	OBSOL	cap_new
 515	AUE_CAP_RIGHTS_GET	STD	{ int __cap_rights_get(int version, \
 				    int fd, cap_rights_t *rightsp); }
 516	AUE_CAP_ENTER	STD	{ int cap_enter(void); }
 517	AUE_CAP_GETMODE	STD	{ int cap_getmode(u_int *modep); }
 518	AUE_PDFORK	STD	{ int pdfork(int *fdp, int flags); }
 519	AUE_PDKILL	STD	{ int pdkill(int fd, int signum); }
 520	AUE_PDGETPID	STD	{ int pdgetpid(int fd, pid_t *pidp); }
 521	AUE_PDWAIT	UNIMPL	pdwait4
 522	AUE_SELECT	STD	{ int pselect(int nd, fd_set *in, \
 				    fd_set *ou, fd_set *ex, \
 				    const struct timespec *ts, \
 				    const sigset_t *sm); }
 523	AUE_GETLOGINCLASS	STD	{ int getloginclass(char *namebuf, \
 				    size_t namelen); }
 524	AUE_SETLOGINCLASS	STD	{ int setloginclass(const char *namebuf); }
 525	AUE_NULL	STD	{ int rctl_get_racct(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 526	AUE_NULL	STD	{ int rctl_get_rules(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 527	AUE_NULL	STD	{ int rctl_get_limits(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 528	AUE_NULL	STD	{ int rctl_add_rule(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 529	AUE_NULL	STD	{ int rctl_remove_rule(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 530	AUE_POSIX_FALLOCATE	STD	{ int posix_fallocate(int fd, \
 				    off_t offset, off_t len); }
 531	AUE_POSIX_FADVISE	STD	{ int posix_fadvise(int fd, off_t offset, \
 				    off_t len, int advice); }
 532	AUE_WAIT6	STD	{ int wait6(idtype_t idtype, id_t id, \
 				    int *status, int options, \
 				    struct __wrusage *wrusage, \
 				    siginfo_t *info); }
 533	AUE_CAP_RIGHTS_LIMIT	STD	{ int cap_rights_limit(int fd, \
 					    cap_rights_t *rightsp); }
 534	AUE_CAP_IOCTLS_LIMIT	STD	{ int cap_ioctls_limit(int fd, \
 					    const u_long *cmds, size_t ncmds); }
 535	AUE_CAP_IOCTLS_GET	STD	{ ssize_t cap_ioctls_get(int fd, \
 					    u_long *cmds, size_t maxcmds); }
 536	AUE_CAP_FCNTLS_LIMIT	STD	{ int cap_fcntls_limit(int fd, \
 					    uint32_t fcntlrights); }
 537	AUE_CAP_FCNTLS_GET	STD	{ int cap_fcntls_get(int fd, \
 					    uint32_t *fcntlrightsp); }
 538	AUE_BINDAT	STD	{ int bindat(int fd, int s, caddr_t name, \
 				    int namelen); }
 539	AUE_CONNECTAT	STD	{ int connectat(int fd, int s, caddr_t name, \
 				    int namelen); }
 540	AUE_CHFLAGSAT	STD	{ int chflagsat(int fd, const char *path, \
 				    u_long flags, int atflag); }
 541	AUE_ACCEPT	STD	{ int accept4(int s, \
 				    struct sockaddr * __restrict name, \
 				    __socklen_t * __restrict anamelen, \
 				    int flags); }
 542	AUE_PIPE	STD	{ int pipe2(int *fildes, int flags); }
 543	AUE_AIO_MLOCK	STD	{ int aio_mlock(struct aiocb *aiocbp); }
 544	AUE_PROCCTL	STD	{ int procctl(idtype_t idtype, id_t id, \
 				    int com, void *data); }
 545	AUE_POLL	STD	{ int ppoll(struct pollfd *fds, u_int nfds, \
 				    const struct timespec *ts, \
 				    const sigset_t *set); }
 546	AUE_FUTIMES	STD	{ int futimens(int fd, \
 				    struct timespec *times); }
 547	AUE_FUTIMESAT	STD	{ int utimensat(int fd, \
 				    char *path, \
 				    struct timespec *times, int flag); }
 548	AUE_NULL	STD	{ int numa_getaffinity(cpuwhich_t which, \
 				    id_t id, \
 				    struct vm_domain_policy_entry *policy); }
 549	AUE_NULL	STD	{ int numa_setaffinity(cpuwhich_t which, \
 				    id_t id, const struct \
 				    vm_domain_policy_entry *policy); }
 550	AUE_FSYNC	STD	{ int fdatasync(int fd); }
+551	AUE_FSTAT	STD	{ int fstat(int fd, struct stat *sb); }
+552	AUE_FSTATAT	STD	{ int fstatat(int fd, char *path, \
+				    struct stat *buf, int flag); }
+553	AUE_FHSTAT	STD	{ int fhstat(const struct fhandle *u_fhp, \
+				    struct stat *sb); }
+554	AUE_GETDIRENTRIES STD	{ ssize_t getdirentries(int fd, char *buf, \
+				    size_t count, off_t *basep); }
+555	AUE_STATFS	STD	{ int statfs(char *path, struct statfs *buf); }
+556	AUE_FSTATFS	STD	{ int fstatfs(int fd, struct statfs *buf); }
+557	AUE_GETFSSTAT	STD	{ int getfsstat(struct statfs *buf, \
+				    long bufsize, int mode); }
+558	AUE_FHSTATFS	STD	{ int fhstatfs(const struct fhandle *u_fhp, \
+				    struct statfs *buf); }
+559	AUE_MKNODAT	STD	{ int mknodat(int fd, char *path, mode_t mode, \
+				    dev_t dev); }
 
 ; Please copy any additions and changes to the following compatibility tables:
 ; sys/compat/freebsd32/syscalls.master
Index: head/sys/kern/tty.c
===================================================================
--- head/sys/kern/tty.c	(revision 318735)
+++ head/sys/kern/tty.c	(revision 318736)
@@ -1,2347 +1,2347 @@
 /*-
  * Copyright (c) 2008 Ed Schouten <ed@FreeBSD.org>
  * All rights reserved.
  *
  * Portions of this software were developed under sponsorship from Snow
  * B.V., the Netherlands.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 #include "opt_compat.h"
 
 #include <sys/param.h>
 #include <sys/capsicum.h>
 #include <sys/conf.h>
 #include <sys/cons.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/filio.h>
 #ifdef COMPAT_43TTY
 #include <sys/ioctl_compat.h>
 #endif /* COMPAT_43TTY */
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/poll.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/serial.h>
 #include <sys/signal.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/tty.h>
 #include <sys/ttycom.h>
 #define TTYDEFCHARS
 #include <sys/ttydefaults.h>
 #undef TTYDEFCHARS
 #include <sys/ucred.h>
 #include <sys/vnode.h>
 
 #include <machine/stdarg.h>
 
 /* Malloc type M_TTY, short name "tty", described as "tty device". */
 static MALLOC_DEFINE(M_TTY, "tty", "tty device");
 
 /* Forward declaration; ttydev_leave() ends its teardown by calling this. */
 static void tty_rel_free(struct tty *tp);
 
 /*
  * File-wide list of tty structures, an sx lock named "tty list" that is
  * set up through SX_SYSINIT, and a counter that starts at zero.
  * NOTE(review): the sx lock presumably guards both tty_list and
  * tty_list_count -- confirm against the code that uses them.
  */
 static TAILQ_HEAD(, tty) tty_list = TAILQ_HEAD_INITIALIZER(tty_list);
 static struct sx tty_list_sx;
 SX_SYSINIT(tty_list, &tty_list_sx, "tty list");
 static unsigned int tty_list_count = 0;
 
 /* Character device of /dev/console. */
 static struct cdev	*dev_console;
 /* NOTE(review): presumably the name backing dev_console -- confirm. */
 static const char	*dev_console_filename;
 
 /*
  * Flags that are supported and stored by this implementation.
  */
 #define TTYSUP_IFLAG	(IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK|ISTRIP|\
 			INLCR|IGNCR|ICRNL|IXON|IXOFF|IXANY|IMAXBEL)
 #define TTYSUP_OFLAG	(OPOST|ONLCR|TAB3|ONOEOT|OCRNL|ONOCR|ONLRET)
 #define TTYSUP_LFLAG	(ECHOKE|ECHOE|ECHOK|ECHO|ECHONL|ECHOPRT|\
 			ECHOCTL|ISIG|ICANON|ALTWERASE|IEXTEN|TOSTOP|\
 			FLUSHO|NOKERNINFO|NOFLSH)
 #define TTYSUP_CFLAG	(CIGNORE|CSIZE|CSTOPB|CREAD|PARENB|PARODD|\
 			HUPCL|CLOCAL|CCTS_OFLOW|CRTS_IFLOW|CDTR_IFLOW|\
 			CDSR_OFLOW|CCAR_OFLOW)
 
 /*
  * Non-zero when the unit number of device 'd' has TTYUNIT_CALLOUT set.
  * The 'tp' argument is accepted but not used by the expansion.
  */
 #define	TTY_CALLOUT(tp,d) (dev2unit(d) & TTYUNIT_CALLOUT)
 
 /*
  * Backing variable for the kern.tty_drainwait sysctl (CTLFLAG_RWTUN).
  * The initial value is five minutes, expressed in seconds.
  */
 static int  tty_drainwait = 5 * 60;
 SYSCTL_INT(_kern, OID_AUTO, tty_drainwait, CTLFLAG_RWTUN,
     &tty_drainwait, 0, "Default output drain timeout in seconds");
 
 /*
  * Set TTY buffer sizes.
  */
 
 #define	TTYBUF_MAX	65536
 
 /*
  * Size both I/O queues from the current line speeds and recompute the low
  * watermarks from the sizes that were actually allocated.
  * Note that the ttyxxxq_setsize() functions may drop and then reacquire the
  * tty lock during memory allocation, and return ENXIO if the tty disappears
  * while unlocked.  Any error from them is handed straight back to the
  * caller.
  */
 static int
 tty_watermarks(struct tty *tp)
 {
 	size_t qsize;
 	int rc;
 
 	/* Input queue: 2 seconds of data, or nothing when CREAD is clear. */
 	if ((tp->t_termios.c_cflag & CREAD) != 0)
 		qsize = MIN(tp->t_termios.c_ispeed / 5, TTYBUF_MAX);
 	else
 		qsize = 0;
 	rc = ttyinq_setsize(&tp->t_inq, tp, qsize);
 	if (rc != 0)
 		return (rc);
 	/* Low watermark at 10% (when 90% is available). */
 	tp->t_inlow = (ttyinq_getallocatedsize(&tp->t_inq) * 9) / 10;
 
 	/* Output queue: 2 seconds of data as well. */
 	qsize = MIN(tp->t_termios.c_ospeed / 5, TTYBUF_MAX);
 	rc = ttyoutq_setsize(&tp->t_outq, tp, qsize);
 	if (rc != 0)
 		return (rc);
 	/* Low watermark at 10% (when 90% is available). */
 	tp->t_outlow = (ttyoutq_getallocatedsize(&tp->t_outq) * 9) / 10;
 
 	return (0);
 }
 
 /*
  * Wait for pending output to leave the output queue and the device.
  *
  * Returns 0 at once when a getc_inject hook is installed, and 0 as soon as
  * the output queue is empty and ttydevsw_busy() reports idle.  Otherwise
  * the result is the error from tty_timedwait(); that is EWOULDBLOCK when
  * the deadline passes first.  A non-zero 'leaving' selects the close()
  * policy described below, zero selects the tcdrain() policy.
  */
 static int
 tty_drain(struct tty *tp, int leaving)
 {
 	sbintime_t timeout_at;
 	size_t bytes;
 	int error;
 
 	if (ttyhook_hashook(tp, getc_inject))
 		/* buffer is inaccessible */
 		return (0);
 
 	/*
 	 * For close(), use the recent historic timeout of "1 second without
 	 * making progress".  For tcdrain(), use t_drainwait as the timeout,
 	 * with zero meaning "no timeout" which gives POSIX behavior.
 	 */
 	if (leaving)
 		timeout_at = getsbinuptime() + SBT_1S;
 	else if (tp->t_drainwait != 0)
 		timeout_at = getsbinuptime() + SBT_1S * tp->t_drainwait;
 	else
 		timeout_at = 0;
 
 	/*
 	 * Poll the output buffer and the hardware for completion, at 10 Hz.
 	 * Polling is required for devices which are not able to signal an
 	 * interrupt when the transmitter becomes idle (most USB serial devs).
 	 * The unusual structure of this loop ensures we check for busy one more
 	 * time after tty_timedwait() returns EWOULDBLOCK, so that success has
 	 * higher priority than timeout if the IO completed in the last 100mS.
 	 */
 	error = 0;
 	/* Queue depth at the last progress check; only consulted when leaving. */
 	bytes = ttyoutq_bytesused(&tp->t_outq);
 	for (;;) {
 		if (ttyoutq_bytesused(&tp->t_outq) == 0 && !ttydevsw_busy(tp))
 			return (0);
 		/* A timeout left over from the previous pass is reported now. */
 		if (error != 0)
 			return (error);
 		ttydevsw_outwakeup(tp);
 		error = tty_timedwait(tp, &tp->t_outwait, hz / 10);
 		if (error != 0 && error != EWOULDBLOCK)
 			return (error);
 		else if (timeout_at == 0 || getsbinuptime() < timeout_at)
 			/* No deadline, or deadline not reached: keep polling. */
 			error = 0;
 		else if (leaving && ttyoutq_bytesused(&tp->t_outq) < bytes) {
 			/* In close, making progress, grant an extra second. */
 			error = 0;
 			timeout_at += SBT_1S;
 			bytes = ttyoutq_bytesused(&tp->t_outq);
 		}
 		/* Otherwise 'error' keeps the tty_timedwait() result. */
 	}
 }
 
 /*
  * Though ttydev_enter() and ttydev_leave() seem to be related, they
  * don't have to be used together. ttydev_enter() is used by the cdev
  * operations to prevent an actual operation from being processed when
  * the TTY has been abandoned. ttydev_leave() is used by ttydev_open()
  * and ttydev_close() to determine whether per-TTY data should be
  * deallocated.
  */
 
 /*
  * Take the tty lock on behalf of a cdev operation.  Returns 0 with the lock
  * held when the tty is present and open; otherwise releases the lock again
  * and returns ENXIO.
  */
 static __inline int
 ttydev_enter(struct tty *tp)
 {
 
 	tty_lock(tp);
 	if (!tty_gone(tp) && tty_opened(tp))
 		return (0);
 
 	/* Device is already gone. */
 	tty_unlock(tp);
 	return (ENXIO);
 }
 
 /*
  * Release path used by ttydev_open() and ttydev_close().  Must be entered
  * with the tty lock held.
  *
  * If the tty is still open, or TF_OPENCLOSE is already set, this only drops
  * the lock.  Otherwise it performs the last-close teardown with
  * TF_OPENCLOSE set and ends in tty_rel_free().
  * NOTE(review): no tty_unlock() is issued on the teardown path, so
  * tty_rel_free() presumably releases the lock -- confirm.
  */
 static void
 ttydev_leave(struct tty *tp)
 {
 
 	tty_lock_assert(tp, MA_OWNED);
 
 	if (tty_opened(tp) || tp->t_flags & TF_OPENCLOSE) {
 		/* Device is still opened somewhere. */
 		tty_unlock(tp);
 		return;
 	}
 
 	/* Makes ttydev_open() callers wait on t_dcdwait until teardown ends. */
 	tp->t_flags |= TF_OPENCLOSE;
 
 	/* Stop asynchronous I/O. */
 	funsetown(&tp->t_sigio);
 
 	/* Remove console TTY. */
 	if (constty == tp)
 		constty_clear();
 
 	/* Drain any output.  The result of tty_drain() is not checked. */
 	if (!tty_gone(tp))
 		tty_drain(tp, 1);
 
 	ttydisc_close(tp);
 
 	/* Free i/o queues now since they might be large. */
 	ttyinq_free(&tp->t_inq);
 	tp->t_inlow = 0;
 	ttyoutq_free(&tp->t_outq);
 	tp->t_outlow = 0;
 
 	knlist_clear(&tp->t_inpoll.si_note, 1);
 	knlist_clear(&tp->t_outpoll.si_note, 1);
 
 	if (!tty_gone(tp))
 		ttydevsw_close(tp);
 
 	/* Teardown finished: let waiting openers proceed. */
 	tp->t_flags &= ~TF_OPENCLOSE;
 	cv_broadcast(&tp->t_dcdwait);
 	tty_rel_free(tp);
 }
 
 /*
  * Operations that are exposed through the character device in /dev.
  */
 /*
  * cdev open entry point.
  *
  * Returns ENXIO when the tty is gone, the tty_wait() error if a sleep
  * fails, EBUSY when the open conflicts with an existing open of the other
  * device flavour or with TF_EXCLUDE, or the error from ttydevsw_open() /
  * tty_watermarks() on a first open.  On success, returns 0 after recording
  * the kind of open in t_flags.  Every path past the TF_OPENCLOSE
  * acquisition leaves through ttydev_leave(), which drops the lock.
  */
 static int
 ttydev_open(struct cdev *dev, int oflags, int devtype __unused,
     struct thread *td)
 {
 	struct tty *tp;
 	int error;
 
 	tp = dev->si_drv1;
 	error = 0;
 	tty_lock(tp);
 	if (tty_gone(tp)) {
 		/* Device is already gone. */
 		tty_unlock(tp);
 		return (ENXIO);
 	}
 
 	/*
 	 * Block when other processes are currently opening or closing
 	 * the TTY.
 	 */
 	while (tp->t_flags & TF_OPENCLOSE) {
 		error = tty_wait(tp, &tp->t_dcdwait);
 		if (error != 0) {
 			tty_unlock(tp);
 			return (error);
 		}
 	}
 	tp->t_flags |= TF_OPENCLOSE;
 
 	/*
 	 * Make sure the "tty" and "cua" device cannot be opened at the
 	 * same time.  The console is a "tty" device.
 	 */
 	if (TTY_CALLOUT(tp, dev)) {
 		if (tp->t_flags & (TF_OPENED_CONS | TF_OPENED_IN)) {
 			error = EBUSY;
 			goto done;
 		}
 	} else {
 		if (tp->t_flags & TF_OPENED_OUT) {
 			error = EBUSY;
 			goto done;
 		}
 	}
 
 	/* Exclusive mode: refuse unless priv_check() returns zero. */
 	if (tp->t_flags & TF_EXCLUDE && priv_check(td, PRIV_TTY_EXCLUSIVE)) {
 		error = EBUSY;
 		goto done;
 	}
 
 	/* First open: load initial termios and bring up driver and discipline. */
 	if (!tty_opened(tp)) {
 		/* Set proper termios flags. */
 		if (TTY_CALLOUT(tp, dev))
 			tp->t_termios = tp->t_termios_init_out;
 		else
 			tp->t_termios = tp->t_termios_init_in;
 		ttydevsw_param(tp, &tp->t_termios);
 		/* Prevent modem control on callout devices and /dev/console. */
 		if (TTY_CALLOUT(tp, dev) || dev == dev_console)
 			tp->t_termios.c_cflag |= CLOCAL;
 
 		ttydevsw_modem(tp, SER_DTR|SER_RTS, 0);
 
 		error = ttydevsw_open(tp);
 		if (error != 0)
 			goto done;
 
 		ttydisc_open(tp);
 		error = tty_watermarks(tp);
 		if (error != 0)
 			goto done;
 	}
 
 	/* Wait for Carrier Detect. */
 	if ((oflags & O_NONBLOCK) == 0 &&
 	    (tp->t_termios.c_cflag & CLOCAL) == 0) {
 		while ((ttydevsw_modem(tp, 0, 0) & SER_DCD) == 0) {
 			error = tty_wait(tp, &tp->t_dcdwait);
 			if (error != 0)
 				goto done;
 		}
 	}
 
 	/* Record which flavour of the device this open came through. */
 	if (dev == dev_console)
 		tp->t_flags |= TF_OPENED_CONS;
 	else if (TTY_CALLOUT(tp, dev))
 		tp->t_flags |= TF_OPENED_OUT;
 	else
 		tp->t_flags |= TF_OPENED_IN;
 	MPASS((tp->t_flags & (TF_OPENED_CONS | TF_OPENED_IN)) == 0 ||
 	    (tp->t_flags & TF_OPENED_OUT) == 0);
 
 	/*
 	 * Common exit for success and failure.  ttydev_leave() just unlocks
 	 * if the tty ended up open, and runs the teardown if it did not.
 	 */
 done:	tp->t_flags &= ~TF_OPENCLOSE;
 	cv_broadcast(&tp->t_dcdwait);
 	ttydev_leave(tp);
 
 	return (error);
 }
 
 /*
  * cdev close entry point.  Clears the open flag(s) that belong to 'dev'
  * and, if no open flag remains, wakes all sleepers and runs the last-close
  * path in ttydev_leave().  Always returns 0.
  */
 static int
 ttydev_close(struct cdev *dev, int fflag, int devtype __unused,
     struct thread *td __unused)
 {
 	struct tty *tp = dev->si_drv1;
 
 	tty_lock(tp);
 
 	/*
 	 * Don't actually close the device if it is being used as the
 	 * console.
 	 */
 	MPASS((tp->t_flags & (TF_OPENED_CONS | TF_OPENED_IN)) == 0 ||
 	    (tp->t_flags & TF_OPENED_OUT) == 0);
 	if (dev == dev_console)
 		tp->t_flags &= ~TF_OPENED_CONS;
 	else
 		tp->t_flags &= ~(TF_OPENED_IN|TF_OPENED_OUT);
 
 	/* Some other open flag is still set: nothing more to do. */
 	if (tp->t_flags & TF_OPENED) {
 		tty_unlock(tp);
 		return (0);
 	}
 
 	/* If revoking, flush output now to avoid draining it later. */
 	if (fflag & FREVOKE)
 		tty_flush(tp, FWRITE);
 
 	tp->t_flags &= ~TF_EXCLUDE;
 
 	/* Properly wake up threads that are stuck - revoke(). */
 	tp->t_revokecnt++;
 	tty_wakeup(tp, FREAD|FWRITE);
 	cv_broadcast(&tp->t_bgwait);
 	cv_broadcast(&tp->t_dcdwait);
 
 	/* Drops the lock; performs the teardown since no open flag is left. */
 	ttydev_leave(tp);
 
 	return (0);
 }
 
 /*
  * Returns 1 when 'p' belongs to the session attached to 'tp' and has
  * P_CONTROLT set, 0 otherwise.  The tty lock must be held.
  */
 static __inline int
 tty_is_ctty(struct tty *tp, struct proc *p)
 {
 
 	tty_lock_assert(tp, MA_OWNED);
 
 	if (p->p_session != tp->t_session)
 		return (0);
 	return ((p->p_flag & P_CONTROLT) != 0);
 }
 
 /*
  * Decide whether the calling thread may act on the terminal now, and make
  * it wait while its process is in the background.  'sig' must be SIGTTIN
  * or SIGTTOU on entry and the tty lock must be held.
  *
  * Returns 0 when the action may proceed, EIO when it must be refused, or
  * the error from tty_wait() when the sleep fails.
  */
 int
 tty_wait_background(struct tty *tp, struct thread *td, int sig)
 {
 	struct proc *p = td->td_proc;
 	struct pgrp *pg;
 	ksiginfo_t ksi;
 	int error;
 
 	MPASS(sig == SIGTTIN || sig == SIGTTOU);
 	tty_lock_assert(tp, MA_OWNED);
 
 	for (;;) {
 		PROC_LOCK(p);
 		/*
 		 * The process should only sleep, when:
 		 * - This terminal is the controlling terminal
 		 * - Its process group is not the foreground process
 		 *   group
 		 * - The parent process isn't waiting for the child to
 		 *   exit
 		 * - the signal to send to the process isn't masked
 		 */
 		if (!tty_is_ctty(tp, p) || p->p_pgrp == tp->t_pgrp) {
 			/* Allow the action to happen. */
 			PROC_UNLOCK(p);
 			return (0);
 		}
 
 		if (SIGISMEMBER(p->p_sigacts->ps_sigignore, sig) ||
 		    SIGISMEMBER(td->td_sigmask, sig)) {
 			/* Only allow them in write()/ioctl(). */
 			PROC_UNLOCK(p);
 			return (sig == SIGTTOU ? 0 : EIO);
 		}
 
 		pg = p->p_pgrp;
 		if (p->p_flag & P_PPWAIT || pg->pg_jobc == 0) {
 			/* Don't allow the action to happen. */
 			PROC_UNLOCK(p);
 			return (EIO);
 		}
 		PROC_UNLOCK(p);
 
 		/*
 		 * Send the signal and sleep until we're the new
 		 * foreground process group.
 		 */
 		if (sig != 0) {
 			/* Build the ksiginfo once; later passes reuse it. */
 			ksiginfo_init(&ksi);
 			ksi.ksi_code = SI_KERNEL;
 			ksi.ksi_signo = sig;
 			/*
 			 * NOTE(review): from the second pass on, the
 			 * SIGISMEMBER() tests and the SIGTTOU comparison
 			 * above are evaluated with sig == 0 -- confirm that
 			 * this is intended.
 			 */
 			sig = 0;
 		}
 		PGRP_LOCK(pg);
 		pgsignal(pg, ksi.ksi_signo, 1, &ksi);
 		PGRP_UNLOCK(pg);
 
 		/* Woken via t_bgwait; loop to re-evaluate the conditions. */
 		error = tty_wait(tp, &tp->t_bgwait);
 		if (error)
 			return (error);
 	}
 }
 
 /*
  * cdev read entry point.  Hands the request to the line discipline with
  * the tty lock held and returns its result, except that ENXIO is reported
  * as success.
  */
 static int
 ttydev_read(struct cdev *dev, struct uio *uio, int ioflag)
 {
 	struct tty *tp;
 	int rc;
 
 	tp = dev->si_drv1;
 	rc = ttydev_enter(tp);
 	if (rc == 0) {
 		rc = ttydisc_read(tp, uio, ioflag);
 		tty_unlock(tp);
 	}
 
 	/*
 	 * The read() call should not throw an error when the device is
 	 * being destroyed. Silently convert it to an EOF.
 	 */
 	return (rc == ENXIO ? 0 : rc);
 }
 
 /*
  * cdev write entry point.  Returns the ttydev_enter() error unchanged when
  * the tty cannot be entered, the tty_wait_background() or tty_wait() error
  * when a sleep fails, and otherwise the result of ttydisc_write().
  */
 static int
 ttydev_write(struct cdev *dev, struct uio *uio, int ioflag)
 {
 	struct tty *tp = dev->si_drv1;
 	int error;
 
 	error = ttydev_enter(tp);
 	if (error)
 		return (error);
 
 	/* With TOSTOP set, a background writer has to wait first. */
 	if (tp->t_termios.c_lflag & TOSTOP) {
 		error = tty_wait_background(tp, curthread, SIGTTOU);
 		if (error)
 			goto done;
 	}
 
 	if (ioflag & IO_NDELAY && tp->t_flags & TF_BUSY_OUT) {
 		/* Allow non-blocking writes to bypass serialization. */
 		error = ttydisc_write(tp, uio, ioflag);
 	} else {
 		/* Serialize write() calls. */
 		while (tp->t_flags & TF_BUSY_OUT) {
 			error = tty_wait(tp, &tp->t_outserwait);
 			if (error)
 				goto done;
 		}
 
 		/* TF_BUSY_OUT marks a write in progress for other writers. */
 		tp->t_flags |= TF_BUSY_OUT;
 		error = ttydisc_write(tp, uio, ioflag);
 		tp->t_flags &= ~TF_BUSY_OUT;
 		/* Wake one waiter sleeping on t_outserwait. */
 		cv_signal(&tp->t_outserwait);
 	}
 
 done:	tty_unlock(tp);
 	return (error);
 }
 
 /*
  * cdev ioctl entry point.
  *
  * Commands listed in the switch below first go through
  * tty_wait_background() with SIGTTOU; all other commands skip that step.
  * For TIOCSETA/TIOCSETAW/TIOCSETAF the caller's termios in 'data' is
  * rewritten in place so that every field selected by the lock-state
  * termios keeps its current value.  The request is then passed to
  * tty_ioctl() and its result returned.
  */
 static int
 ttydev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
     struct thread *td)
 {
 	struct tty *tp = dev->si_drv1;
 	int error;
 
 	error = ttydev_enter(tp);
 	if (error)
 		return (error);
 
 	switch (cmd) {
 	case TIOCCBRK:
 	case TIOCCONS:
 	case TIOCDRAIN:
 	case TIOCEXCL:
 	case TIOCFLUSH:
 	case TIOCNXCL:
 	case TIOCSBRK:
 	case TIOCSCTTY:
 	case TIOCSETA:
 	case TIOCSETAF:
 	case TIOCSETAW:
 	case TIOCSPGRP:
 	case TIOCSTART:
 	case TIOCSTAT:
 	case TIOCSTI:
 	case TIOCSTOP:
 	case TIOCSWINSZ:
 #if 0
 	case TIOCSDRAINWAIT:
 	case TIOCSETD:
 #endif
 #ifdef COMPAT_43TTY
 	case  TIOCLBIC:
 	case  TIOCLBIS:
 	case  TIOCLSET:
 	case  TIOCSETC:
 	case OTIOCSETD:
 	case  TIOCSETN:
 	case  TIOCSETP:
 	case  TIOCSLTC:
 #endif /* COMPAT_43TTY */
 		/*
 		 * If the ioctl() causes the TTY to be modified, let it
 		 * wait in the background.
 		 */
 		error = tty_wait_background(tp, curthread, SIGTTOU);
 		if (error)
 			goto done;
 	}
 
 	if (cmd == TIOCSETA || cmd == TIOCSETAW || cmd == TIOCSETAF) {
 		struct termios *old = &tp->t_termios;
 		struct termios *new = (struct termios *)data;
 		/* Callout and regular devices have separate lock masks. */
 		struct termios *lock = TTY_CALLOUT(tp, dev) ?
 		    &tp->t_termios_lock_out : &tp->t_termios_lock_in;
 		int cc;
 
 		/*
 		 * Lock state devices.  Just overwrite the values of the
 		 * commands that are currently in use.
 		 */
 		/* Flag words: locked bits come from 'old', the rest from 'new'. */
 		new->c_iflag = (old->c_iflag & lock->c_iflag) |
 		    (new->c_iflag & ~lock->c_iflag);
 		new->c_oflag = (old->c_oflag & lock->c_oflag) |
 		    (new->c_oflag & ~lock->c_oflag);
 		new->c_cflag = (old->c_cflag & lock->c_cflag) |
 		    (new->c_cflag & ~lock->c_cflag);
 		new->c_lflag = (old->c_lflag & lock->c_lflag) |
 		    (new->c_lflag & ~lock->c_lflag);
 		/* Control characters and speeds: any non-zero lock entry pins. */
 		for (cc = 0; cc < NCCS; ++cc)
 			if (lock->c_cc[cc])
 				new->c_cc[cc] = old->c_cc[cc];
 		if (lock->c_ispeed)
 			new->c_ispeed = old->c_ispeed;
 		if (lock->c_ospeed)
 			new->c_ospeed = old->c_ospeed;
 	}
 
 	error = tty_ioctl(tp, cmd, data, fflag, td);
 done:	tty_unlock(tp);
 
 	return (error);
 }
 
 /*
  * poll() entry point of the TTY device node.  Returns the subset of
  * the requested events that is ready, plus POLLHUP when the TTY cannot
  * be entered or is in the zombie state.
  */
 static int
 ttydev_poll(struct cdev *dev, int events, struct thread *td)
 {
 	struct tty *tp = dev->si_drv1;
 	int rd_events = events & (POLLIN|POLLRDNORM);
 	int wr_events = events & (POLLOUT|POLLWRNORM);
 	int ready = 0;
 
 	if (ttydev_enter(tp) != 0)
 		return (rd_events | POLLHUP);
 
 	/* See if we can read something. */
 	if (rd_events != 0 && ttydisc_read_poll(tp) > 0)
 		ready |= rd_events;
 
 	if ((tp->t_flags & TF_ZOMBIE) != 0) {
 		/* Hangup flag on zombie state. */
 		ready |= POLLHUP;
 	} else if (wr_events != 0 && ttydisc_write_poll(tp) > 0) {
 		/* We can write something. */
 		ready |= wr_events;
 	}
 
 	/* Nothing is ready yet: register for the requested directions. */
 	if (ready == 0) {
 		if (rd_events != 0)
 			selrecord(td, &tp->t_inpoll);
 		if (wr_events != 0)
 			selrecord(td, &tp->t_outpoll);
 	}
 
 	tty_unlock(tp);
 
 	return (ready);
 }
 
 /*
  * mmap() entry point of the TTY device node; the request is forwarded
  * to the driver.  Returns -1 when the TTY cannot be entered.
  */
 static int
 ttydev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
     int nprot, vm_memattr_t *memattr)
 {
 	struct tty *tp = dev->si_drv1;
 	int rv;
 
 	if (ttydev_enter(tp) != 0)
 		return (-1);
 
 	/* Handle mmap() through the driver. */
 	rv = ttydevsw_mmap(tp, offset, paddr, nprot, memattr);
 	tty_unlock(tp);
 
 	return (rv);
 }
 
 /*
  * kqueue support.
  */
 
 /*
  * Detach a read knote: remove it from the input poll knote list of the
  * TTY stored in kn_hook.
  */
 static void
 tty_kqops_read_detach(struct knote *kn)
 {
 	struct tty *tp = kn->kn_hook;
 
 	knlist_remove(&tp->t_inpoll.si_note, kn, 0);
 }
 
 /*
  * Read filter: report EOF when the TTY is gone or a zombie, otherwise
  * expose the readable byte count and fire when it is positive.
  */
 static int
 tty_kqops_read_event(struct knote *kn, long hint __unused)
 {
 	struct tty *tp = kn->kn_hook;
 
 	tty_lock_assert(tp, MA_OWNED);
 
 	if (!tty_gone(tp) && (tp->t_flags & TF_ZOMBIE) == 0) {
 		kn->kn_data = ttydisc_read_poll(tp);
 		return (kn->kn_data > 0);
 	}
 
 	kn->kn_flags |= EV_EOF;
 	return (1);
 }
 
 /*
  * Detach a write knote: remove it from the output poll knote list of
  * the TTY stored in kn_hook.
  */
 static void
 tty_kqops_write_detach(struct knote *kn)
 {
 	struct tty *tp = kn->kn_hook;
 
 	knlist_remove(&tp->t_outpoll.si_note, kn, 0);
 }
 
 /*
  * Write filter: report EOF when the TTY is gone, otherwise expose the
  * value of ttydisc_write_poll() and fire when it is positive.
  */
 static int
 tty_kqops_write_event(struct knote *kn, long hint __unused)
 {
 	struct tty *tp = kn->kn_hook;
 
 	tty_lock_assert(tp, MA_OWNED);
 
 	if (!tty_gone(tp)) {
 		kn->kn_data = ttydisc_write_poll(tp);
 		return (kn->kn_data > 0);
 	}
 
 	kn->kn_flags |= EV_EOF;
 	return (1);
 }
 
 /* Filter operations installed by ttydev_kqfilter() for EVFILT_READ. */
 static struct filterops tty_kqops_read = {
 	.f_isfd = 1,
 	.f_detach = tty_kqops_read_detach,
 	.f_event = tty_kqops_read_event,
 };
 
 /* Filter operations installed by ttydev_kqfilter() for EVFILT_WRITE. */
 static struct filterops tty_kqops_write = {
 	.f_isfd = 1,
 	.f_detach = tty_kqops_write_detach,
 	.f_event = tty_kqops_write_event,
 };
 
 /*
  * kqfilter() entry point of the TTY device node.  Attaches the knote to
  * the input or output knote list; any other filter yields EINVAL.
  */
 static int
 ttydev_kqfilter(struct cdev *dev, struct knote *kn)
 {
 	struct tty *tp = dev->si_drv1;
 	int error;
 
 	error = ttydev_enter(tp);
 	if (error != 0)
 		return (error);
 
 	if (kn->kn_filter == EVFILT_READ) {
 		kn->kn_hook = tp;
 		kn->kn_fop = &tty_kqops_read;
 		knlist_add(&tp->t_inpoll.si_note, kn, 1);
 	} else if (kn->kn_filter == EVFILT_WRITE) {
 		kn->kn_hook = tp;
 		kn->kn_fop = &tty_kqops_write;
 		knlist_add(&tp->t_outpoll.si_note, kn, 1);
 	} else {
 		/* Only read and write filters are supported. */
 		error = EINVAL;
 	}
 
 	tty_unlock(tp);
 	return (error);
 }
 
 /*
  * Character device switch for the regular TTY device nodes; used by
  * tty_makedevf() for the call-in and call-out ("cua") devices.
  */
 static struct cdevsw ttydev_cdevsw = {
 	.d_version	= D_VERSION,
 	.d_open		= ttydev_open,
 	.d_close	= ttydev_close,
 	.d_read		= ttydev_read,
 	.d_write	= ttydev_write,
 	.d_ioctl	= ttydev_ioctl,
 	.d_kqfilter	= ttydev_kqfilter,
 	.d_poll		= ttydev_poll,
 	.d_mmap		= ttydev_mmap,
 	.d_name		= "ttydev",
 	.d_flags	= D_TTY,
 };
 
 /*
  * Init/lock-state devices
  */
 
 /*
  * Open an init/lock-state device.  Fails with ENODEV once the
  * underlying TTY is gone; succeeds otherwise.
  */
 static int
 ttyil_open(struct cdev *dev, int oflags __unused, int devtype __unused,
     struct thread *td)
 {
 	struct tty *tp = dev->si_drv1;
 	int error;
 
 	tty_lock(tp);
 	error = tty_gone(tp) ? ENODEV : 0;
 	tty_unlock(tp);
 
 	return (error);
 }
 
 /* Close an init/lock-state device: nothing to do, always returns 0. */
 static int
 ttyil_close(struct cdev *dev __unused, int flag __unused, int mode __unused,
     struct thread *td __unused)
 {
 
 	return (0);
 }
 
 /*
  * read()/write() handler shared by the init/lock-state devices; both
  * operations are rejected with ENODEV.
  */
 static int
 ttyil_rdwr(struct cdev *dev __unused, struct uio *uio __unused,
     int ioflag __unused)
 {
 
 	return (ENODEV);
 }
 
 /*
  * ioctl() handler of the init/lock-state devices.  The driver's
  * cioctl routine gets the first shot; only when it answers ENOIOCTL
  * are the commands below applied to the termios behind si_drv2.
  */
 static int
 ttyil_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
     struct thread *td)
 {
 	struct tty *tp = dev->si_drv1;
 	int error;
 
 	tty_lock(tp);
 	if (tty_gone(tp)) {
 		error = ENODEV;
 	} else {
 		error = ttydevsw_cioctl(tp, dev2unit(dev), cmd, data, td);
 		if (error == ENOIOCTL) {
 			error = 0;
 
 			switch (cmd) {
 			case TIOCGETA:
 				/* Obtain terminal flags through tcgetattr(). */
 				*(struct termios*)data =
 				    *(struct termios*)dev->si_drv2;
 				break;
 			case TIOCSETA:
 				/* Set terminal flags through tcsetattr(). */
 				error = priv_check(td, PRIV_TTY_SETA);
 				if (error == 0)
 					*(struct termios*)dev->si_drv2 =
 					    *(struct termios*)data;
 				break;
 			case TIOCGETD:
 				*(int *)data = TTYDISC;
 				break;
 			case TIOCGWINSZ:
 				bzero(data, sizeof(struct winsize));
 				break;
 			default:
 				error = ENOTTY;
 			}
 		}
 	}
 
 	tty_unlock(tp);
 	return (error);
 }
 
 /*
  * Character device switch for the ".init" and ".lock" state devices
  * created by tty_makedevf().
  */
 static struct cdevsw ttyil_cdevsw = {
 	.d_version	= D_VERSION,
 	.d_open		= ttyil_open,
 	.d_close	= ttyil_close,
 	.d_read		= ttyil_rdwr,
 	.d_write	= ttyil_rdwr,
 	.d_ioctl	= ttyil_ioctl,
 	.d_name		= "ttyil",
 	.d_flags	= D_TTY,
 };
 
 /*
  * Fill the call-in initial termios with the TTYDEF_* defaults and the
  * default control characters, then duplicate it for the call-out side.
  */
 static void
 tty_init_termios(struct tty *tp)
 {
 	struct termios *in = &tp->t_termios_init_in;
 
 	/* Mode flags. */
 	in->c_iflag = TTYDEF_IFLAG;
 	in->c_oflag = TTYDEF_OFLAG;
 	in->c_cflag = TTYDEF_CFLAG;
 	in->c_lflag = TTYDEF_LFLAG;
 
 	/* Line speeds. */
 	in->c_ispeed = TTYDEF_SPEED;
 	in->c_ospeed = TTYDEF_SPEED;
 
 	/* Control characters. */
 	memcpy(&in->c_cc, ttydefchars, sizeof ttydefchars);
 
 	tp->t_termios_init_out = *in;
 }
 
 /*
  * Adjust the initial termios of a console TTY: optionally force both
  * line speeds to 's' (when non-zero) and always set CLOCAL, on the
  * call-in as well as the call-out state.
  */
 void
 tty_init_console(struct tty *tp, speed_t s)
 {
 	struct termios *in = &tp->t_termios_init_in;
 	struct termios *out = &tp->t_termios_init_out;
 
 	if (s != 0) {
 		in->c_ospeed = s;
 		in->c_ispeed = s;
 		out->c_ospeed = s;
 		out->c_ispeed = s;
 	}
 
 	in->c_cflag |= CLOCAL;
 	out->c_cflag |= CLOCAL;
 }
 
 /*
  * Standard device routine implementations, mostly meant for
  * pseudo-terminal device drivers. When a driver creates a new terminal
  * device class, missing routines are patched.
  */
 
 /* Default open routine: nothing to do, always succeeds. */
 static int
 ttydevsw_defopen(struct tty *tp __unused)
 {
 
 	return (0);
 }
 
 /* Default close routine: intentionally empty. */
 static void
 ttydevsw_defclose(struct tty *tp __unused)
 {
 
 }
 
 /*
  * Default output wakeup routine: panics, because a driver that did not
  * supply its own routine is not expected to ever have output.
  */
 static void
 ttydevsw_defoutwakeup(struct tty *tp __unused)
 {
 
 	panic("Terminal device has output, while not implemented");
 }
 
 /* Default input wakeup routine: intentionally empty. */
 static void
 ttydevsw_definwakeup(struct tty *tp __unused)
 {
 
 }
 
 /*
  * Default driver ioctl routine: handles nothing and returns ENOIOCTL,
  * which makes tty_ioctl() fall back to tty_generic_ioctl().
  */
 static int
 ttydevsw_defioctl(struct tty *tp __unused, u_long cmd __unused,
     caddr_t data __unused, struct thread *td __unused)
 {
 
 	return (ENOIOCTL);
 }
 
 /*
  * Default driver ioctl routine for the init/lock-state devices: handles
  * nothing and returns ENOIOCTL so ttyil_ioctl() applies its own handling.
  */
 static int
 ttydevsw_defcioctl(struct tty *tp __unused, int unit __unused,
     u_long cmd __unused, caddr_t data __unused, struct thread *td __unused)
 {
 
 	return (ENOIOCTL);
 }
 
 /*
  * Default param routine, used when the driver supplies none.  Forces
  * CREAD on and clamps both line speeds into [B50, B115200].  Always
  * returns 0.
  */
 static int
 ttydevsw_defparam(struct tty *tp __unused, struct termios *t)
 {
 
 	t->c_cflag |= CREAD;
 
 	/*
 	 * Pseudo-devices may adjust the baud rate, but cap it at 115200
 	 * to prevent excessive buffer usage and refuse anything below
 	 * B50 (including 0) to prevent foot shooting.
 	 */
 	if (t->c_ispeed > B115200)
 		t->c_ispeed = B115200;
 	else if (t->c_ispeed < B50)
 		t->c_ispeed = B50;
 
 	if (t->c_ospeed > B115200)
 		t->c_ospeed = B115200;
 	else if (t->c_ospeed < B50)
 		t->c_ospeed = B50;
 
 	return (0);
 }
 
 /*
  * Default modem routine: ignores the requested signal changes and
  * always reports SER_DCD.
  */
 static int
 ttydevsw_defmodem(struct tty *tp __unused, int sigon __unused,
     int sigoff __unused)
 {
 
 	/* Simulate a carrier to make the TTY layer happy. */
 	return (SER_DCD);
 }
 
 /* Default mmap routine: mapping is not supported, always returns -1. */
 static int
 ttydevsw_defmmap(struct tty *tp __unused, vm_ooffset_t offset __unused,
     vm_paddr_t *paddr __unused, int nprot __unused,
     vm_memattr_t *memattr __unused)
 {
 
 	return (-1);
 }
 
 /* Default packet-mode notification routine: intentionally empty. */
 static void
 ttydevsw_defpktnotify(struct tty *tp __unused, char event __unused)
 {
 
 }
 
 /*
  * Default free routine: panics, since a TTY that gets freed needs a
  * driver-supplied free handler.
  */
 static void
 ttydevsw_deffree(void *softc __unused)
 {
 
 	panic("Terminal device freed without a free-handler");
 }
 
 /* Default busy routine: the device is never reported as busy. */
 static bool
 ttydevsw_defbusy(struct tty *tp __unused)
 {
 
 	return (FALSE);
 }
 
 /*
  * TTY allocation and deallocation. TTY devices can be deallocated when
  * the driver doesn't use it anymore, when the TTY isn't a session's
  * controlling TTY and when the device node isn't opened through devfs.
  */
 
 /*
  * Allocate a TTY that uses its own internal mutex; shorthand for
  * tty_alloc_mutex() with a NULL mutex.
  */
 struct tty *
 tty_alloc(struct ttydevsw *tsw, void *sc)
 {
 
 	return (tty_alloc_mutex(tsw, sc, NULL));
 }
 
 /*
  * Allocate and initialize a TTY.
  *
  * tsw:   driver switch table; NULL entries are replaced in place with
  *        the ttydevsw_def*() defaults.
  * sc:    driver softc, stored in t_devswsoftc.
  * mutex: lock to protect the TTY with, or NULL to use the mutex embedded
  *        in the TTY itself.
  *
  * Returns the new, zero-initialized TTY (the allocation uses M_WAITOK).
  */
 struct tty *
 tty_alloc_mutex(struct ttydevsw *tsw, void *sc, struct mtx *mutex)
 {
 	struct tty *tp;
 
 	/* Make sure the driver defines all routines. */
 #define PATCH_FUNC(x) do {				\
 	if (tsw->tsw_ ## x == NULL)			\
 		tsw->tsw_ ## x = ttydevsw_def ## x;	\
 } while (0)
 	PATCH_FUNC(open);
 	PATCH_FUNC(close);
 	PATCH_FUNC(outwakeup);
 	PATCH_FUNC(inwakeup);
 	PATCH_FUNC(ioctl);
 	PATCH_FUNC(cioctl);
 	PATCH_FUNC(param);
 	PATCH_FUNC(modem);
 	PATCH_FUNC(mmap);
 	PATCH_FUNC(pktnotify);
 	PATCH_FUNC(free);
 	PATCH_FUNC(busy);
 #undef PATCH_FUNC
 
 	tp = malloc(sizeof(struct tty), M_TTY, M_WAITOK|M_ZERO);
 	tp->t_devsw = tsw;
 	tp->t_devswsoftc = sc;
 	tp->t_flags = tsw->tsw_flags;
 	tp->t_drainwait = tty_drainwait;
 
 	tty_init_termios(tp);
 
 	cv_init(&tp->t_inwait, "ttyin");
 	cv_init(&tp->t_outwait, "ttyout");
 	cv_init(&tp->t_outserwait, "ttyosr");
 	cv_init(&tp->t_bgwait, "ttybg");
 	cv_init(&tp->t_dcdwait, "ttydcd");
 
 	/* Allow drivers to use a custom mutex to lock the TTY. */
 	if (mutex != NULL) {
 		tp->t_mtx = mutex;
 	} else {
 		tp->t_mtx = &tp->t_mtxobj;
 		mtx_init(&tp->t_mtxobj, "ttymtx", NULL, MTX_DEF);
 	}
 
 	/* The knote lists share the TTY lock chosen above. */
 	knlist_init_mtx(&tp->t_inpoll.si_note, tp->t_mtx);
 	knlist_init_mtx(&tp->t_outpoll.si_note, tp->t_mtx);
 
 	return (tp);
 }
 
 /*
  * Final teardown of a TTY; passed as the callback to
  * destroy_dev_sched_cb() by tty_rel_free().  Releases the queues, poll
  * state, condition variables and (if internal) the mutex, lets the driver
  * free its softc through ttydevsw_free() and finally frees the TTY.
  */
 static void
 tty_dealloc(void *arg)
 {
 	struct tty *tp = arg;
 
 	/*
 	 * ttydev_leave() usually frees the i/o queues earlier, but it is
 	 * not always called between queue allocation and here.  The queues
 	 * may be allocated by ioctls on a pty control device without the
 	 * corresponding pty slave device ever being open, or after it is
 	 * closed.
 	 */
 	ttyinq_free(&tp->t_inq);
 	ttyoutq_free(&tp->t_outq);
 	seldrain(&tp->t_inpoll);
 	seldrain(&tp->t_outpoll);
 	knlist_destroy(&tp->t_inpoll.si_note);
 	knlist_destroy(&tp->t_outpoll.si_note);
 
 	cv_destroy(&tp->t_inwait);
 	cv_destroy(&tp->t_outwait);
 	cv_destroy(&tp->t_bgwait);
 	cv_destroy(&tp->t_dcdwait);
 	cv_destroy(&tp->t_outserwait);
 
 	/* Only destroy the mutex when it is the TTY's own, not a driver's. */
 	if (tp->t_mtx == &tp->t_mtxobj)
 		mtx_destroy(&tp->t_mtxobj);
 	ttydevsw_free(tp);
 	free(tp, M_TTY);
 }
 
 /*
  * Release the TTY if nothing uses it anymore.
  *
  * Must be called with the TTY lock held; the lock is dropped on every
  * path.  The TTY is only torn down when no session references it and,
  * of the TF_ACTIVITY flags, exactly TF_GONE is set.  In that case it is
  * removed from tty_list and its device node is scheduled for destruction
  * with tty_dealloc() as the completion callback.
  */
 static void
 tty_rel_free(struct tty *tp)
 {
 	struct cdev *dev;
 
 	tty_lock_assert(tp, MA_OWNED);
 
 #define	TF_ACTIVITY	(TF_GONE|TF_OPENED|TF_HOOK|TF_OPENCLOSE)
 	if (tp->t_sessioncnt != 0 || (tp->t_flags & TF_ACTIVITY) != TF_GONE) {
 		/* TTY is still in use. */
 		tty_unlock(tp);
 		return;
 	}
 
 	/* TTY can be deallocated. */
 	/* Clear t_dev under the lock; a NULL dev below skips the teardown. */
 	dev = tp->t_dev;
 	tp->t_dev = NULL;
 	tty_unlock(tp);
 
 	if (dev != NULL) {
 		sx_xlock(&tty_list_sx);
 		TAILQ_REMOVE(&tty_list, tp, t_list);
 		tty_list_count--;
 		sx_xunlock(&tty_list_sx);
 		destroy_dev_sched_cb(dev, tty_dealloc, tp);
 	}
 }
 
 /*
  * Drop the TTY's reference to process group 'pg' if it is the current
  * foreground process group.  Called with the TTY lock held; the lock is
  * released before returning.
  */
 void
 tty_rel_pgrp(struct tty *tp, struct pgrp *pg)
 {
 
 	MPASS(tp->t_sessioncnt > 0);
 	tty_lock_assert(tp, MA_OWNED);
 
 	if (tp->t_pgrp == pg)
 		tp->t_pgrp = NULL;
 
 	tty_unlock(tp);
 }
 
 /*
  * Drop one session reference on the TTY, clearing t_session when 'sess'
  * is the current session.  Ends in tty_rel_free(), which unlocks the TTY
  * and may deallocate it.
  */
 void
 tty_rel_sess(struct tty *tp, struct session *sess)
 {
 
 	MPASS(tp->t_sessioncnt > 0);
 
 	/* Current session has left. */
 	if (tp->t_session == sess) {
 		tp->t_session = NULL;
 		MPASS(tp->t_pgrp == NULL);
 	}
 	tp->t_sessioncnt--;
 	tty_rel_free(tp);
 }
 
 /*
  * Mark the TTY as gone: simulate loss of carrier, wake up every blocked
  * thread and set TF_GONE.  Ends in tty_rel_free(), which unlocks the TTY
  * and may deallocate it.
  */
 void
 tty_rel_gone(struct tty *tp)
 {
 
 	MPASS(!tty_gone(tp));
 
 	/* Simulate carrier removal. */
 	ttydisc_modem(tp, 0);
 
 	/* Wake up all blocked threads. */
 	tty_wakeup(tp, FREAD|FWRITE);
 	cv_broadcast(&tp->t_bgwait);
 	cv_broadcast(&tp->t_dcdwait);
 
 	tp->t_flags |= TF_GONE;
 	tty_rel_free(tp);
 }
 
 /*
  * Exposing information about current TTY's through sysctl
  */
 
 /*
  * Fill the exported 'xt' record from the state of 'tp'.  The TTY lock
  * must be held.  A missing process group or session is reported as id 0;
  * a missing device node is reported as NODEV.
  */
 static void
 tty_to_xtty(struct tty *tp, struct xtty *xt)
 {
 
 	tty_lock_assert(tp, MA_OWNED);
 
 	xt->xt_size = sizeof(struct xtty);
 	xt->xt_insize = ttyinq_getsize(&tp->t_inq);
 	xt->xt_incc = ttyinq_bytescanonicalized(&tp->t_inq);
 	xt->xt_inlc = ttyinq_bytesline(&tp->t_inq);
 	xt->xt_inlow = tp->t_inlow;
 	xt->xt_outsize = ttyoutq_getsize(&tp->t_outq);
 	xt->xt_outcc = ttyoutq_bytesused(&tp->t_outq);
 	xt->xt_outlow = tp->t_outlow;
 	xt->xt_column = tp->t_column;
 	xt->xt_pgid = tp->t_pgrp ? tp->t_pgrp->pg_id : 0;
 	xt->xt_sid = tp->t_session ? tp->t_session->s_sid : 0;
 	xt->xt_flags = tp->t_flags;
-	xt->xt_dev = tp->t_dev ? dev2udev(tp->t_dev) : NODEV;
+	xt->xt_dev = tp->t_dev ? dev2udev(tp->t_dev) : (uint32_t)NODEV;
 }
 
 static int
 sysctl_kern_ttys(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long lsize;
 	struct xtty *xtlist, *xt;
 	struct tty *tp;
 	int error;
 
 	sx_slock(&tty_list_sx);
 	lsize = tty_list_count * sizeof(struct xtty);
 	if (lsize == 0) {
 		sx_sunlock(&tty_list_sx);
 		return (0);
 	}
 
 	xtlist = xt = malloc(lsize, M_TTY, M_WAITOK);
 
 	TAILQ_FOREACH(tp, &tty_list, t_list) {
 		tty_lock(tp);
 		tty_to_xtty(tp, xt);
 		tty_unlock(tp);
 		xt++;
 	}
 	sx_sunlock(&tty_list_sx);
 
 	error = SYSCTL_OUT(req, xtlist, lsize);
 	free(xtlist, M_TTY);
 	return (error);
 }
 
 /* Read-only, MP-safe opaque sysctl kern.ttys, served by sysctl_kern_ttys(). */
 SYSCTL_PROC(_kern, OID_AUTO, ttys, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE,
 	0, 0, sysctl_kern_ttys, "S,xtty", "List of TTYs");
 
 /*
  * Device node creation. Device has been set up, now we can expose it to
  * the user.
  */
 
 /*
  * Create the device nodes for a TTY and add it to the global TTY list.
  *
  * tp:    the TTY to expose.
  * cred:  credentials for a user-owned device, or NULL for a system
  *        device (root:wheel, mode 0600).
  * flags: only TTYMK_CLONING is examined; it selects MAKEDEV_REF.
  * fmt:   printf-style format for the device name, without prefix.
  *
  * The master node is named "<prefix><name>", where the prefix is "tty"
  * unless TF_NOPREFIX is set.  TF_INITLOCK adds ".init" and ".lock" state
  * devices, TF_CALLOUT adds a "cua<name>" call-out device (with its own
  * state devices when TF_INITLOCK is set as well).
  *
  * Returns 0 on success or the error of the failing make_dev_s() call.
  */
 int
 tty_makedevf(struct tty *tp, struct ucred *cred, int flags,
     const char *fmt, ...)
 {
 	va_list ap;
 	struct make_dev_args args;
 	struct cdev *dev, *init, *lock, *cua, *cinit, *clock;
 	const char *prefix = "tty";
 	char name[SPECNAMELEN - 3]; /* for "tty" and "cua". */
 	uid_t uid;
 	gid_t gid;
 	mode_t mode;
 	int error;
 
 	/* Remove "tty" prefix from devices like PTY's. */
 	if (tp->t_flags & TF_NOPREFIX)
 		prefix = "";
 
 	va_start(ap, fmt);
 	vsnrprintf(name, sizeof name, 32, fmt, ap);
 	va_end(ap);
 
 	if (cred == NULL) {
 		/* System device. */
 		uid = UID_ROOT;
 		gid = GID_WHEEL;
 		mode = S_IRUSR|S_IWUSR;
 	} else {
 		/* User device. */
 		uid = cred->cr_ruid;
 		gid = GID_TTY;
 		mode = S_IRUSR|S_IWUSR|S_IWGRP;
 	}
 
 	/* Translate the TTYMK_* flags into MAKEDEV_* flags. */
 	flags = flags & TTYMK_CLONING ? MAKEDEV_REF : 0;
 	flags |= MAKEDEV_CHECKNAME;
 
 	/* Master call-in device. */
 	make_dev_args_init(&args);
 	args.mda_flags = flags;
 	args.mda_devsw = &ttydev_cdevsw;
 	args.mda_cr = cred;
 	args.mda_uid = uid;
 	args.mda_gid = gid;
 	args.mda_mode = mode;
 	args.mda_si_drv1 = tp;
 	error = make_dev_s(&args, &dev, "%s%s", prefix, name);
 	if (error != 0)
 		return (error);
 	tp->t_dev = dev;
 
 	init = lock = cua = cinit = clock = NULL;
 
 	/* Slave call-in devices. */
 	if (tp->t_flags & TF_INITLOCK) {
 		args.mda_devsw = &ttyil_cdevsw;
 		args.mda_unit = TTYUNIT_INIT;
 		args.mda_si_drv1 = tp;
 		args.mda_si_drv2 = &tp->t_termios_init_in;
 		error = make_dev_s(&args, &init, "%s%s.init", prefix, name);
 		if (error != 0)
 			goto fail;
 		dev_depends(dev, init);
 
 		args.mda_unit = TTYUNIT_LOCK;
 		args.mda_si_drv2 = &tp->t_termios_lock_in;
 		error = make_dev_s(&args, &lock, "%s%s.lock", prefix, name);
 		if (error != 0)
 			goto fail;
 		dev_depends(dev, lock);
 	}
 
 	/* Call-out devices. */
 	if (tp->t_flags & TF_CALLOUT) {
 		make_dev_args_init(&args);
 		args.mda_flags = flags;
 		args.mda_devsw = &ttydev_cdevsw;
 		args.mda_cr = cred;
 		args.mda_uid = UID_UUCP;
 		args.mda_gid = GID_DIALER;
 		args.mda_mode = 0660;
 		args.mda_unit = TTYUNIT_CALLOUT;
 		args.mda_si_drv1 = tp;
 		error = make_dev_s(&args, &cua, "cua%s", name);
 		if (error != 0)
 			goto fail;
 		dev_depends(dev, cua);
 
 		/* Slave call-out devices. */
 		if (tp->t_flags & TF_INITLOCK) {
 			args.mda_devsw = &ttyil_cdevsw;
 			args.mda_unit = TTYUNIT_CALLOUT | TTYUNIT_INIT;
 			args.mda_si_drv2 = &tp->t_termios_init_out;
 			error = make_dev_s(&args, &cinit, "cua%s.init", name);
 			if (error != 0)
 				goto fail;
 			dev_depends(dev, cinit);
 
 			args.mda_unit = TTYUNIT_CALLOUT | TTYUNIT_LOCK;
 			args.mda_si_drv2 = &tp->t_termios_lock_out;
 			error = make_dev_s(&args, &clock, "cua%s.lock", name);
 			if (error != 0)
 				goto fail;
 			dev_depends(dev, clock);
 		}
 	}
 
 	/* All nodes exist; publish the TTY on the global list. */
 	sx_xlock(&tty_list_sx);
 	TAILQ_INSERT_TAIL(&tty_list, tp, t_list);
 	tty_list_count++;
 	sx_xunlock(&tty_list_sx);
 
 	return (0);
 
 fail:
 	/*
 	 * NOTE(review): 'cua' is never destroyed explicitly here, and
 	 * tp->t_dev keeps pointing at 'dev' after it is destroyed.
 	 * Presumably destroy_dev(dev) also takes care of the nodes that
 	 * were attached with dev_depends() -- confirm against the devfs
 	 * implementation, including whether the explicit destroy_dev()
 	 * calls below are then still valid.
 	 */
 	destroy_dev(dev);
 	if (init)
 		destroy_dev(init);
 	if (lock)
 		destroy_dev(lock);
 	if (cinit)
 		destroy_dev(cinit);
 	if (clock)
 		destroy_dev(clock);
 
 	return (error);
 }
 
 /*
  * Signalling processes.
  */
 
 /*
  * Post signal 'sig' to the leader of the session attached to the TTY,
  * if there is one.  Clears TF_STOPPED in any case.  The TTY lock must
  * be held.
  */
 void
 tty_signal_sessleader(struct tty *tp, int sig)
 {
 	struct proc *leader;
 
 	tty_lock_assert(tp, MA_OWNED);
 	MPASS(sig >= 1 && sig < NSIG);
 
 	/* Make signals start output again. */
 	tp->t_flags &= ~TF_STOPPED;
 
 	/* Nobody to signal without a session or a session leader. */
 	if (tp->t_session == NULL || tp->t_session->s_leader == NULL)
 		return;
 
 	leader = tp->t_session->s_leader;
 	PROC_LOCK(leader);
 	kern_psignal(leader, sig);
 	PROC_UNLOCK(leader);
 }
 
 /*
  * Post signal 'sig' to the foreground process group of the TTY, if
  * there is one.  Clears TF_STOPPED in any case and, for SIGINFO without
  * NOKERNINFO, prints the status line first.  The TTY lock must be held.
  */
 void
 tty_signal_pgrp(struct tty *tp, int sig)
 {
 	ksiginfo_t ksi;
 
 	tty_lock_assert(tp, MA_OWNED);
 	MPASS(sig >= 1 && sig < NSIG);
 
 	/* Make signals start output again. */
 	tp->t_flags &= ~TF_STOPPED;
 
 	if (sig == SIGINFO && (tp->t_termios.c_lflag & NOKERNINFO) == 0)
 		tty_info(tp);
 
 	/* No foreground process group: nothing to deliver to. */
 	if (tp->t_pgrp == NULL)
 		return;
 
 	ksiginfo_init(&ksi);
 	ksi.ksi_signo = sig;
 	ksi.ksi_code = SI_KERNEL;
 	PGRP_LOCK(tp->t_pgrp);
 	pgsignal(tp->t_pgrp, sig, 1, &ksi);
 	PGRP_UNLOCK(tp->t_pgrp);
 }
 
 /*
  * Wake up everything waiting for I/O on the TTY.
  *
  * 'flags' selects the direction(s): FWRITE wakes output sleepers,
  * pollers and knotes, FREAD does the same for input.  SIGIO is sent
  * first when TF_ASYNC is set and an owner has been registered.
  */
 void
 tty_wakeup(struct tty *tp, int flags)
 {
 
 	if (tp->t_flags & TF_ASYNC && tp->t_sigio != NULL)
 		pgsigio(&tp->t_sigio, SIGIO, (tp->t_session != NULL));
 
 	if (flags & FWRITE) {
 		cv_broadcast(&tp->t_outwait);
 		selwakeup(&tp->t_outpoll);
 		KNOTE_LOCKED(&tp->t_outpoll.si_note, 0);
 	}
 	if (flags & FREAD) {
 		cv_broadcast(&tp->t_inwait);
 		selwakeup(&tp->t_inpoll);
 		KNOTE_LOCKED(&tp->t_inpoll.si_note, 0);
 	}
 }
 
 int
 tty_wait(struct tty *tp, struct cv *cv)
 {
 	int error;
 	int revokecnt = tp->t_revokecnt;
 
 	tty_lock_assert(tp, MA_OWNED|MA_NOTRECURSED);
 	MPASS(!tty_gone(tp));
 
 	error = cv_wait_sig(cv, tp->t_mtx);
 
 	/* Bail out when the device slipped away. */
 	if (tty_gone(tp))
 		return (ENXIO);
 
 	/* Restart the system call when we may have been revoked. */
 	if (tp->t_revokecnt != revokecnt)
 		return (ERESTART);
 
 	return (error);
 }
 
 int
 tty_timedwait(struct tty *tp, struct cv *cv, int hz)
 {
 	int error;
 	int revokecnt = tp->t_revokecnt;
 
 	tty_lock_assert(tp, MA_OWNED|MA_NOTRECURSED);
 	MPASS(!tty_gone(tp));
 
 	error = cv_timedwait_sig(cv, tp->t_mtx, hz);
 
 	/* Bail out when the device slipped away. */
 	if (tty_gone(tp))
 		return (ENXIO);
 
 	/* Restart the system call when we may have been revoked. */
 	if (tp->t_revokecnt != revokecnt)
 		return (ERESTART);
 
 	return (error);
 }
 
 /*
  * Discard queued data in the direction(s) selected by 'flags'
  * (FWRITE and/or FREAD), wake up waiters and, unless the TTY is gone,
  * notify the driver and any packet-mode consumer.
  */
 void
 tty_flush(struct tty *tp, int flags)
 {
 
 	if (flags & FWRITE) {
 		tp->t_flags &= ~TF_HIWAT_OUT;
 		ttyoutq_flush(&tp->t_outq);
 		tty_wakeup(tp, FWRITE);
 		if (!tty_gone(tp)) {
 			ttydevsw_outwakeup(tp);
 			ttydevsw_pktnotify(tp, TIOCPKT_FLUSHWRITE);
 		}
 	}
 	if (flags & FREAD) {
 		tty_hiwat_in_unblock(tp);
 		ttyinq_flush(&tp->t_inq);
 		tty_wakeup(tp, FREAD);
 		if (!tty_gone(tp)) {
 			ttydevsw_inwakeup(tp);
 			ttydevsw_pktnotify(tp, TIOCPKT_FLUSHREAD);
 		}
 	}
 }
 
 /*
  * Install a new window size.  SIGWINCH is sent to the foreground
  * process group only when the size actually differs from the old one.
  */
 void
 tty_set_winsize(struct tty *tp, const struct winsize *wsz)
 {
 
 	if (memcmp(&tp->t_winsize, wsz, sizeof(*wsz)) != 0) {
 		tp->t_winsize = *wsz;
 		tty_signal_pgrp(tp, SIGWINCH);
 	}
 }
 
 /*
  * ioctl commands implemented by the generic TTY layer.
  *
  * Called from tty_ioctl() when the driver's ioctl routine answered
  * ENOIOCTL.  Several commands (FIOSETOWN, TIOCSCTTY, TIOCSPGRP,
  * TIOCCONS) temporarily drop the TTY lock and re-acquire it before
  * returning.
  *
  * Returns 0 or an errno value for the commands handled here.  Unknown
  * commands are passed to tty_ioctl_compat() when COMPAT_43TTY is defined
  * and otherwise yield ENOIOCTL.
  */
 static int
 tty_generic_ioctl(struct tty *tp, u_long cmd, void *data, int fflag,
     struct thread *td)
 {
 	int error;
 
 	switch (cmd) {
 	/*
 	 * Modem commands.
 	 * The SER_* and TIOCM_* flags are the same, but one bit
 	 * shifted. I don't know why.
 	 */
 	case TIOCSDTR:
 		ttydevsw_modem(tp, SER_DTR, 0);
 		return (0);
 	case TIOCCDTR:
 		ttydevsw_modem(tp, 0, SER_DTR);
 		return (0);
 	case TIOCMSET: {
 		int bits = *(int *)data;
 		ttydevsw_modem(tp,
 		    (bits & (TIOCM_DTR | TIOCM_RTS)) >> 1,
 		    ((~bits) & (TIOCM_DTR | TIOCM_RTS)) >> 1);
 		return (0);
 	}
 	case TIOCMBIS: {
 		int bits = *(int *)data;
 		ttydevsw_modem(tp, (bits & (TIOCM_DTR | TIOCM_RTS)) >> 1, 0);
 		return (0);
 	}
 	case TIOCMBIC: {
 		int bits = *(int *)data;
 		ttydevsw_modem(tp, 0, (bits & (TIOCM_DTR | TIOCM_RTS)) >> 1);
 		return (0);
 	}
 	case TIOCMGET:
 		*(int *)data = TIOCM_LE + (ttydevsw_modem(tp, 0, 0) << 1);
 		return (0);
 
 	case FIOASYNC:
 		if (*(int *)data)
 			tp->t_flags |= TF_ASYNC;
 		else
 			tp->t_flags &= ~TF_ASYNC;
 		return (0);
 	case FIONBIO:
 		/* This device supports non-blocking operation. */
 		return (0);
 	case FIONREAD:
 		*(int *)data = ttyinq_bytescanonicalized(&tp->t_inq);
 		return (0);
 	case FIONWRITE:
 	case TIOCOUTQ:
 		*(int *)data = ttyoutq_bytesused(&tp->t_outq);
 		return (0);
 	case FIOSETOWN:
 		if (tp->t_session != NULL && !tty_is_ctty(tp, td->td_proc))
 			/* Not allowed to set ownership. */
 			return (ENOTTY);
 
 		/* Temporarily unlock the TTY to set ownership. */
 		tty_unlock(tp);
 		error = fsetown(*(int *)data, &tp->t_sigio);
 		tty_lock(tp);
 		return (error);
 	case FIOGETOWN:
 		if (tp->t_session != NULL && !tty_is_ctty(tp, td->td_proc))
 			/* Not allowed to get ownership. */
 			return (ENOTTY);
 
 		/* Get ownership. */
 		*(int *)data = fgetown(&tp->t_sigio);
 		return (0);
 	case TIOCGETA:
 		/* Obtain terminal flags through tcgetattr(). */
 		*(struct termios*)data = tp->t_termios;
 		return (0);
 	case TIOCSETA:
 	case TIOCSETAW:
 	case TIOCSETAF: {
 		struct termios *t = data;
 
 		/*
 		 * Who makes up these funny rules? According to POSIX,
 		 * input baud rate is set equal to the output baud rate
 		 * when zero.
 		 */
 		if (t->c_ispeed == 0)
 			t->c_ispeed = t->c_ospeed;
 
 		/* Discard any unsupported bits. */
 		t->c_iflag &= TTYSUP_IFLAG;
 		t->c_oflag &= TTYSUP_OFLAG;
 		t->c_lflag &= TTYSUP_LFLAG;
 		t->c_cflag &= TTYSUP_CFLAG;
 
 		/* Set terminal flags through tcsetattr(). */
 		if (cmd == TIOCSETAW || cmd == TIOCSETAF) {
 			error = tty_drain(tp, 0);
 			if (error)
 				return (error);
 			if (cmd == TIOCSETAF)
 				tty_flush(tp, FREAD);
 		}
 
 		/*
 		 * Only call param() when the flags really change.
 		 */
 		if ((t->c_cflag & CIGNORE) == 0 &&
 		    (tp->t_termios.c_cflag != t->c_cflag ||
 		    ((tp->t_termios.c_iflag ^ t->c_iflag) &
 		    (IXON|IXOFF|IXANY)) ||
 		    tp->t_termios.c_ispeed != t->c_ispeed ||
 		    tp->t_termios.c_ospeed != t->c_ospeed)) {
 			error = ttydevsw_param(tp, t);
 			if (error)
 				return (error);
 
 			/* XXX: CLOCAL? */
 
 			tp->t_termios.c_cflag = t->c_cflag & ~CIGNORE;
 			tp->t_termios.c_ispeed = t->c_ispeed;
 			tp->t_termios.c_ospeed = t->c_ospeed;
 
 			/* Baud rate has changed - update watermarks. */
 			error = tty_watermarks(tp);
 			if (error)
 				return (error);
 		}
 
 		/* Copy new non-device driver parameters. */
 		tp->t_termios.c_iflag = t->c_iflag;
 		tp->t_termios.c_oflag = t->c_oflag;
 		tp->t_termios.c_lflag = t->c_lflag;
 		memcpy(&tp->t_termios.c_cc, t->c_cc, sizeof t->c_cc);
 
 		ttydisc_optimize(tp);
 
 		if ((t->c_lflag & ICANON) == 0) {
 			/*
 			 * When in non-canonical mode, wake up all
 			 * readers. Canonicalize any partial input. VMIN
 			 * and VTIME could also be adjusted.
 			 */
 			ttyinq_canonicalize(&tp->t_inq);
 			tty_wakeup(tp, FREAD);
 		}
 
 		/*
 		 * For packet mode: notify the PTY consumer that VSTOP
 		 * and VSTART may have been changed.
 		 */
 		if (tp->t_termios.c_iflag & IXON &&
 		    tp->t_termios.c_cc[VSTOP] == CTRL('S') &&
 		    tp->t_termios.c_cc[VSTART] == CTRL('Q'))
 			ttydevsw_pktnotify(tp, TIOCPKT_DOSTOP);
 		else
 			ttydevsw_pktnotify(tp, TIOCPKT_NOSTOP);
 		return (0);
 	}
 	case TIOCGETD:
 		/* For compatibility - we only support TTYDISC. */
 		*(int *)data = TTYDISC;
 		return (0);
 	case TIOCGPGRP:
 		if (!tty_is_ctty(tp, td->td_proc))
 			return (ENOTTY);
 
 		if (tp->t_pgrp != NULL)
 			*(int *)data = tp->t_pgrp->pg_id;
 		else
 			*(int *)data = NO_PID;
 		return (0);
 	case TIOCGSID:
 		if (!tty_is_ctty(tp, td->td_proc))
 			return (ENOTTY);
 
 		MPASS(tp->t_session);
 		*(int *)data = tp->t_session->s_sid;
 		return (0);
 	case TIOCSCTTY: {
 		struct proc *p = td->td_proc;
 
 		/* XXX: This looks awful. */
 		/* proctree_lock is taken first, then the TTY lock again. */
 		tty_unlock(tp);
 		sx_xlock(&proctree_lock);
 		tty_lock(tp);
 
 		if (!SESS_LEADER(p)) {
 			/* Only the session leader may do this. */
 			sx_xunlock(&proctree_lock);
 			return (EPERM);
 		}
 
 		if (tp->t_session != NULL && tp->t_session == p->p_session) {
 			/* This is already our controlling TTY. */
 			sx_xunlock(&proctree_lock);
 			return (0);
 		}
 
 		if (p->p_session->s_ttyp != NULL ||
 		    (tp->t_session != NULL && tp->t_session->s_ttyvp != NULL &&
 		    tp->t_session->s_ttyvp->v_type != VBAD)) {
 			/*
 			 * There is already a relation between a TTY and
 			 * a session, or the caller is not the session
 			 * leader.
 			 *
 			 * Allow the TTY to be stolen when the vnode is
 			 * invalid, but the reference to the TTY is
 			 * still active.  This allows immediate reuse of
 			 * TTYs of which the session leader has been
 			 * killed or the TTY revoked.
 			 */
 			sx_xunlock(&proctree_lock);
 			return (EPERM);
 		}
 
 		/* Connect the session to the TTY. */
 		tp->t_session = p->p_session;
 		tp->t_session->s_ttyp = tp;
 		tp->t_sessioncnt++;
 		sx_xunlock(&proctree_lock);
 
 		/* Assign foreground process group. */
 		tp->t_pgrp = p->p_pgrp;
 		PROC_LOCK(p);
 		p->p_flag |= P_CONTROLT;
 		PROC_UNLOCK(p);
 
 		return (0);
 	}
 	case TIOCSPGRP: {
 		struct pgrp *pg;
 
 		/*
 		 * XXX: Temporarily unlock the TTY to locate the process
 		 * group. This code would be lot nicer if we would ever
 		 * decompose proctree_lock.
 		 */
 		tty_unlock(tp);
 		sx_slock(&proctree_lock);
 		pg = pgfind(*(int *)data);
 		if (pg != NULL)
 			PGRP_UNLOCK(pg);
 		if (pg == NULL || pg->pg_session != td->td_proc->p_session) {
 			sx_sunlock(&proctree_lock);
 			tty_lock(tp);
 			return (EPERM);
 		}
 		tty_lock(tp);
 
 		/*
 		 * Determine if this TTY is the controlling TTY after
 		 * relocking the TTY.
 		 */
 		if (!tty_is_ctty(tp, td->td_proc)) {
 			sx_sunlock(&proctree_lock);
 			return (ENOTTY);
 		}
 		tp->t_pgrp = pg;
 		sx_sunlock(&proctree_lock);
 
 		/* Wake up the background process groups. */
 		cv_broadcast(&tp->t_bgwait);
 		return (0);
 	}
 	case TIOCFLUSH: {
 		int flags = *(int *)data;
 
 		/* Zero means "both directions"; other bits are ignored. */
 		if (flags == 0)
 			flags = (FREAD|FWRITE);
 		else
 			flags &= (FREAD|FWRITE);
 		tty_flush(tp, flags);
 		return (0);
 	}
 	case TIOCDRAIN:
 		/* Drain TTY output. */
 		return tty_drain(tp, 0);
 	case TIOCGDRAINWAIT:
 		*(int *)data = tp->t_drainwait;
 		return (0);
 	case TIOCSDRAINWAIT:
 		error = priv_check(td, PRIV_TTY_DRAINWAIT);
 		if (error == 0)
 			tp->t_drainwait = *(int *)data;
 		return (error);
 	case TIOCCONS:
 		/* Set terminal as console TTY. */
 		if (*(int *)data) {
 			error = priv_check(td, PRIV_TTY_CONSOLE);
 			if (error)
 				return (error);
 
 			/*
 			 * XXX: constty should really need to be locked!
 			 * XXX: allow disconnected constty's to be stolen!
 			 */
 
 			if (constty == tp)
 				return (0);
 			if (constty != NULL)
 				return (EBUSY);
 
 			tty_unlock(tp);
 			constty_set(tp);
 			tty_lock(tp);
 		} else if (constty == tp) {
 			constty_clear();
 		}
 		return (0);
 	case TIOCGWINSZ:
 		/* Obtain window size. */
 		*(struct winsize*)data = tp->t_winsize;
 		return (0);
 	case TIOCSWINSZ:
 		/* Set window size. */
 		tty_set_winsize(tp, data);
 		return (0);
 	case TIOCEXCL:
 		tp->t_flags |= TF_EXCLUDE;
 		return (0);
 	case TIOCNXCL:
 		tp->t_flags &= ~TF_EXCLUDE;
 		return (0);
 	case TIOCSTOP:
 		tp->t_flags |= TF_STOPPED;
 		ttydevsw_pktnotify(tp, TIOCPKT_STOP);
 		return (0);
 	case TIOCSTART:
 		tp->t_flags &= ~TF_STOPPED;
 		ttydevsw_outwakeup(tp);
 		ttydevsw_pktnotify(tp, TIOCPKT_START);
 		return (0);
 	case TIOCSTAT:
 		tty_info(tp);
 		return (0);
 	case TIOCSTI:
 		/*
 		 * Injecting input requires either read access to the
 		 * descriptor or PRIV_TTY_STI, and either the controlling
 		 * TTY or PRIV_TTY_STI.
 		 */
 		if ((fflag & FREAD) == 0 && priv_check(td, PRIV_TTY_STI))
 			return (EPERM);
 		if (!tty_is_ctty(tp, td->td_proc) &&
 		    priv_check(td, PRIV_TTY_STI))
 			return (EACCES);
 		ttydisc_rint(tp, *(char *)data, 0);
 		ttydisc_rint_done(tp);
 		return (0);
 	}
 
 #ifdef COMPAT_43TTY
 	return tty_ioctl_compat(tp, cmd, data, fflag, td);
 #else /* !COMPAT_43TTY */
 	return (ENOIOCTL);
 #endif /* COMPAT_43TTY */
 }
 
 /*
  * Perform an ioctl on a locked TTY.  The driver's ioctl routine is
  * tried first; the generic implementation only runs when the driver
  * answers ENOIOCTL.  Returns ENXIO when the TTY is gone.
  */
 int
 tty_ioctl(struct tty *tp, u_long cmd, void *data, int fflag, struct thread *td)
 {
 	int drv_error;
 
 	tty_lock_assert(tp, MA_OWNED);
 
 	if (tty_gone(tp))
 		return (ENXIO);
 
 	drv_error = ttydevsw_ioctl(tp, cmd, data, td);
 	if (drv_error != ENOIOCTL)
 		return (drv_error);
 
 	return (tty_generic_ioctl(tp, cmd, data, fflag, td));
 }
 
 dev_t
 tty_udev(struct tty *tp)
 {
 
 	if (tp->t_dev)
 		return (dev2udev(tp->t_dev));
 	else
 		return (NODEV);
 }
 
 /*
  * Return non-zero when the output queue has at least 256 bytes left,
  * zero otherwise.
  */
 int
 tty_checkoutq(struct tty *tp)
 {
 
 	/* 256 bytes should be enough to print a log message. */
 	return (ttyoutq_bytesleft(&tp->t_outq) >= 256);
 }
 
 /*
  * Enter the input high watermark state.  With input flow control
  * (IXOFF and a usable VSTOP character) the state is only entered once
  * the VSTOP character has been queued for output.
  */
 void
 tty_hiwat_in_block(struct tty *tp)
 {
 	int need_vstop;
 
 	need_vstop = (tp->t_flags & TF_HIWAT_IN) == 0 &&
 	    (tp->t_termios.c_iflag & IXOFF) != 0 &&
 	    tp->t_termios.c_cc[VSTOP] != _POSIX_VDISABLE;
 
 	/*
 	 * Without input flow control the flag is set unconditionally;
 	 * with it, only when the VSTOP character could be stored.
 	 */
 	if (!need_vstop ||
 	    ttyoutq_write_nofrag(&tp->t_outq,
 	    &tp->t_termios.c_cc[VSTOP], 1) == 0)
 		tp->t_flags |= TF_HIWAT_IN;
 }
 
 /*
  * Leave the input high watermark state.  With input flow control
  * (IXOFF and a usable VSTART character) the state is only left once the
  * VSTART character has been queued for output.  The driver's input
  * wakeup routine is called afterwards unless the TTY is gone.
  */
 void
 tty_hiwat_in_unblock(struct tty *tp)
 {
 	int need_vstart;
 
 	need_vstart = (tp->t_flags & TF_HIWAT_IN) != 0 &&
 	    (tp->t_termios.c_iflag & IXOFF) != 0 &&
 	    tp->t_termios.c_cc[VSTART] != _POSIX_VDISABLE;
 
 	/*
 	 * Without input flow control the flag is cleared unconditionally;
 	 * with it, only when the VSTART character could be stored.
 	 */
 	if (!need_vstart ||
 	    ttyoutq_write_nofrag(&tp->t_outq,
 	    &tp->t_termios.c_cc[VSTART], 1) == 0)
 		tp->t_flags &= ~TF_HIWAT_IN;
 
 	if (!tty_gone(tp))
 		ttydevsw_inwakeup(tp);
 }
 
 /*
  * TTY hooks interface.
  */
 
 /*
  * Default rint hook: feed the single character through the hook's
  * rint_bypass routine.  Returns 0 when the character was accepted and
  * -1 otherwise.
  */
 static int
 ttyhook_defrint(struct tty *tp, char c, int flags)
 {
 
 	return (ttyhook_rint_bypass(tp, &c, 1) == 1 ? 0 : -1);
 }
 
 /*
  * Attach the hook 'th' (with private data 'softc') to the TTY behind
  * file descriptor 'fd' of process 'p'.
  *
  * On success 0 is returned and the TTY is stored in *rtp.  Errors:
  * whatever fget_unlocked() reports, EBADF for a bad file, EINVAL when
  * the descriptor is not a character device vnode, ENXIO when the device
  * cannot be referenced or does not match the file, ENOTTY when it is not
  * a TTY device and EBUSY when the TTY already has a hook.
  */
 int
 ttyhook_register(struct tty **rtp, struct proc *p, int fd, struct ttyhook *th,
     void *softc)
 {
 	struct tty *tp;
 	struct file *fp;
 	struct cdev *dev;
 	struct cdevsw *cdp;
 	struct filedesc *fdp;
 	cap_rights_t rights;
 	int error, ref;
 
 	/* Validate the file descriptor. */
 	fdp = p->p_fd;
 	error = fget_unlocked(fdp, fd, cap_rights_init(&rights, CAP_TTYHOOK),
 	    &fp, NULL);
 	if (error != 0)
 		return (error);
 	if (fp->f_ops == &badfileops) {
 		error = EBADF;
 		goto done1;
 	}
 
 	/*
 	 * Make sure the vnode is bound to a character device.
 	 * Unlocked check for the vnode type is ok there, because we
 	 * only shall prevent calling devvn_refthread on the file that
 	 * never has been opened over a character device.
 	 */
 	if (fp->f_type != DTYPE_VNODE || fp->f_vnode->v_type != VCHR) {
 		error = EINVAL;
 		goto done1;
 	}
 
 	/* Make sure it is a TTY. */
 	cdp = devvn_refthread(fp->f_vnode, &dev, &ref);
 	if (cdp == NULL) {
 		error = ENXIO;
 		goto done1;
 	}
 	if (dev != fp->f_data) {
 		error = ENXIO;
 		goto done2;
 	}
 	if (cdp != &ttydev_cdevsw) {
 		error = ENOTTY;
 		goto done2;
 	}
 	tp = dev->si_drv1;
 
 	/* Try to attach the hook to the TTY. */
 	error = EBUSY;
 	tty_lock(tp);
 	MPASS((tp->t_hook == NULL) == ((tp->t_flags & TF_HOOK) == 0));
 	if (tp->t_flags & TF_HOOK)
 		goto done3;
 
 	tp->t_flags |= TF_HOOK;
 	tp->t_hook = th;
 	tp->t_hooksoftc = softc;
 	*rtp = tp;
 	error = 0;
 
 	/* Maybe we can switch into bypass mode now. */
 	ttydisc_optimize(tp);
 
 	/* Silently convert rint() calls to rint_bypass() when possible. */
 	if (!ttyhook_hashook(tp, rint) && ttyhook_hashook(tp, rint_bypass))
 		th->th_rint = ttyhook_defrint;
 
 	/* Unwind in reverse order of acquisition. */
 done3:	tty_unlock(tp);
 done2:	dev_relthread(dev, ref);
 done1:	fdrop(fp, curthread);
 	return (error);
 }
 
 /*
  * Detach the hook from a TTY.
  *
  * The caller must hold the TTY lock (asserted) and a hook must be
  * installed (asserted via TF_HOOK).  t_hooksoftc is left untouched.
  */
 void
 ttyhook_unregister(struct tty *tp)
 {
 
 	tty_lock_assert(tp, MA_OWNED);
 	MPASS(tp->t_flags & TF_HOOK);
 
 	/* Disconnect the hook. */
 	tp->t_flags &= ~TF_HOOK;
 	tp->t_hook = NULL;
 
 	/* Maybe we need to leave bypass mode. */
 	ttydisc_optimize(tp);
 
 	/* Maybe deallocate the TTY as well. */
 	tty_rel_free(tp);
 }
 
 /*
  * /dev/console handling.
  */
 
 /*
  * Open handler for /dev/console.
  *
  * Binds dev_console to the TTY whose device name equals the selected
  * console name, then defers to ttydev_open().  Returns ENXIO when no
  * console name has been selected or when no TTY is bound to the console
  * after the lookup.
  */
 static int
 ttyconsdev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
 {
 	struct tty *cand;
 
 	/* System has no console device. */
 	if (dev_console_filename == NULL)
 		return (ENXIO);
 
 	/* Walk the TTY list and bind the first one with a matching name. */
 	sx_slock(&tty_list_sx);
 	TAILQ_FOREACH(cand, &tty_list, t_list) {
 		if (strcmp(dev_console_filename, tty_devname(cand)) != 0)
 			continue;
 		dev_console->si_drv1 = cand;
 		break;
 	}
 	sx_sunlock(&tty_list_sx);
 
 	/* Hand over to the regular TTY open when a TTY is associated. */
 	if (dev_console->si_drv1 != NULL)
 		return (ttydev_open(dev, oflags, devtype, td));
 
 	/* System console has no TTY associated. */
 	return (ENXIO);
 }
 
 /*
  * Write handler for /dev/console: pass the data to log_console() first,
  * then perform the regular TTY write and return its result.
  */
 static int
 ttyconsdev_write(struct cdev *dev, struct uio *uio, int ioflag)
 {
 	int error;
 
 	log_console(uio);
 	error = ttydev_write(dev, uio, ioflag);
 
 	return (error);
 }
 
 /*
  * /dev/console is a little different than normal TTY's.  When opened,
  * it determines which TTY to use.  When data gets written to it, it
  * will be logged in the kernel message buffer.
  */
 /*
  * Character device switch for /dev/console.  Only open and write are
  * console-specific; every other entry point is the generic ttydev_*
  * handler.
  */
 static struct cdevsw ttyconsdev_cdevsw = {
 	.d_version	= D_VERSION,
 	.d_open		= ttyconsdev_open,	/* selects the backing TTY */
 	.d_close	= ttydev_close,
 	.d_read		= ttydev_read,
 	.d_write	= ttyconsdev_write,	/* logs, then writes */
 	.d_ioctl	= ttydev_ioctl,
 	.d_kqfilter	= ttydev_kqfilter,
 	.d_poll		= ttydev_poll,
 	.d_mmap		= ttydev_mmap,
 	.d_name		= "ttyconsdev",
 	.d_flags	= D_TTY,
 };
 
 /*
  * Create the "console" device node (root:wheel, mode 0600) backed by
  * ttyconsdev_cdevsw and remember it in dev_console.  The argument is
  * unused.
  */
 static void
 ttyconsdev_init(void *unused __unused)
 {
 
 	dev_console = make_dev_credf(MAKEDEV_ETERNAL, &ttyconsdev_cdevsw, 0,
 	    NULL, UID_ROOT, GID_WHEEL, 0600, "console");
 }
 
 /* Register ttyconsdev_init() at SI_SUB_DRIVERS, SI_ORDER_FIRST. */
 SYSINIT(tty, SI_SUB_DRIVERS, SI_ORDER_FIRST, ttyconsdev_init, NULL);
 
 /*
  * Select the TTY device name that /dev/console should resolve to on its
  * next open.  Only the pointer is stored (no copy is made), so the
  * string must remain valid for as long as it is selected.  With a NULL
  * name, ttyconsdev_open() fails with ENXIO.
  */
 void
 ttyconsdev_select(const char *name)
 {
 
 	dev_console_filename = name;
 }
 
 /*
  * Debugging routines.
  */
 
 #include "opt_ddb.h"
 #ifdef DDB
 #include <ddb/ddb.h>
 #include <ddb/db_sym.h>
 
 /*
  * Mapping from t_flags bits to the single characters printed in the
  * STATE column of "show all ttys".  The table is scanned in order and
  * ends at the entry whose flag is 0.  Some entries are combined masks
  * (TF_OPENED, TF_HIWAT, TF_BUSY) followed by their individual bits, so
  * the order of entries determines the printed character sequence.
  */
 static const struct {
 	int flag;	/* TF_* bit or mask tested against t_flags */
 	char val;	/* character printed when any tested bit is set */
 } ttystates[] = {
 #if 0
 	{ TF_NOPREFIX,		'N' },
 #endif
 	{ TF_INITLOCK,		'I' },
 	{ TF_CALLOUT,		'C' },
 
 	/* Keep these together -> 'Oi' and 'Oo'. */
 	{ TF_OPENED,		'O' },
 	{ TF_OPENED_IN,		'i' },
 	{ TF_OPENED_OUT,	'o' },
 	{ TF_OPENED_CONS,	'c' },
 
 	{ TF_GONE,		'G' },
 	{ TF_OPENCLOSE,		'B' },
 	{ TF_ASYNC,		'Y' },
 	{ TF_LITERAL,		'L' },
 
 	/* Keep these together -> 'Hi' and 'Ho'. */
 	{ TF_HIWAT,		'H' },
 	{ TF_HIWAT_IN,		'i' },
 	{ TF_HIWAT_OUT,		'o' },
 
 	{ TF_STOPPED,		'S' },
 	{ TF_EXCLUDE,		'X' },
 	{ TF_BYPASS,		'l' },
 	{ TF_ZOMBIE,		'Z' },
 	{ TF_HOOK,		's' },
 
 	/* Keep these together -> 'bi' and 'bo'. */
 	{ TF_BUSY,		'b' },
 	{ TF_BUSY_IN,		'i' },
 	{ TF_BUSY_OUT,		'o' },
 
 	/* Terminator: flag == 0 stops the scan. */
 	{ 0,			'\0'},
 };
 
 /*
  * Bit-name description string passed to the "%b" conversion when
  * printing t_flags in "show tty".  Per the kernel printf(9) "%b"
  * convention, the first character selects the output base (\20 = 16)
  * and each following "\N" is a 1-based bit number, in octal, followed
  * by that bit's name.
  * NOTE(review): the bit numbers are presumed to match the TF_*
  * definitions in <sys/tty.h> -- confirm when flags are added.
  */
 #define	TTY_FLAG_BITS \
 	"\20\1NOPREFIX\2INITLOCK\3CALLOUT\4OPENED_IN" \
 	"\5OPENED_OUT\6OPENED_CONS\7GONE\10OPENCLOSE" \
 	"\11ASYNC\12LITERAL\13HIWAT_IN\14HIWAT_OUT" \
 	"\15STOPPED\16EXCLUDE\17BYPASS\20ZOMBIE" \
 	"\21HOOK\22BUSY_IN\23BUSY_OUT"
 
 /*
  * Print one "<sep>  <name>: <symbol>" line for the address addr.
  *
  * Relies on a variable named `sep' being in scope at the point of use.
  * The body is wrapped in do { } while (0) so that the macro expands to
  * exactly one statement and stays correct under an unbraced if/else;
  * the addr argument is parenthesized so any expression can be passed.
  * Callers terminate the invocation with a semicolon as before.
  */
 #define DB_PRINTSYM(name, addr) do {					\
 	db_printf("%s  " #name ": ", sep);				\
 	db_printsym((db_addr_t)(addr), DB_STGY_ANY);			\
 	db_printf("\n");						\
 } while (0)
 
 /*
  * Print the symbol and address of a TTY device switch, followed by one
  * line per method pointer.  Each line is prefixed with sep.
  * Note: tsw is dereferenced unconditionally, so it must not be NULL.
  */
 static void
 _db_show_devsw(const char *sep, const struct ttydevsw *tsw)
 {
 
 	db_printf("%sdevsw: ", sep);
 	db_printsym((db_addr_t)tsw, DB_STGY_ANY);
 	db_printf(" (%p)\n", tsw);
 	DB_PRINTSYM(open, tsw->tsw_open);
 	DB_PRINTSYM(close, tsw->tsw_close);
 	DB_PRINTSYM(outwakeup, tsw->tsw_outwakeup);
 	DB_PRINTSYM(inwakeup, tsw->tsw_inwakeup);
 	DB_PRINTSYM(ioctl, tsw->tsw_ioctl);
 	DB_PRINTSYM(param, tsw->tsw_param);
 	DB_PRINTSYM(modem, tsw->tsw_modem);
 	DB_PRINTSYM(mmap, tsw->tsw_mmap);
 	DB_PRINTSYM(pktnotify, tsw->tsw_pktnotify);
 	DB_PRINTSYM(free, tsw->tsw_free);
 }
 
 /*
  * Print the symbol and address of a TTY hook table, followed by one
  * line per hook method.  Each line is prefixed with sep.  A NULL th is
  * accepted: only the header line is printed in that case.
  */
 static void
 _db_show_hooks(const char *sep, const struct ttyhook *th)
 {
 
 	db_printf("%shook: ", sep);
 	db_printsym((db_addr_t)th, DB_STGY_ANY);
 	db_printf(" (%p)\n", th);
 	/* No hook installed: nothing further to dereference. */
 	if (th == NULL)
 		return;
 	DB_PRINTSYM(rint, th->th_rint);
 	DB_PRINTSYM(rint_bypass, th->th_rint_bypass);
 	DB_PRINTSYM(rint_done, th->th_rint_done);
 	DB_PRINTSYM(rint_poll, th->th_rint_poll);
 	DB_PRINTSYM(getc_inject, th->th_getc_inject);
 	DB_PRINTSYM(getc_capture, th->th_getc_capture);
 	DB_PRINTSYM(getc_poll, th->th_getc_poll);
 	DB_PRINTSYM(close, th->th_close);
 }
 
 /*
  * Print the four flag words (hex) and the input/output speeds of a
  * termios structure on one line, labelled with name.  The control
  * characters (c_cc) are not printed.
  */
 static void
 _db_show_termios(const char *name, const struct termios *t)
 {
 
 	db_printf("%s: iflag 0x%x oflag 0x%x cflag 0x%x "
 	    "lflag 0x%x ispeed %u ospeed %u\n", name,
 	    t->c_iflag, t->c_oflag, t->c_cflag, t->c_lflag,
 	    t->c_ispeed, t->c_ospeed);
 }
 
 /*
  * DDB command "show tty <addr>": dump the state of a single TTY.
  *
  * addr and have_addr are not declared here; presumably they are
  * parameters supplied by the DB_SHOW_COMMAND() prototype -- confirm in
  * <ddb/ddb.h>.  The address is used as a struct tty pointer without any
  * validation.
  */
 DB_SHOW_COMMAND(tty, db_show_tty)
 {
 	struct tty *tp;
 
 	if (!have_addr) {
 		db_printf("usage: show tty <addr>\n");
 		return;
 	}
 	tp = (struct tty *)addr;
 
 	db_printf("%p: %s\n", tp, tty_devname(tp));
 	db_printf("\tmtx: %p\n", tp->t_mtx);
 	/* %b decodes the flag word using the TTY_FLAG_BITS description. */
 	db_printf("\tflags: 0x%b\n", tp->t_flags, TTY_FLAG_BITS);
 	db_printf("\trevokecnt: %u\n", tp->t_revokecnt);
 
 	/* Buffering mechanisms. */
 	db_printf("\tinq: %p begin %u linestart %u reprint %u end %u "
 	    "nblocks %u quota %u\n", &tp->t_inq, tp->t_inq.ti_begin,
 	    tp->t_inq.ti_linestart, tp->t_inq.ti_reprint, tp->t_inq.ti_end,
 	    tp->t_inq.ti_nblocks, tp->t_inq.ti_quota);
 	db_printf("\toutq: %p begin %u end %u nblocks %u quota %u\n",
 	    &tp->t_outq, tp->t_outq.to_begin, tp->t_outq.to_end,
 	    tp->t_outq.to_nblocks, tp->t_outq.to_quota);
 	db_printf("\tinlow: %zu\n", tp->t_inlow);
 	db_printf("\toutlow: %zu\n", tp->t_outlow);
 	_db_show_termios("\ttermios", &tp->t_termios);
 	db_printf("\twinsize: row %u col %u xpixel %u ypixel %u\n",
 	    tp->t_winsize.ws_row, tp->t_winsize.ws_col,
 	    tp->t_winsize.ws_xpixel, tp->t_winsize.ws_ypixel);
 	db_printf("\tcolumn: %u\n", tp->t_column);
 	db_printf("\twritepos: %u\n", tp->t_writepos);
 	db_printf("\tcompatflags: 0x%x\n", tp->t_compatflags);
 
 	/* Init/lock-state devices. */
 	_db_show_termios("\ttermios_init_in", &tp->t_termios_init_in);
 	_db_show_termios("\ttermios_init_out", &tp->t_termios_init_out);
 	_db_show_termios("\ttermios_lock_in", &tp->t_termios_lock_in);
 	_db_show_termios("\ttermios_lock_out", &tp->t_termios_lock_out);
 
 	/* Hooks */
 	_db_show_devsw("\t", tp->t_devsw);
 	_db_show_hooks("\t", tp->t_hook);
 
 	/* Process info. */
 	/* The value printed after "gid" is the process group id (pg_id). */
 	db_printf("\tpgrp: %p gid %d jobc %d\n", tp->t_pgrp,
 	    tp->t_pgrp ? tp->t_pgrp->pg_id : 0,
 	    tp->t_pgrp ? tp->t_pgrp->pg_jobc : 0);
 	db_printf("\tsession: %p", tp->t_session);
 	/* Session details are only printed when a session is attached. */
 	if (tp->t_session != NULL)
 	    db_printf(" count %u leader %p tty %p sid %d login %s",
 		tp->t_session->s_count, tp->t_session->s_leader,
 		tp->t_session->s_ttyp, tp->t_session->s_sid,
 		tp->t_session->s_login);
 	db_printf("\n");
 	db_printf("\tsessioncnt: %u\n", tp->t_sessioncnt);
 	db_printf("\tdevswsoftc: %p\n", tp->t_devswsoftc);
 	db_printf("\thooksoftc: %p\n", tp->t_hooksoftc);
 	db_printf("\tdev: %p\n", tp->t_dev);
 }
 
 /*
  * DDB command "show all ttys": print one summary line per TTY on
  * tty_list.  The list is walked without taking tty_list_sx.
  */
 DB_SHOW_ALL_COMMAND(ttys, db_show_all_ttys)
 {
 	struct tty *tp;
 	size_t isiz, osiz;
 	int i, j;
 
 	/* Make the output look like `pstat -t'. */
 	db_printf("PTR        ");
 #if defined(__LP64__)
 	/* Extra padding so the header lines up with wider pointers. */
 	db_printf("        ");
 #endif
 	db_printf("      LINE   INQ  CAN  LIN  LOW  OUTQ  USE  LOW   "
 	    "COL  SESS  PGID STATE\n");
 
 	TAILQ_FOREACH(tp, &tty_list, t_list) {
 		/* Queue sizes in bytes: number of blocks times block size. */
 		isiz = tp->t_inq.ti_nblocks * TTYINQ_DATASIZE;
 		osiz = tp->t_outq.to_nblocks * TTYOUTQ_DATASIZE;
 
 		/*
 		 * Arguments follow the header columns: PTR, LINE, INQ,
 		 * CAN, LIN, LOW, OUTQ, USE, LOW, COL, SESS, PGID.  The
 		 * column value is capped at 99999.
 		 */
 		db_printf("%p %10s %5zu %4u %4u %4zu %5zu %4u %4zu %5u %5d "
 		    "%5d ", tp, tty_devname(tp), isiz,
 		    tp->t_inq.ti_linestart - tp->t_inq.ti_begin,
 		    tp->t_inq.ti_end - tp->t_inq.ti_linestart,
 		    isiz - tp->t_inlow, osiz,
 		    tp->t_outq.to_end - tp->t_outq.to_begin,
 		    osiz - tp->t_outlow, MIN(tp->t_column, 99999),
 		    tp->t_session ? tp->t_session->s_sid : 0,
 		    tp->t_pgrp ? tp->t_pgrp->pg_id : 0);
 
 		/* Flag bits. */
 		/* j counts printed characters so "-" can mark "none". */
 		for (i = j = 0; ttystates[i].flag; i++)
 			if (tp->t_flags & ttystates[i].flag) {
 				db_printf("%c", ttystates[i].val);
 				j++;
 			}
 		if (j == 0)
 			db_printf("-");
 		db_printf("\n");
 	}
 }
 #endif /* DDB */
Index: head/sys/kern/tty_pts.c
===================================================================
--- head/sys/kern/tty_pts.c	(revision 318735)
+++ head/sys/kern/tty_pts.c	(revision 318736)
@@ -1,867 +1,869 @@
 /*-
  * Copyright (c) 2008 Ed Schouten <ed@FreeBSD.org>
  * All rights reserved.
  *
  * Portions of this software were developed under sponsorship from Snow
  * B.V., the Netherlands.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /* Add compatibility bits for FreeBSD. */
 #define PTS_COMPAT
 /* Add pty(4) compat bits. */
 #define PTS_EXTERNAL
 /* Add bits to make Linux binaries work. */
 #define PTS_LINUX
 
 #include <sys/param.h>
 #include <sys/lock.h>
 #include <sys/condvar.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/filio.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/poll.h>
 #include <sys/proc.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/serial.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/systm.h>
 #include <sys/tty.h>
 #include <sys/ttycom.h>
 #include <sys/user.h>
 
 #include <machine/stdarg.h>
 
 /*
  * Our utmp(5) format is limited to 8-byte TTY line names.  This means
  * we can at most allocate 1000 pseudo-terminals ("pts/999").  Allow
  * users to increase this number, assuming they have manually increased
  * UT_LINESIZE.
  */
 /* Unit number allocator for pts devices; created in pts_init(). */
 static struct unrhdr *pts_pool;
 
 /* malloc(9) type used for struct pts_softc allocations. */
 static MALLOC_DEFINE(M_PTS, "pts", "pseudo tty device");
 
 /*
  * Per-PTS structure.
  *
  * List of locks
  * (t)	locked by tty_lock()
  * (c)	const until freeing
  */
 struct pts_softc {
 	/* Unit number, or -1 for devices made by pts_alloc_external(). */
 	int		pts_unit;	/* (c) Device unit number. */
 	unsigned int	pts_flags;	/* (t) Device flags. */
 #define	PTS_PKT		0x1	/* Packet mode. */
 #define	PTS_FINISHED	0x2	/* Return errors on read()/write(). */
 	/* Pending TIOCPKT_* event bits, consumed by ptsdev_read(). */
 	char		pts_pkt;	/* (t) Unread packet mode data. */
 
 	struct cv	pts_inwait;	/* (t) Blocking write() on master. */
 	struct selinfo	pts_inpoll;	/* (t) Select queue for write(). */
 	struct cv	pts_outwait;	/* (t) Blocking read() on master. */
 	struct selinfo	pts_outpoll;	/* (t) Select queue for read(). */
 
 #ifdef PTS_EXTERNAL
 	/* NULL unless the device was made by pts_alloc_external(). */
 	struct cdev	*pts_cdev;	/* (c) Master device node. */
 #endif /* PTS_EXTERNAL */
 
 	/* Credential charged for this pts; released in ptsdrv_free(). */
 	struct ucred	*pts_cred;	/* (c) Resource limit. */
 };
 
 /*
  * Controller-side file operations.
  */
 
 /*
  * read() on the master side of a pseudo-terminal.
  *
  * Returns 0 immediately for a zero-length request.  In packet mode a
  * pending event byte is delivered on its own; otherwise queued output
  * from the TTY is copied out, prefixed by a TIOCPKT_DATA byte in packet
  * mode.  When nothing is available the call returns 0 if the device is
  * PTS_FINISHED, EWOULDBLOCK if the file is non-blocking, or sleeps on
  * pts_outwait (returning the cv_wait_sig() error if interrupted).
  *
  * active_cred, flags and td are not used.
  */
 static int
 ptsdev_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	struct tty *tp = fp->f_data;
 	struct pts_softc *psc = tty_softc(tp);
 	int error = 0;
 	char pkt;
 
 	if (uio->uio_resid == 0)
 		return (0);
 
 	tty_lock(tp);
 
 	for (;;) {
 		/*
 		 * Implement packet mode. When packet mode is turned on,
 		 * the first byte contains a bitmask of events that
 		 * occurred (start, stop, flush, window size, etc).
 		 */
 		if (psc->pts_flags & PTS_PKT && psc->pts_pkt) {
 			/* Take and clear the event under the lock. */
 			pkt = psc->pts_pkt;
 			psc->pts_pkt = 0;
 			tty_unlock(tp);
 
 			error = ureadc(pkt, uio);
 			return (error);
 		}
 
 		/*
 		 * Transmit regular data.
 		 *
 		 * XXX: We shouldn't use ttydisc_getc_poll()! Even
 		 * though in this implementation, there is likely going
 		 * to be data, we should just call ttydisc_getc_uio()
 		 * and use its return value to sleep.
 		 */
 		if (ttydisc_getc_poll(tp)) {
 			if (psc->pts_flags & PTS_PKT) {
 				/*
 				 * XXX: Small race. Fortunately PTY
 				 * consumers aren't multithreaded.
 				 */
 
 				/* The lock is dropped around ureadc(). */
 				tty_unlock(tp);
 				error = ureadc(TIOCPKT_DATA, uio);
 				if (error)
 					return (error);
 				tty_lock(tp);
 			}
 
 			error = ttydisc_getc_uio(tp, uio);
 			break;
 		}
 
 		/* Maybe the device isn't used anyway. */
 		if (psc->pts_flags & PTS_FINISHED)
 			break;
 
 		/* Wait for more data. */
 		if (fp->f_flag & O_NONBLOCK) {
 			error = EWOULDBLOCK;
 			break;
 		}
 		error = cv_wait_sig(&psc->pts_outwait, tp->t_mtx);
 		if (error != 0)
 			break;
 	}
 
 	tty_unlock(tp);
 
 	return (error);
 }
 
 /*
  * write() on the master side of a pseudo-terminal.
  *
  * User data is copied in chunks of at most sizeof(ib) bytes and fed to
  * the TTY input path with ttydisc_rint_simple().  When the TTY does not
  * accept a whole chunk the call fails with EIO (device PTS_FINISHED) or
  * EWOULDBLOCK (non-blocking file), or sleeps on pts_inwait.  On exit,
  * bytes that were copied in but not consumed are added back to
  * uio_resid.
  *
  * active_cred, flags and td are not used.
  */
 static int
 ptsdev_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	struct tty *tp = fp->f_data;
 	struct pts_softc *psc = tty_softc(tp);
 	char ib[256], *ibstart;
 	size_t iblen, rintlen;
 	int error = 0;
 
 	if (uio->uio_resid == 0)
 		return (0);
 
 	for (;;) {
 		/* Copy the next chunk in while the TTY lock is not held. */
 		ibstart = ib;
 		iblen = MIN(uio->uio_resid, sizeof ib);
 		error = uiomove(ib, iblen, uio);
 
 		/* The lock is taken first because "done" unlocks it. */
 		tty_lock(tp);
 		if (error != 0) {
 			iblen = 0;
 			goto done;
 		}
 
 		/*
 		 * When possible, avoid the slow path. rint_bypass()
 		 * copies all input to the input queue at once.
 		 */
 		MPASS(iblen > 0);
 		do {
 			rintlen = ttydisc_rint_simple(tp, ibstart, iblen);
 			ibstart += rintlen;
 			iblen -= rintlen;
 			if (iblen == 0) {
 				/* All data written. */
 				break;
 			}
 
 			/* Maybe the device isn't used anyway. */
 			if (psc->pts_flags & PTS_FINISHED) {
 				error = EIO;
 				goto done;
 			}
 
 			/* Not everything was accepted; we would have to wait. */
 			if (fp->f_flag & O_NONBLOCK) {
 				error = EWOULDBLOCK;
 				goto done;
 			}
 
 			/* Wake up users on the slave side. */
 			ttydisc_rint_done(tp);
 			error = cv_wait_sig(&psc->pts_inwait, tp->t_mtx);
 			if (error != 0)
 				goto done;
 		} while (iblen > 0);
 
 		/* Leave the loop with the lock held when all data is done. */
 		if (uio->uio_resid == 0)
 			break;
 		tty_unlock(tp);
 	}
 
 done:	ttydisc_rint_done(tp);
 	tty_unlock(tp);
 
 	/*
 	 * Don't account for the part of the buffer that we couldn't
 	 * pass to the TTY.
 	 */
 	uio->uio_resid += iblen;
 	return (error);
 }
 
 /*
  * ioctl() on the master side of a pseudo-terminal.
  *
  * A set of commands is answered directly here (see the individual
  * cases); TIOCSETAF and TIOCSETAW are rewritten to TIOCSETA; everything
  * else is forwarded to tty_ioctl() on the slave TTY.  An ENOIOCTL
  * result from tty_ioctl() is reported as ENOTTY.
  *
  * active_cred is not used.
  */
 static int
 ptsdev_ioctl(struct file *fp, u_long cmd, void *data,
     struct ucred *active_cred, struct thread *td)
 {
 	struct tty *tp = fp->f_data;
 	struct pts_softc *psc = tty_softc(tp);
 	int error = 0, sig;
 
 	switch (cmd) {
 	case FIODTYPE:
 		*(int *)data = D_TTY;
 		return (0);
 	case FIONBIO:
 		/* This device supports non-blocking operation. */
 		return (0);
 	case FIONREAD:
 		tty_lock(tp);
 		if (psc->pts_flags & PTS_FINISHED) {
 			/* Force read() to be called. */
 			*(int *)data = 1;
 		} else {
 			*(int *)data = ttydisc_getc_poll(tp);
 		}
 		tty_unlock(tp);
 		return (0);
 	case FIODGNAME: {
 		struct fiodgname_arg *fgn;
 		const char *p;
 		int i;
 
 		/* Reverse device name lookups, for ptsname() and ttyname(). */
 		fgn = data;
 		p = tty_devname(tp);
 		/* Length including the terminating NUL. */
 		i = strlen(p) + 1;
 		if (i > fgn->len)
 			return (EINVAL);
 		return copyout(p, fgn->buf, i);
 	}
 
 	/*
 	 * We need to implement TIOCGPGRP and TIOCGSID here again. When
 	 * called on the pseudo-terminal master, it should not check if
 	 * the terminal is the foreground terminal of the calling
 	 * process.
 	 *
 	 * TIOCGETA is also implemented here. Various Linux PTY routines
 	 * often call isatty(), which is implemented by tcgetattr().
 	 */
 #ifdef PTS_LINUX
 	case TIOCGETA:
 		/* Obtain terminal flags through tcgetattr(). */
 		tty_lock(tp);
 		*(struct termios*)data = tp->t_termios;
 		tty_unlock(tp);
 		return (0);
 #endif /* PTS_LINUX */
 	case TIOCSETAF:
 	case TIOCSETAW:
 		/*
 		 * We must make sure we turn tcsetattr() calls of TCSAFLUSH and
 		 * TCSADRAIN into something different. If an application would
 		 * call TCSAFLUSH or TCSADRAIN on the master descriptor, it may
 		 * deadlock waiting for all data to be read.
 		 */
 		cmd = TIOCSETA;
 		break;
 #if defined(PTS_COMPAT) || defined(PTS_LINUX)
 	case TIOCGPTN:
 		/*
 		 * Get the device unit number.
 		 */
 		/* Externally named devices have no unit (pts_unit < 0). */
 		if (psc->pts_unit < 0)
 			return (ENOTTY);
 		*(unsigned int *)data = psc->pts_unit;
 		return (0);
 #endif /* PTS_COMPAT || PTS_LINUX */
 	case TIOCGPGRP:
 		/* Get the foreground process group ID. */
 		tty_lock(tp);
 		if (tp->t_pgrp != NULL)
 			*(int *)data = tp->t_pgrp->pg_id;
 		else
 			*(int *)data = NO_PID;
 		tty_unlock(tp);
 		return (0);
 	case TIOCGSID:
 		/* Get the session leader process ID. */
 		tty_lock(tp);
 		if (tp->t_session == NULL)
 			error = ENOTTY;
 		else
 			*(int *)data = tp->t_session->s_sid;
 		tty_unlock(tp);
 		return (error);
 	case TIOCPTMASTER:
 		/* Yes, we are a pseudo-terminal master. */
 		return (0);
 	case TIOCSIG:
 		/* Signal the foreground process group. */
 		sig = *(int *)data;
 		/* Only signal numbers 1 .. NSIG-1 are accepted. */
 		if (sig < 1 || sig >= NSIG)
 			return (EINVAL);
 
 		tty_lock(tp);
 		tty_signal_pgrp(tp, sig);
 		tty_unlock(tp);
 		return (0);
 	case TIOCPKT:
 		/* Enable/disable packet mode. */
 		tty_lock(tp);
 		if (*(int *)data)
 			psc->pts_flags |= PTS_PKT;
 		else
 			psc->pts_flags &= ~PTS_PKT;
 		tty_unlock(tp);
 		return (0);
 	}
 
 	/* Just redirect this ioctl to the slave device. */
 	tty_lock(tp);
 	error = tty_ioctl(tp, cmd, data, fp->f_flag, td);
 	tty_unlock(tp);
 	if (error == ENOIOCTL)
 		error = ENOTTY;
 
 	return (error);
 }
 
 /*
  * poll() on the master side of a pseudo-terminal.
  *
  * When the device is PTS_FINISHED the requested read events are
  * reported together with POLLHUP.  Otherwise readability means queued
  * TTY output or a pending packet-mode event, and writability means the
  * TTY input path can accept data.  If no event is ready, the thread is
  * recorded on the matching select queue(s).
  *
  * active_cred is not used.
  */
 static int
 ptsdev_poll(struct file *fp, int events, struct ucred *active_cred,
     struct thread *td)
 {
 	struct tty *tp = fp->f_data;
 	struct pts_softc *psc = tty_softc(tp);
 	int revents = 0;
 
 	tty_lock(tp);
 
 	if (psc->pts_flags & PTS_FINISHED) {
 		/* Slave device is not opened. */
 		tty_unlock(tp);
 		return ((events & (POLLIN|POLLRDNORM)) | POLLHUP);
 	}
 
 	if (events & (POLLIN|POLLRDNORM)) {
 		/* See if we can getc something. */
 		if (ttydisc_getc_poll(tp) ||
 		    (psc->pts_flags & PTS_PKT && psc->pts_pkt))
 			revents |= events & (POLLIN|POLLRDNORM);
 	}
 	if (events & (POLLOUT|POLLWRNORM)) {
 		/* See if we can rint something. */
 		if (ttydisc_rint_poll(tp))
 			revents |= events & (POLLOUT|POLLWRNORM);
 	}
 
 	/*
 	 * No need to check for POLLHUP here. This device cannot be used
 	 * as a callout device, which means we always have a carrier,
 	 * because the master is.
 	 */
 
 	if (revents == 0) {
 		/*
 		 * This code might look misleading, but the naming of
 		 * poll events on this side is the opposite of the slave
 		 * device.
 		 */
 		if (events & (POLLIN|POLLRDNORM))
 			selrecord(td, &psc->pts_outpoll);
 		if (events & (POLLOUT|POLLWRNORM))
 			selrecord(td, &psc->pts_inpoll);
 	}
 
 	tty_unlock(tp);
 
 	return (revents);
 }
 
 /*
  * kqueue support.
  */
 
 /*
  * Detach a read knote: remove it from the knote list of the master's
  * read-side select queue (pts_outpoll).
  */
 static void
 pts_kqops_read_detach(struct knote *kn)
 {
 	struct tty *tty;
 	struct pts_softc *sc;
 
 	tty = kn->kn_fp->f_data;
 	sc = tty_softc(tty);
 
 	knlist_remove(&sc->pts_outpoll.si_note, kn, 0);
 }
 
 /*
  * Read filter event.  Reports EV_EOF (and fires) once the device is
  * PTS_FINISHED; otherwise stores the ttydisc_getc_poll() result in
  * kn_data and fires when it is positive.  hint is not used.
  */
 static int
 pts_kqops_read_event(struct knote *kn, long hint)
 {
 	struct tty *tty;
 	struct pts_softc *sc;
 
 	tty = kn->kn_fp->f_data;
 	sc = tty_softc(tty);
 
 	if ((sc->pts_flags & PTS_FINISHED) != 0) {
 		kn->kn_flags |= EV_EOF;
 		return (1);
 	}
 
 	kn->kn_data = ttydisc_getc_poll(tty);
 	return (kn->kn_data > 0);
 }
 
 /*
  * Detach a write knote: remove it from the knote list of the master's
  * write-side select queue (pts_inpoll).
  */
 static void
 pts_kqops_write_detach(struct knote *kn)
 {
 	struct tty *tty;
 	struct pts_softc *sc;
 
 	tty = kn->kn_fp->f_data;
 	sc = tty_softc(tty);
 
 	knlist_remove(&sc->pts_inpoll.si_note, kn, 0);
 }
 
 /*
  * Write filter event.  Reports EV_EOF (and fires) once the device is
  * PTS_FINISHED; otherwise stores the ttydisc_rint_poll() result in
  * kn_data and fires when it is positive.  hint is not used.
  */
 static int
 pts_kqops_write_event(struct knote *kn, long hint)
 {
 	struct tty *tty;
 	struct pts_softc *sc;
 
 	tty = kn->kn_fp->f_data;
 	sc = tty_softc(tty);
 
 	if ((sc->pts_flags & PTS_FINISHED) != 0) {
 		kn->kn_flags |= EV_EOF;
 		return (1);
 	}
 
 	kn->kn_data = ttydisc_rint_poll(tty);
 	return (kn->kn_data > 0);
 }
 
 /* Filter operations installed by ptsdev_kqfilter() for EVFILT_READ. */
 static struct filterops pts_kqops_read = {
 	.f_isfd = 1,
 	.f_detach = pts_kqops_read_detach,
 	.f_event = pts_kqops_read_event,
 };
 /* Filter operations installed by ptsdev_kqfilter() for EVFILT_WRITE. */
 static struct filterops pts_kqops_write = {
 	.f_isfd = 1,
 	.f_detach = pts_kqops_write_detach,
 	.f_event = pts_kqops_write_event,
 };
 
 /*
  * Attach a knote to the master side of a pseudo-terminal.
  *
  * EVFILT_READ knotes go on the pts_outpoll list and EVFILT_WRITE knotes
  * on the pts_inpoll list; any other filter yields EINVAL.  The TTY lock
  * is held while the knote is added.
  */
 static int
 ptsdev_kqfilter(struct file *fp, struct knote *kn)
 {
 	struct tty *tp;
 	struct pts_softc *sc;
 	int error;
 
 	tp = fp->f_data;
 	sc = tty_softc(tp);
 	error = 0;
 
 	tty_lock(tp);
 
 	if (kn->kn_filter == EVFILT_READ) {
 		kn->kn_fop = &pts_kqops_read;
 		knlist_add(&sc->pts_outpoll.si_note, kn, 1);
 	} else if (kn->kn_filter == EVFILT_WRITE) {
 		kn->kn_fop = &pts_kqops_write;
 		knlist_add(&sc->pts_inpoll.si_note, kn, 1);
 	} else {
 		/* Unsupported filter type. */
 		error = EINVAL;
 	}
 
 	tty_unlock(tp);
 	return (error);
 }
 
 /*
  * fstat() on the master side of a pseudo-terminal.
  *
  * Zeroes *sb and then fills in st_ino/st_rdev (from the external master
  * device node when there is one, otherwise from the TTY), the three
  * timestamps, owner, group and mode taken from the TTY's cdev.  Always
  * returns 0.  active_cred and td are not used.
  */
 static int
 ptsdev_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
     struct thread *td)
 {
 	struct tty *tp = fp->f_data;
 #ifdef PTS_EXTERNAL
 	struct pts_softc *psc = tty_softc(tp);
 #endif /* PTS_EXTERNAL */
 	struct cdev *dev = tp->t_dev;
 
 	/*
 	 * According to POSIX, we must implement an fstat(). This also
 	 * makes this implementation compatible with Linux binaries,
 	 * because Linux calls fstat() on the pseudo-terminal master to
 	 * obtain st_rdev.
 	 *
 	 * XXX: POSIX also mentions we must fill in st_dev, but how?
 	 */
 
 	bzero(sb, sizeof *sb);
 	/*
 	 * With PTS_EXTERNAL the assignment below is the else branch of
 	 * the pts_cdev test; without it, it is unconditional.
 	 */
 #ifdef PTS_EXTERNAL
 	if (psc->pts_cdev != NULL)
 		sb->st_ino = sb->st_rdev = dev2udev(psc->pts_cdev);
 	else
 #endif /* PTS_EXTERNAL */
 		sb->st_ino = sb->st_rdev = tty_udev(tp);
 
 	sb->st_atim = dev->si_atime;
 	sb->st_ctim = dev->si_ctime;
 	sb->st_mtim = dev->si_mtime;
 	sb->st_uid = dev->si_uid;
 	sb->st_gid = dev->si_gid;
 	/* Always reported as a character device. */
 	sb->st_mode = dev->si_mode | S_IFCHR;
 
 	return (0);
 }
 
 /*
  * close() on the master side of a pseudo-terminal: mark the TTY as gone
  * and, when the file still references a vnode, run the vnode close
  * operation and return its result.  Returns 0 otherwise.
  *
  * NOTE(review): the TTY lock taken here is not released in this
  * function; presumably tty_rel_gone() drops it -- confirm.
  */
 static int
 ptsdev_close(struct file *fp, struct thread *td)
 {
 	struct tty *tp = fp->f_data;
 
 	/* Deallocate TTY device. */
 	tty_lock(tp);
 	tty_rel_gone(tp);
 
 	/*
 	 * Open of /dev/ptmx or /dev/ptyXX changes the type of file
 	 * from DTYPE_VNODE to DTYPE_PTS. vn_open() increases vnode
 	 * use count, we need to decrement it, and possibly do other
 	 * required cleanup.
 	 */
 	if (fp->f_vnode != NULL)
 		return (vnops.fo_close(fp, td));
 
 	return (0);
 }
 
 /*
  * Fill in the kinfo_file record for a pts master descriptor: type
  * KF_TYPE_PTS, the TTY's device number and the TTY's device name as the
  * path.  The device number is also copied into kf_pts_dev_freebsd11;
  * per the inline note that copy truncates (presumably the freebsd11
  * field is narrower -- confirm in <sys/user.h>).  Always returns 0.
  * fdp is not used.
  */
 static int
 ptsdev_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
 {
 	struct tty *tp;
 
 	kif->kf_type = KF_TYPE_PTS;
 	tp = fp->f_data;
 	kif->kf_un.kf_pts.kf_pts_dev = tty_udev(tp);
+	kif->kf_un.kf_pts.kf_pts_dev_freebsd11 =
+	    kif->kf_un.kf_pts.kf_pts_dev; /* truncate */
 	strlcpy(kif->kf_path, tty_devname(tp), sizeof(kif->kf_path));
 	return (0);
 }
 
 /*
  * File operations for the master side (DTYPE_PTS files set up by
  * pts_alloc() and pts_alloc_external()).  truncate, chmod, chown and
  * sendfile use the invfo_* handlers.
  */
 static struct fileops ptsdev_ops = {
 	.fo_read	= ptsdev_read,
 	.fo_write	= ptsdev_write,
 	.fo_truncate	= invfo_truncate,
 	.fo_ioctl	= ptsdev_ioctl,
 	.fo_poll	= ptsdev_poll,
 	.fo_kqfilter	= ptsdev_kqfilter,
 	.fo_stat	= ptsdev_stat,
 	.fo_close	= ptsdev_close,
 	.fo_chmod	= invfo_chmod,
 	.fo_chown	= invfo_chown,
 	.fo_sendfile	= invfo_sendfile,
 	.fo_fill_kinfo	= ptsdev_fill_kinfo,
 	.fo_flags	= DFLAG_PASSABLE,
 };
 
 /*
  * Driver-side hooks.
  */
 
 /*
  * Output wakeup hook: notify everyone waiting to read from the master,
  * i.e. sleepers on pts_outwait, select/poll waiters and knotes on
  * pts_outpoll.
  */
 static void
 ptsdrv_outwakeup(struct tty *tp)
 {
 	struct pts_softc *sc;
 
 	sc = tty_softc(tp);
 
 	cv_broadcast(&sc->pts_outwait);
 	selwakeup(&sc->pts_outpoll);
 	KNOTE_LOCKED(&sc->pts_outpoll.si_note, 0);
 }
 
 /*
  * Input wakeup hook: notify everyone waiting to write to the master,
  * i.e. sleepers on pts_inwait, select/poll waiters and knotes on
  * pts_inpoll.
  */
 static void
 ptsdrv_inwakeup(struct tty *tp)
 {
 	struct pts_softc *sc;
 
 	sc = tty_softc(tp);
 
 	cv_broadcast(&sc->pts_inwait);
 	selwakeup(&sc->pts_inpoll);
 	KNOTE_LOCKED(&sc->pts_inpoll.si_note, 0);
 }
 
 /*
  * Driver open hook: clear PTS_FINISHED so the master side treats the
  * device as in use again.  Always returns 0.
  */
 static int
 ptsdrv_open(struct tty *tp)
 {
 	struct pts_softc *sc;
 
 	sc = tty_softc(tp);
 	sc->pts_flags &= ~PTS_FINISHED;
 
 	return (0);
 }
 
 /*
  * Driver close hook: set PTS_FINISHED and wake up both the readers and
  * the writers on the master side so they can observe it.
  */
 static void
 ptsdrv_close(struct tty *tp)
 {
 	struct pts_softc *sc;
 
 	sc = tty_softc(tp);
 
 	/* Flag first, then wake up any blocked readers/writers. */
 	sc->pts_flags |= PTS_FINISHED;
 	ptsdrv_outwakeup(tp);
 	ptsdrv_inwakeup(tp);
 }
 
 /*
  * Packet-mode notification hook: record event in the pending packet
  * byte and wake up readers on the master side.  For the START/STOP and
  * DOSTOP/NOSTOP pairs, the opposite bit is cleared before the new one
  * is set.
  */
 static void
 ptsdrv_pktnotify(struct tty *tp, char event)
 {
 	struct pts_softc *sc;
 	int conflicting;
 
 	sc = tty_softc(tp);
 
 	/* Work out which pending bit, if any, the new event supersedes. */
 	switch (event) {
 	case TIOCPKT_STOP:
 		conflicting = TIOCPKT_START;
 		break;
 	case TIOCPKT_START:
 		conflicting = TIOCPKT_STOP;
 		break;
 	case TIOCPKT_NOSTOP:
 		conflicting = TIOCPKT_DOSTOP;
 		break;
 	case TIOCPKT_DOSTOP:
 		conflicting = TIOCPKT_NOSTOP;
 		break;
 	default:
 		conflicting = 0;
 		break;
 	}
 
 	sc->pts_pkt &= ~conflicting;
 	sc->pts_pkt |= event;
 	ptsdrv_outwakeup(tp);
 }
 
 /*
  * Driver free hook: release everything pts_alloc() or
  * pts_alloc_external() set up for this softc -- the unit number (if
  * any), the per-uid and racct pts accounting, the credential reference,
  * the select/knote state and, for external devices, the master device
  * node -- and finally the softc itself.
  */
 static void
 ptsdrv_free(void *softc)
 {
 	struct pts_softc *psc = softc;
 
 	/* Make device number available again. */
 	if (psc->pts_unit >= 0)
 		free_unr(pts_pool, psc->pts_unit);
 
 	/* Undo the accounting; the credential is released only after use. */
 	chgptscnt(psc->pts_cred->cr_ruidinfo, -1, 0);
 	racct_sub_cred(psc->pts_cred, RACCT_NPTS, 1);
 	crfree(psc->pts_cred);
 
 	seldrain(&psc->pts_inpoll);
 	seldrain(&psc->pts_outpoll);
 	knlist_destroy(&psc->pts_inpoll.si_note);
 	knlist_destroy(&psc->pts_outpoll.si_note);
 
 #ifdef PTS_EXTERNAL
 	/* Destroy master device as well. */
 	if (psc->pts_cdev != NULL)
 		destroy_dev_sched(psc->pts_cdev);
 #endif /* PTS_EXTERNAL */
 
 	free(psc, M_PTS);
 }
 
 /*
  * TTY driver methods for pseudo-terminals, passed to tty_alloc() by
  * pts_alloc() and pts_alloc_external().
  */
 static struct ttydevsw pts_class = {
 	.tsw_flags	= TF_NOPREFIX,
 	.tsw_outwakeup	= ptsdrv_outwakeup,
 	.tsw_inwakeup	= ptsdrv_inwakeup,
 	.tsw_open	= ptsdrv_open,
 	.tsw_close	= ptsdrv_close,
 	.tsw_pktnotify	= ptsdrv_pktnotify,
 	.tsw_free	= ptsdrv_free,
 };
 
 /*
  * Allocate a new pseudo-terminal with the next free unit number and
  * turn fp into its master-side file (DTYPE_PTS, ptsdev_ops).  The slave
  * device is created as "pts/<unit>".
  *
  * The allocation is charged against RACCT_NPTS and the per-uid pts
  * count limited by RLIMIT_NPTS.  Returns 0 on success, or EAGAIN when
  * either limit is hit or no unit number is available.
  *
  * The function is static unless PTS_EXTERNAL is defined.
  */
 #ifndef PTS_EXTERNAL
 static
 #endif /* !PTS_EXTERNAL */
 int
 pts_alloc(int fflags, struct thread *td, struct file *fp)
 {
 	int unit, ok, error;
 	struct tty *tp;
 	struct pts_softc *psc;
 	struct proc *p = td->td_proc;
 	struct ucred *cred = td->td_ucred;
 
 	/* Resource limiting. */
 	PROC_LOCK(p);
 	error = racct_add(p, RACCT_NPTS, 1);
 	if (error != 0) {
 		PROC_UNLOCK(p);
 		return (EAGAIN);
 	}
 	ok = chgptscnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_NPTS));
 	if (!ok) {
 		racct_sub(p, RACCT_NPTS, 1);
 		PROC_UNLOCK(p);
 		return (EAGAIN);
 	}
 	PROC_UNLOCK(p);
 
 	/* Try to allocate a new pts unit number. */
 	unit = alloc_unr(pts_pool);
 	if (unit < 0) {
 		/*
 		 * NOTE(review): racct_sub() is called here without the
 		 * proc lock, whereas the failure path above calls it
 		 * with the lock held -- confirm racct_sub()'s locking
 		 * requirements.
 		 */
 		racct_sub(p, RACCT_NPTS, 1);
 		chgptscnt(cred->cr_ruidinfo, -1, 0);
 		return (EAGAIN);
 	}
 
 	/* Allocate TTY and softc. */
 	psc = malloc(sizeof(struct pts_softc), M_PTS, M_WAITOK|M_ZERO);
 	cv_init(&psc->pts_inwait, "ptsin");
 	cv_init(&psc->pts_outwait, "ptsout");
 
 	psc->pts_unit = unit;
 	/* Keep the credential so ptsdrv_free() can undo the accounting. */
 	psc->pts_cred = crhold(cred);
 
 	tp = tty_alloc(&pts_class, psc);
 	knlist_init_mtx(&psc->pts_inpoll.si_note, tp->t_mtx);
 	knlist_init_mtx(&psc->pts_outpoll.si_note, tp->t_mtx);
 
 	/* Expose the slave device as well. */
 	tty_makedev(tp, td->td_ucred, "pts/%u", psc->pts_unit);
 
 	finit(fp, fflags, DTYPE_PTS, tp, &ptsdev_ops);
 
 	return (0);
 }
 
 #ifdef PTS_EXTERNAL
 /*
  * Variant of pts_alloc() for a caller-supplied master device node and
  * slave name: no unit number is allocated (pts_unit is -1), dev is
  * remembered as the master device node and the slave device is created
  * with the given name.
  *
  * Resource accounting matches pts_alloc().  Returns 0 on success or
  * EAGAIN when a resource limit is hit.
  */
 int
 pts_alloc_external(int fflags, struct thread *td, struct file *fp,
     struct cdev *dev, const char *name)
 {
 	int ok, error;
 	struct tty *tp;
 	struct pts_softc *psc;
 	struct proc *p = td->td_proc;
 	struct ucred *cred = td->td_ucred;
 
 	/* Resource limiting. */
 	PROC_LOCK(p);
 	error = racct_add(p, RACCT_NPTS, 1);
 	if (error != 0) {
 		PROC_UNLOCK(p);
 		return (EAGAIN);
 	}
 	ok = chgptscnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_NPTS));
 	if (!ok) {
 		racct_sub(p, RACCT_NPTS, 1);
 		PROC_UNLOCK(p);
 		return (EAGAIN);
 	}
 	PROC_UNLOCK(p);
 
 	/* Allocate TTY and softc. */
 	psc = malloc(sizeof(struct pts_softc), M_PTS, M_WAITOK|M_ZERO);
 	cv_init(&psc->pts_inwait, "ptsin");
 	cv_init(&psc->pts_outwait, "ptsout");
 
 	/* A negative unit marks a device without a pts unit number. */
 	psc->pts_unit = -1;
 	psc->pts_cdev = dev;
 	psc->pts_cred = crhold(cred);
 
 	tp = tty_alloc(&pts_class, psc);
 	knlist_init_mtx(&psc->pts_inpoll.si_note, tp->t_mtx);
 	knlist_init_mtx(&psc->pts_outpoll.si_note, tp->t_mtx);
 
 	/* Expose the slave device as well. */
 	tty_makedev(tp, td->td_ucred, "%s", name);
 
 	finit(fp, fflags, DTYPE_PTS, tp, &ptsdev_ops);
 
 	return (0);
 }
 #endif /* PTS_EXTERNAL */
 
 /*
  * posix_openpt(2) system call: allocate a file descriptor and a new
  * pseudo-terminal and return the descriptor in td_retval[0].
  *
  * Returns EINVAL when flags other than O_RDWR, O_NOCTTY and O_CLOEXEC
  * are passed, or the error from falloc() or pts_alloc().
  */
 int
 sys_posix_openpt(struct thread *td, struct posix_openpt_args *uap)
 {
 	int error, fd;
 	struct file *fp;
 
 	/*
 	 * POSIX states it's unspecified when other flags are passed. We
 	 * don't allow this.
 	 */
 	if (uap->flags & ~(O_RDWR|O_NOCTTY|O_CLOEXEC))
 		return (EINVAL);
 
 	error = falloc(td, &fp, &fd, uap->flags);
 	if (error)
 		return (error);
 
 	/* Allocate the actual pseudo-TTY. */
 	error = pts_alloc(FFLAGS(uap->flags & O_ACCMODE), td, fp);
 	if (error != 0) {
 		/* Undo falloc(): close the descriptor, drop our reference. */
 		fdclose(td, fp, fd);
 		fdrop(fp, td);
 		return (error);
 	}
 
 	/* Pass it back to userspace. */
 	td->td_retval[0] = fd;
 	fdrop(fp, td);
 
 	return (0);
 }
 
 /*
  * Create the pts unit number pool covering 0 .. INT_MAX.  The argument
  * is unused.
  */
 static void
 pts_init(void *unused)
 {
 
 	pts_pool = new_unrhdr(0, INT_MAX, NULL);
 }
 
 /* Register pts_init() at SI_SUB_DRIVERS, SI_ORDER_MIDDLE. */
 SYSINIT(pts, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, pts_init, NULL);
Index: head/sys/kern/vfs_syscalls.c
===================================================================
--- head/sys/kern/vfs_syscalls.c	(revision 318735)
+++ head/sys/kern/vfs_syscalls.c	(revision 318736)
@@ -1,4388 +1,4568 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 #include "opt_compat.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/capsicum.h>
 #include <sys/disk.h>
 #include <sys/sysent.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/sysproto.h>
 #include <sys/namei.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filio.h>
 #include <sys/limits.h>
 #include <sys/linker.h>
 #include <sys/rwlock.h>
 #include <sys/sdt.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/dirent.h>
 #include <sys/jail.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #include <machine/stdarg.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/uma.h>
 
 #include <ufs/ufs/quota.h>
 
 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
 
 SDT_PROVIDER_DEFINE(vfs);
 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int");
 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int");
 
 static int kern_chflagsat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, u_long flags, int atflag);
 static int setfflags(struct thread *td, struct vnode *, u_long);
 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
 static int getutimens(const struct timespec *, enum uio_seg,
     struct timespec *, int *);
 static int setutimes(struct thread *td, struct vnode *,
     const struct timespec *, int, int);
 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
     struct thread *td);
 
 /*
  * Sync each mounted filesystem.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct sync_args {
 	int     dummy;
 };
 #endif
 /* ARGSUSED */
 int
 sys_sync(struct thread *td, struct sync_args *uap)
 {
 	struct mount *mp, *nmp;
 	int save;
 
 	mtx_lock(&mountlist_mtx);
 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
 			save = curthread_pflags_set(TDP_SYNCIO);
 			vfs_msync(mp, MNT_NOWAIT);
 			VFS_SYNC(mp, MNT_NOWAIT);
 			curthread_pflags_restore(save);
 			vn_finished_write(mp);
 		}
 		mtx_lock(&mountlist_mtx);
 		nmp = TAILQ_NEXT(mp, mnt_list);
 		vfs_unbusy(mp);
 	}
 	mtx_unlock(&mountlist_mtx);
 	return (0);
 }
 
 /*
  * Change filesystem quotas.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct quotactl_args {
 	char *path;
 	int cmd;
 	int uid;
 	caddr_t arg;
 };
 #endif
 int
 sys_quotactl(struct thread *td, struct quotactl_args *uap)
 {
 	struct mount *mp;
 	struct nameidata nd;
 	int error;
 
 	AUDIT_ARG_CMD(uap->cmd);
 	AUDIT_ARG_UID(uap->uid);
 	if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
 		return (EPERM);
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 	    uap->path, td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	mp = nd.ni_vp->v_mount;
 	vfs_ref(mp);
 	vput(nd.ni_vp);
 	error = vfs_busy(mp, 0);
 	vfs_rel(mp);
 	if (error != 0)
 		return (error);
 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
 
 	/*
 	 * Since quota on operation typically needs to open quota
 	 * file, the Q_QUOTAON handler needs to unbusy the mount point
 	 * before calling into namei.  Otherwise, unmount might be
 	 * started between two vfs_busy() invocations (first is our,
 	 * second is from mount point cross-walk code in lookup()),
 	 * causing deadlock.
 	 *
 	 * Require that Q_QUOTAON handles the vfs_busy() reference on
 	 * its own, always returning with ubusied mount point.
 	 */
 	if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON)
 		vfs_unbusy(mp);
 	return (error);
 }
 
 /*
  * Used by statfs conversion routines to scale the block size up if
  * necessary so that all of the block counts are <= 'max_size'.  Note
  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  * value of 'n'.
  */
 void
 statfs_scale_blocks(struct statfs *sf, long max_size)
 {
 	uint64_t count;
 	int shift;
 
 	KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
 
 	/*
 	 * Attempt to scale the block counts to give a more accurate
 	 * overview to userland of the ratio of free space to used
 	 * space.  To do this, find the largest block count and compute
 	 * a divisor that lets it fit into a signed integer <= max_size.
 	 */
 	if (sf->f_bavail < 0)
 		count = -sf->f_bavail;
 	else
 		count = sf->f_bavail;
 	count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
 	if (count <= max_size)
 		return;
 
 	count >>= flsl(max_size);
 	shift = 0;
 	while (count > 0) {
 		shift++;
 		count >>=1;
 	}
 
 	sf->f_bsize <<= shift;
 	sf->f_blocks >>= shift;
 	sf->f_bfree >>= shift;
 	sf->f_bavail >>= shift;
 }
 
 static int
 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf)
 {
 	struct statfs *sp;
 	int error;
 
 	if (mp == NULL)
 		return (EBADF);
 	error = vfs_busy(mp, 0);
 	vfs_rel(mp);
 	if (error != 0)
 		return (error);
 #ifdef MAC
 	error = mac_mount_check_stat(td->td_ucred, mp);
 	if (error != 0)
 		goto out;
 #endif
 	/*
 	 * Set these in case the underlying filesystem fails to do so.
 	 */
 	sp = &mp->mnt_stat;
 	sp->f_version = STATFS_VERSION;
 	sp->f_namemax = NAME_MAX;
 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 	error = VFS_STATFS(mp, sp);
 	if (error != 0)
 		goto out;
 	*buf = *sp;
 	if (priv_check(td, PRIV_VFS_GENERATION)) {
 		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
 		prison_enforce_statfs(td->td_ucred, mp, buf);
 	}
 out:
 	vfs_unbusy(mp);
 	return (error);
 }
 
 /*
  * Get filesystem statistics.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct statfs_args {
 	char *path;
 	struct statfs *buf;
 };
 #endif
 int
 sys_statfs(struct thread *td, struct statfs_args *uap)
 {
 	struct statfs *sfp;
 	int error;
 
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
 	if (error == 0)
 		error = copyout(sfp, uap->buf, sizeof(struct statfs));
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 int
 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
     struct statfs *buf)
 {
 	struct mount *mp;
 	struct nameidata nd;
 	int error;
 
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 	    pathseg, path, td);
 	error = namei(&nd);
 	if (error != 0)
 		return (error);
 	mp = nd.ni_vp->v_mount;
 	vfs_ref(mp);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_vp);
 	return (kern_do_statfs(td, mp, buf));
 }
 
 /*
  * Get filesystem statistics.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fstatfs_args {
 	int fd;
 	struct statfs *buf;
 };
 #endif
 int
 sys_fstatfs(struct thread *td, struct fstatfs_args *uap)
 {
 	struct statfs *sfp;
 	int error;
 
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fstatfs(td, uap->fd, sfp);
 	if (error == 0)
 		error = copyout(sfp, uap->buf, sizeof(struct statfs));
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 int
 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
 {
 	struct file *fp;
 	struct mount *mp;
 	struct vnode *vp;
 	cap_rights_t rights;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 	error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp);
 	if (error != 0)
 		return (error);
 	vp = fp->f_vnode;
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 #ifdef AUDIT
 	AUDIT_ARG_VNODE1(vp);
 #endif
 	mp = vp->v_mount;
 	if (mp != NULL)
 		vfs_ref(mp);
 	VOP_UNLOCK(vp, 0);
 	fdrop(fp, td);
 	return (kern_do_statfs(td, mp, buf));
 }
 
 /*
  * Get statistics on all filesystems.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct getfsstat_args {
 	struct statfs *buf;
 	long bufsize;
 	int mode;
 };
 #endif
 int
 sys_getfsstat(struct thread *td, struct getfsstat_args *uap)
 {
 	size_t count;
 	int error;
 
 	if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX)
 		return (EINVAL);
 	error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count,
 	    UIO_USERSPACE, uap->mode);
 	if (error == 0)
 		td->td_retval[0] = count;
 	return (error);
 }
 
 /*
  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  *	The caller is responsible for freeing memory which will be allocated
  *	in '*buf'.
  */
 int
 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
     size_t *countp, enum uio_seg bufseg, int mode)
 {
 	struct mount *mp, *nmp;
 	struct statfs *sfsp, *sp, *sptmp, *tofree;
 	size_t count, maxcount;
 	int error;
 
 	switch (mode) {
 	case MNT_WAIT:
 	case MNT_NOWAIT:
 		break;
 	default:
 		if (bufseg == UIO_SYSSPACE)
 			*buf = NULL;
 		return (EINVAL);
 	}
 restart:
 	maxcount = bufsize / sizeof(struct statfs);
 	if (bufsize == 0) {
 		sfsp = NULL;
 		tofree = NULL;
 	} else if (bufseg == UIO_USERSPACE) {
 		sfsp = *buf;
 		tofree = NULL;
 	} else /* if (bufseg == UIO_SYSSPACE) */ {
 		count = 0;
 		mtx_lock(&mountlist_mtx);
 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 			count++;
 		}
 		mtx_unlock(&mountlist_mtx);
 		if (maxcount > count)
 			maxcount = count;
 		tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs),
 		    M_STATFS, M_WAITOK);
 	}
 	count = 0;
 	mtx_lock(&mountlist_mtx);
 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 		if (prison_canseemount(td->td_ucred, mp) != 0) {
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
 #ifdef MAC
 		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
 #endif
 		if (mode == MNT_WAIT) {
 			if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) {
 				/*
 				 * If vfs_busy() failed, and MBF_NOWAIT
 				 * wasn't passed, then the mp is gone.
 				 * Furthermore, because of MBF_MNTLSTLOCK,
 				 * the mountlist_mtx was dropped.  We have
 				 * no other choice than to start over.
 				 */
 				mtx_unlock(&mountlist_mtx);
 				free(tofree, M_STATFS);
 				goto restart;
 			}
 		} else {
 			if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
 				nmp = TAILQ_NEXT(mp, mnt_list);
 				continue;
 			}
 		}
 		if (sfsp != NULL && count < maxcount) {
 			sp = &mp->mnt_stat;
 			/*
 			 * Set these in case the underlying filesystem
 			 * fails to do so.
 			 */
 			sp->f_version = STATFS_VERSION;
 			sp->f_namemax = NAME_MAX;
 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 			/*
 			 * If MNT_NOWAIT is specified, do not refresh
 			 * the fsstat cache.
 			 */
 			if (mode != MNT_NOWAIT) {
 				error = VFS_STATFS(mp, sp);
 				if (error != 0) {
 					mtx_lock(&mountlist_mtx);
 					nmp = TAILQ_NEXT(mp, mnt_list);
 					vfs_unbusy(mp);
 					continue;
 				}
 			}
 			if (priv_check(td, PRIV_VFS_GENERATION)) {
 				sptmp = malloc(sizeof(struct statfs), M_STATFS,
 				    M_WAITOK);
 				*sptmp = *sp;
 				sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0;
 				prison_enforce_statfs(td->td_ucred, mp, sptmp);
 				sp = sptmp;
 			} else
 				sptmp = NULL;
 			if (bufseg == UIO_SYSSPACE) {
 				bcopy(sp, sfsp, sizeof(*sp));
 				free(sptmp, M_STATFS);
 			} else /* if (bufseg == UIO_USERSPACE) */ {
 				error = copyout(sp, sfsp, sizeof(*sp));
 				free(sptmp, M_STATFS);
 				if (error != 0) {
 					vfs_unbusy(mp);
 					return (error);
 				}
 			}
 			sfsp++;
 		}
 		count++;
 		mtx_lock(&mountlist_mtx);
 		nmp = TAILQ_NEXT(mp, mnt_list);
 		vfs_unbusy(mp);
 	}
 	mtx_unlock(&mountlist_mtx);
 	if (sfsp != NULL && count > maxcount)
 		*countp = maxcount;
 	else
 		*countp = count;
 	return (0);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * Get old format filesystem statistics.
  */
-static void cvtstatfs(struct statfs *, struct ostatfs *);
+static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *);
 
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_statfs_args {
 	char *path;
 	struct ostatfs *buf;
 };
 #endif
 int
 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap)
 {
 	struct ostatfs osb;
 	struct statfs *sfp;
 	int error;
 
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
 	if (error == 0) {
-		cvtstatfs(sfp, &osb);
+		freebsd4_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Get filesystem statistics.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_fstatfs_args {
 	int fd;
 	struct ostatfs *buf;
 };
 #endif
 int
 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap)
 {
 	struct ostatfs osb;
 	struct statfs *sfp;
 	int error;
 
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fstatfs(td, uap->fd, sfp);
 	if (error == 0) {
-		cvtstatfs(sfp, &osb);
+		freebsd4_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Get statistics on all filesystems.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_getfsstat_args {
 	struct ostatfs *buf;
 	long bufsize;
 	int mode;
 };
 #endif
 int
 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap)
 {
 	struct statfs *buf, *sp;
 	struct ostatfs osb;
 	size_t count, size;
 	int error;
 
 	if (uap->bufsize < 0)
 		return (EINVAL);
 	count = uap->bufsize / sizeof(struct ostatfs);
 	if (count > SIZE_MAX / sizeof(struct statfs))
 		return (EINVAL);
 	size = count * sizeof(struct statfs);
 	error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE,
 	    uap->mode);
-	td->td_retval[0] = count;
+	if (error == 0)
+		td->td_retval[0] = count;
 	if (size != 0) {
 		sp = buf;
 		while (count != 0 && error == 0) {
-			cvtstatfs(sp, &osb);
+			freebsd4_cvtstatfs(sp, &osb);
 			error = copyout(&osb, uap->buf, sizeof(osb));
 			sp++;
 			uap->buf++;
 			count--;
 		}
 		free(buf, M_STATFS);
 	}
 	return (error);
 }
 
 /*
  * Implement fstatfs() for (NFS) file handles.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_fhstatfs_args {
 	struct fhandle *u_fhp;
 	struct ostatfs *buf;
 };
 #endif
 int
 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap)
 {
 	struct ostatfs osb;
 	struct statfs *sfp;
 	fhandle_t fh;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 	if (error != 0)
 		return (error);
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fhstatfs(td, fh, sfp);
 	if (error == 0) {
-		cvtstatfs(sfp, &osb);
+		freebsd4_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Convert a new format statfs structure to an old format statfs structure.
  */
 static void
-cvtstatfs(struct statfs *nsp, struct ostatfs *osp)
+freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp)
 {
 
 	statfs_scale_blocks(nsp, LONG_MAX);
 	bzero(osp, sizeof(*osp));
 	osp->f_bsize = nsp->f_bsize;
 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
 	osp->f_blocks = nsp->f_blocks;
 	osp->f_bfree = nsp->f_bfree;
 	osp->f_bavail = nsp->f_bavail;
 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
 	osp->f_owner = nsp->f_owner;
 	osp->f_type = nsp->f_type;
 	osp->f_flags = nsp->f_flags;
 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
 	    MIN(MFSNAMELEN, OMFSNAMELEN));
 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
 	    MIN(MNAMELEN, OMNAMELEN));
 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
 	    MIN(MNAMELEN, OMNAMELEN));
 	osp->f_fsid = nsp->f_fsid;
 }
 #endif /* COMPAT_FREEBSD4 */
 
+#if defined(COMPAT_FREEBSD11)
 /*
+ * Get filesystem statistics in the FreeBSD 11 struct statfs format.
+ */
+static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *);
+
+int
+freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap)
+{
+	struct freebsd11_statfs osb;
+	struct statfs *sfp;
+	int error;
+
+	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
+	error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
+	if (error == 0) {
+		freebsd11_cvtstatfs(sfp, &osb);
+		error = copyout(&osb, uap->buf, sizeof(osb));
+	}
+	free(sfp, M_STATFS);
+	return (error);
+}
+
+/*
+ * Get FreeBSD 11 format filesystem statistics for an open file descriptor.
+ */
+int
+freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap)
+{
+	struct freebsd11_statfs osb;
+	struct statfs *sfp;
+	int error;
+
+	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
+	error = kern_fstatfs(td, uap->fd, sfp);
+	if (error == 0) {
+		freebsd11_cvtstatfs(sfp, &osb);
+		error = copyout(&osb, uap->buf, sizeof(osb));
+	}
+	free(sfp, M_STATFS);
+	return (error);
+}
+
+/*
+ * Get statistics on all filesystems in the FreeBSD 11 struct statfs layout.
+ * The user buffer holds struct freebsd11_statfs entries, so its capacity is
+ * derived from that size.  Negative or overflowing sizes yield EINVAL.
+ */
+int
+freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap)
+{
+	struct freebsd11_statfs osb;
+	struct statfs *buf, *sp;
+	size_t count, size;
+	int error;
+
+	count = uap->bufsize / sizeof(struct freebsd11_statfs);
+	if (uap->bufsize < 0 || count > SIZE_MAX / sizeof(struct statfs))
+		return (EINVAL);
+	size = count * sizeof(struct statfs);
+	error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE,
+	    uap->mode);
+	if (error == 0)
+		td->td_retval[0] = count;
+	if (size > 0) {
+		for (sp = buf; count > 0 && error == 0; sp++, count--) {
+			freebsd11_cvtstatfs(sp, &osb);
+			error = copyout(&osb, uap->buf++, sizeof(osb));
+		}
+		free(buf, M_STATFS);
+	}
+	return (error);
+}
+
+/*
+ * Implement FreeBSD 11 format fstatfs() for (NFS) file handles.
+ */
+int
+freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap)
+{
+	struct freebsd11_statfs osb;
+	struct statfs *sfp;
+	fhandle_t fh;
+	int error;
+
+	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
+	if (error)
+		return (error);
+	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
+	error = kern_fhstatfs(td, fh, sfp);
+	if (error == 0) {
+		freebsd11_cvtstatfs(sfp, &osb);
+		error = copyout(&osb, uap->buf, sizeof(osb));
+	}
+	free(sfp, M_STATFS);
+	return (error);
+}
+
+/*
+ * Convert a current struct statfs to the FreeBSD 11 struct freebsd11_statfs.
+ */
+static void
+freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp)
+{
+
+	bzero(osp, sizeof(*osp));
+	osp->f_version = FREEBSD11_STATFS_VERSION;
+	osp->f_type = nsp->f_type;
+	osp->f_flags = nsp->f_flags;
+	osp->f_bsize = nsp->f_bsize;
+	osp->f_iosize = nsp->f_iosize;
+	osp->f_blocks = nsp->f_blocks;
+	osp->f_bfree = nsp->f_bfree;
+	osp->f_bavail = nsp->f_bavail;
+	osp->f_files = nsp->f_files;
+	osp->f_ffree = nsp->f_ffree;
+	osp->f_syncwrites = nsp->f_syncwrites;
+	osp->f_asyncwrites = nsp->f_asyncwrites;
+	osp->f_syncreads = nsp->f_syncreads;
+	osp->f_asyncreads = nsp->f_asyncreads;
+	osp->f_namemax = nsp->f_namemax;
+	osp->f_owner = nsp->f_owner;
+	osp->f_fsid = nsp->f_fsid;
+	strlcpy(osp->f_fstypename, nsp->f_fstypename,
+	    MIN(MFSNAMELEN, sizeof(osp->f_fstypename)));
+	strlcpy(osp->f_mntonname, nsp->f_mntonname,
+	    MIN(MNAMELEN, sizeof(osp->f_mntonname)));
+	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
+	    MIN(MNAMELEN, sizeof(osp->f_mntfromname)));
+}
+#endif /* COMPAT_FREEBSD11 */
+
+/*
  * Change current working directory to a given file descriptor.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchdir_args {
 	int	fd;
 };
 #endif
 int
 sys_fchdir(struct thread *td, struct fchdir_args *uap)
 {
 	struct vnode *vp, *tdp;
 	struct mount *mp;
 	struct file *fp;
 	cap_rights_t rights;
 	int error;
 
 	AUDIT_ARG_FD(uap->fd);
 	error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR),
 	    &fp);
 	if (error != 0)
 		return (error);
 	vp = fp->f_vnode;
 	vrefact(vp);
 	fdrop(fp, td);
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	error = change_dir(vp, td);
 	while (!error && (mp = vp->v_mountedhere) != NULL) {
 		if (vfs_busy(mp, 0))
 			continue;
 		error = VFS_ROOT(mp, LK_SHARED, &tdp);
 		vfs_unbusy(mp);
 		if (error != 0)
 			break;
 		vput(vp);
 		vp = tdp;
 	}
 	if (error != 0) {
 		vput(vp);
 		return (error);
 	}
 	VOP_UNLOCK(vp, 0);
 	pwd_chdir(td, vp);
 	return (0);
 }
 
 /*
  * Change current working directory (``.'').
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chdir_args {
 	char	*path;
 };
 #endif
 int
 sys_chdir(struct thread *td, struct chdir_args *uap)
 {
 
 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
 }
 
 int
 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
 {
 	struct nameidata nd;
 	int error;
 
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 	    pathseg, path, td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
 		vput(nd.ni_vp);
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		return (error);
 	}
 	VOP_UNLOCK(nd.ni_vp, 0);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	pwd_chdir(td, nd.ni_vp);
 	return (0);
 }
 
 /*
  * Change notion of root (``/'') directory.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chroot_args {
 	char	*path;
 };
 #endif
 int
 sys_chroot(struct thread *td, struct chroot_args *uap)
 {
 	struct nameidata nd;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_CHROOT);
 	if (error != 0)
 		return (error);
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 	    UIO_USERSPACE, uap->path, td);
 	error = namei(&nd);
 	if (error != 0)
 		goto error;
 	error = change_dir(nd.ni_vp, td);
 	if (error != 0)
 		goto e_vunlock;
 #ifdef MAC
 	error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
 	if (error != 0)
 		goto e_vunlock;
 #endif
 	VOP_UNLOCK(nd.ni_vp, 0);
 	error = pwd_chroot(td, nd.ni_vp);
 	vrele(nd.ni_vp);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	return (error);
 e_vunlock:
 	vput(nd.ni_vp);
 error:
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	return (error);
 }
 
 /*
  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  * instance.
  */
 int
 change_dir(struct vnode *vp, struct thread *td)
 {
 #ifdef MAC
 	int error;
 #endif
 
 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
 	if (vp->v_type != VDIR)
 		return (ENOTDIR);
 #ifdef MAC
 	error = mac_vnode_check_chdir(td->td_ucred, vp);
 	if (error != 0)
 		return (error);
 #endif
 	return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
 }
 
 static __inline void
 flags_to_rights(int flags, cap_rights_t *rightsp)
 {
 
 	if (flags & O_EXEC) {
 		cap_rights_set(rightsp, CAP_FEXECVE);
 	} else {
 		switch ((flags & O_ACCMODE)) {
 		case O_RDONLY:
 			cap_rights_set(rightsp, CAP_READ);
 			break;
 		case O_RDWR:
 			cap_rights_set(rightsp, CAP_READ);
 			/* FALLTHROUGH */
 		case O_WRONLY:
 			cap_rights_set(rightsp, CAP_WRITE);
 			if (!(flags & (O_APPEND | O_TRUNC)))
 				cap_rights_set(rightsp, CAP_SEEK);
 			break;
 		}
 	}
 
 	if (flags & O_CREAT)
 		cap_rights_set(rightsp, CAP_CREATE);
 
 	if (flags & O_TRUNC)
 		cap_rights_set(rightsp, CAP_FTRUNCATE);
 
 	if (flags & (O_SYNC | O_FSYNC))
 		cap_rights_set(rightsp, CAP_FSYNC);
 
 	if (flags & (O_EXLOCK | O_SHLOCK))
 		cap_rights_set(rightsp, CAP_FLOCK);
 }
 
 /*
  * Check permissions, allocate an open file structure, and call the device
  * open routine if any.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct open_args {
 	char	*path;
 	int	flags;
 	int	mode;
 };
 #endif
 int
 sys_open(struct thread *td, struct open_args *uap)
 {
 
 	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->flags, uap->mode));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct openat_args {
 	int	fd;
 	char	*path;
 	int	flag;
 	int	mode;
 };
 #endif
 int
 sys_openat(struct thread *td, struct openat_args *uap)
 {
 
 	AUDIT_ARG_FD(uap->fd);
 	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 	    uap->mode));
 }
 
 int
 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
     int flags, int mode)
 {
 	struct proc *p = td->td_proc;
 	struct filedesc *fdp = p->p_fd;
 	struct file *fp;
 	struct vnode *vp;
 	struct nameidata nd;
 	cap_rights_t rights;
 	int cmode, error, indx;
 
 	indx = -1;
 
 	AUDIT_ARG_FFLAGS(flags);
 	AUDIT_ARG_MODE(mode);
 	cap_rights_init(&rights, CAP_LOOKUP);
 	flags_to_rights(flags, &rights);
 	/*
 	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
 	 * may be specified.
 	 */
 	if (flags & O_EXEC) {
 		if (flags & O_ACCMODE)
 			return (EINVAL);
 	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
 		return (EINVAL);
 	} else {
 		flags = FFLAGS(flags);
 	}
 
 	/*
 	 * Allocate a file structure. The descriptor to reference it
 	 * is allocated and set by finstall() below.
 	 */
 	error = falloc_noinstall(td, &fp);
 	if (error != 0)
 		return (error);
 	/*
 	 * An extra reference on `fp' has been held for us by
 	 * falloc_noinstall().
 	 */
 	/* Set the flags early so the finit in devfs can pick them up. */
 	fp->f_flag = flags & FMASK;
 	cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 	    &rights, td);
 	td->td_dupfd = -1;		/* XXX check for fdopen */
 	error = vn_open(&nd, &flags, cmode, fp);
 	if (error != 0) {
 		/*
 		 * If the vn_open replaced the method vector, something
 		 * wonderous happened deep below and we just pass it up
 		 * pretending we know what we do.
 		 */
 		if (error == ENXIO && fp->f_ops != &badfileops)
 			goto success;
 
 		/*
 		 * Handle special fdopen() case. bleh.
 		 *
 		 * Don't do this for relative (capability) lookups; we don't
 		 * understand exactly what would happen, and we don't think
 		 * that it ever should.
 		 */
 		if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 &&
 		    (error == ENODEV || error == ENXIO) &&
 		    td->td_dupfd >= 0) {
 			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
 			    &indx);
 			if (error == 0)
 				goto success;
 		}
 
 		goto bad;
 	}
 	td->td_dupfd = 0;
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
 
 	/*
 	 * Store the vnode, for any f_type. Typically, the vnode use
 	 * count is decremented by direct call to vn_closefile() for
 	 * files that switched type in the cdevsw fdopen() method.
 	 */
 	fp->f_vnode = vp;
 	/*
 	 * If the file wasn't claimed by devfs bind it to the normal
 	 * vnode operations here.
 	 */
 	if (fp->f_ops == &badfileops) {
 		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
 		fp->f_seqcount = 1;
 		finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
 		    DTYPE_VNODE, vp, &vnops);
 	}
 
 	VOP_UNLOCK(vp, 0);
 	if (flags & O_TRUNC) {
 		error = fo_truncate(fp, 0, td->td_ucred, td);
 		if (error != 0)
 			goto bad;
 	}
 success:
 	/*
 	 * If we haven't already installed the FD (for dupfdopen), do so now.
 	 */
 	if (indx == -1) {
 		struct filecaps *fcaps;
 
 #ifdef CAPABILITIES
 		if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0)
 			fcaps = &nd.ni_filecaps;
 		else
 #endif
 			fcaps = NULL;
 		error = finstall(td, fp, &indx, flags, fcaps);
 		/* On success finstall() consumes fcaps. */
 		if (error != 0) {
 			filecaps_free(&nd.ni_filecaps);
 			goto bad;
 		}
 	} else {
 		filecaps_free(&nd.ni_filecaps);
 	}
 
 	/*
 	 * Release our private reference, leaving the one associated with
 	 * the descriptor table intact.
 	 */
 	fdrop(fp, td);
 	td->td_retval[0] = indx;
 	return (0);
 bad:
 	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
 	fdrop(fp, td);
 	return (error);
 }
 
 #ifdef COMPAT_43
 /*
  * Create a file.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct ocreat_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 ocreat(struct thread *td, struct ocreat_args *uap)
 {
 
 	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 }
 #endif /* COMPAT_43 */
 
 /*
  * Create a special file.
  */
 #ifndef _SYS_SYSPROTO_H_
-struct mknod_args {
+struct mknodat_args {
+	int	fd;
 	char	*path;
-	int	mode;
-	int	dev;
+	mode_t	mode;
+	dev_t	dev;
 };
 #endif
 int
-sys_mknod(struct thread *td, struct mknod_args *uap)
+sys_mknodat(struct thread *td, struct mknodat_args *uap)
 {
 
+	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
+	    uap->dev));
+}
+
+#if defined(COMPAT_FREEBSD11)
+int
+freebsd11_mknod(struct thread *td,
+    struct freebsd11_mknod_args *uap)
+{
+
 	return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->mode, uap->dev));
 }
 
-#ifndef _SYS_SYSPROTO_H_
-struct mknodat_args {
-	int	fd;
-	char	*path;
-	mode_t	mode;
-	dev_t	dev;
-};
-#endif
 int
-sys_mknodat(struct thread *td, struct mknodat_args *uap)
+freebsd11_mknodat(struct thread *td,
+    struct freebsd11_mknodat_args *uap)
 {
 
 	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 	    uap->dev));
 }
+#endif /* COMPAT_FREEBSD11 */
 
 int
 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
-    int mode, int dev)
+    int mode, dev_t dev)
 {
 	struct vnode *vp;
 	struct mount *mp;
 	struct vattr vattr;
 	struct nameidata nd;
 	cap_rights_t rights;
 	int error, whiteout = 0;
 
 	AUDIT_ARG_MODE(mode);
 	AUDIT_ARG_DEV(dev);
 	switch (mode & S_IFMT) {
 	case S_IFCHR:
 	case S_IFBLK:
 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 		if (error == 0 && dev == VNOVAL)
 			error = EINVAL;
 		break;
 	case S_IFMT:
 		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
 		break;
 	case S_IFWHT:
 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 		break;
 	case S_IFIFO:
 		if (dev == 0)
 			return (kern_mkfifoat(td, fd, path, pathseg, mode));
 		/* FALLTHROUGH */
 	default:
 		error = EINVAL;
 		break;
 	}
 	if (error != 0)
 		return (error);
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 	    NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT),
 	    td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
 	if (vp != NULL) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		if (vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		vrele(vp);
 		return (EEXIST);
 	} else {
 		VATTR_NULL(&vattr);
 		vattr.va_mode = (mode & ALLPERMS) &
 		    ~td->td_proc->p_fd->fd_cmask;
 		vattr.va_rdev = dev;
 		whiteout = 0;
 
 		switch (mode & S_IFMT) {
 		case S_IFMT:	/* used by badsect to flag bad sectors */
 			vattr.va_type = VBAD;
 			break;
 		case S_IFCHR:
 			vattr.va_type = VCHR;
 			break;
 		case S_IFBLK:
 			vattr.va_type = VBLK;
 			break;
 		case S_IFWHT:
 			whiteout = 1;
 			break;
 		default:
 			panic("kern_mknod: invalid mode");
 		}
 	}
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			return (error);
 		goto restart;
 	}
 #ifdef MAC
 	if (error == 0 && !whiteout)
 		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 		    &nd.ni_cnd, &vattr);
 #endif
 	if (error == 0) {
 		if (whiteout)
 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 		else {
 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 						&nd.ni_cnd, &vattr);
 			if (error == 0)
 				vput(nd.ni_vp);
 		}
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Create a named pipe.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct mkfifo_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
 {
 
 	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->mode));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct mkfifoat_args {
 	int	fd;
 	char	*path;
 	mode_t	mode;
 };
 #endif
 int
 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 {
 
 	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->mode));
 }
 
 int
 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
     int mode)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	struct nameidata nd;
 	cap_rights_t rights;
 	int error;
 
 	AUDIT_ARG_MODE(mode);
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 	    NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT),
 	    td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	if (nd.ni_vp != NULL) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		if (nd.ni_vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		vrele(nd.ni_vp);
 		return (EEXIST);
 	}
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			return (error);
 		goto restart;
 	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VFIFO;
 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 #ifdef MAC
 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 	    &vattr);
 	if (error != 0)
 		goto out;
 #endif
 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 	if (error == 0)
 		vput(nd.ni_vp);
 #ifdef MAC
 out:
 #endif
 	vput(nd.ni_dvp);
 	vn_finished_write(mp);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	return (error);
 }
 
 /*
  * Make a hard file link.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct link_args {
 	char	*path;
 	char	*link;
 };
 #endif
 int
 sys_link(struct thread *td, struct link_args *uap)
 {
 
 	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
 	    UIO_USERSPACE, FOLLOW));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct linkat_args {
 	int	fd1;
 	char	*path1;
 	int	fd2;
 	char	*path2;
 	int	flag;
 };
 #endif
 int
 sys_linkat(struct thread *td, struct linkat_args *uap)
 {
 	int flag;
 
 	flag = uap->flag;
 	if (flag & ~AT_SYMLINK_FOLLOW)
 		return (EINVAL);
 
 	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 	    UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
 }
 
 int hardlink_check_uid = 0;
 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
     &hardlink_check_uid, 0,
     "Unprivileged processes cannot create hard links to files owned by other "
     "users");
 static int hardlink_check_gid = 0;
 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
     &hardlink_check_gid, 0,
     "Unprivileged processes cannot create hard links to files owned by other "
     "groups");
 
 static int
 can_hardlink(struct vnode *vp, struct ucred *cred)
 {
 	struct vattr va;
 	int error;
 
 	if (!hardlink_check_uid && !hardlink_check_gid)
 		return (0);
 
 	error = VOP_GETATTR(vp, &va, cred);
 	if (error != 0)
 		return (error);
 
 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 		if (error != 0)
 			return (error);
 	}
 
 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
 		if (error != 0)
 			return (error);
 	}
 
 	return (0);
 }
 
 int
 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
     enum uio_seg segflg, int follow)
 {
 	struct vnode *vp;
 	struct mount *mp;
 	struct nameidata nd;
 	cap_rights_t rights;
 	int error;
 
 again:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1,
 	    cap_rights_init(&rights, CAP_LINKAT_SOURCE), td);
 
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
 	if (vp->v_type == VDIR) {
 		vrele(vp);
 		return (EPERM);		/* POSIX */
 	}
 	NDINIT_ATRIGHTS(&nd, CREATE,
 	    LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2,
 	    cap_rights_init(&rights, CAP_LINKAT_TARGET), td);
 	if ((error = namei(&nd)) == 0) {
 		if (nd.ni_vp != NULL) {
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 			if (nd.ni_dvp == nd.ni_vp)
 				vrele(nd.ni_dvp);
 			else
 				vput(nd.ni_dvp);
 			vrele(nd.ni_vp);
 			vrele(vp);
 			return (EEXIST);
 		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
 			/*
 			 * Cross-device link.  No need to recheck
 			 * vp->v_type, since it cannot change, except
 			 * to VBAD.
 			 */
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 			vput(nd.ni_dvp);
 			vrele(vp);
 			return (EXDEV);
 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
 			error = can_hardlink(vp, td->td_ucred);
 #ifdef MAC
 			if (error == 0)
 				error = mac_vnode_check_link(td->td_ucred,
 				    nd.ni_dvp, vp, &nd.ni_cnd);
 #endif
 			if (error != 0) {
 				vput(vp);
 				vput(nd.ni_dvp);
 				NDFREE(&nd, NDF_ONLY_PNBUF);
 				return (error);
 			}
 			error = vn_start_write(vp, &mp, V_NOWAIT);
 			if (error != 0) {
 				vput(vp);
 				vput(nd.ni_dvp);
 				NDFREE(&nd, NDF_ONLY_PNBUF);
 				error = vn_start_write(NULL, &mp,
 				    V_XSLEEP | PCATCH);
 				if (error != 0)
 					return (error);
 				goto again;
 			}
 			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 			VOP_UNLOCK(vp, 0);
 			vput(nd.ni_dvp);
 			vn_finished_write(mp);
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 		} else {
 			vput(nd.ni_dvp);
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 			vrele(vp);
 			goto again;
 		}
 	}
 	vrele(vp);
 	return (error);
 }
 
 /*
  * Make a symbolic link.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct symlink_args {
 	char	*path;
 	char	*link;
 };
 #endif
 int
 sys_symlink(struct thread *td, struct symlink_args *uap)
 {
 
 	return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link,
 	    UIO_USERSPACE));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct symlinkat_args {
 	char	*path;
 	int	fd;
 	char	*path2;
 };
 #endif
 int
 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 {
 
 	return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 	    UIO_USERSPACE));
 }
 
 int
 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
     enum uio_seg segflg)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	char *syspath;
 	struct nameidata nd;
 	int error;
 	cap_rights_t rights;
 
 	if (segflg == UIO_SYSSPACE) {
 		syspath = path1;
 	} else {
 		syspath = uma_zalloc(namei_zone, M_WAITOK);
 		if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0)
 			goto out;
 	}
 	AUDIT_ARG_TEXT(syspath);
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 	    NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT),
 	    td);
 	if ((error = namei(&nd)) != 0)
 		goto out;
 	if (nd.ni_vp) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		if (nd.ni_vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		vrele(nd.ni_vp);
 		error = EEXIST;
 		goto out;
 	}
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			goto out;
 		goto restart;
 	}
 	VATTR_NULL(&vattr);
 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 #ifdef MAC
 	vattr.va_type = VLNK;
 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 	    &vattr);
 	if (error != 0)
 		goto out2;
 #endif
 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 	if (error == 0)
 		vput(nd.ni_vp);
 #ifdef MAC
 out2:
 #endif
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
 	vn_finished_write(mp);
 out:
 	if (segflg != UIO_SYSSPACE)
 		uma_zfree(namei_zone, syspath);
 	return (error);
 }
 
 /*
  * Delete a whiteout from the filesystem.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct undelete_args {
 	char *path;
 };
 #endif
 int
 sys_undelete(struct thread *td, struct undelete_args *uap)
 {
 	struct mount *mp;
 	struct nameidata nd;
 	int error;
 
 restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
 	    UIO_USERSPACE, uap->path, td);
 	error = namei(&nd);
 	if (error != 0)
 		return (error);
 
 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		if (nd.ni_vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		if (nd.ni_vp)
 			vrele(nd.ni_vp);
 		return (EEXIST);
 	}
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			return (error);
 		goto restart;
 	}
 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Delete a name from the filesystem.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct unlink_args {
 	char	*path;
 };
 #endif
 int
 sys_unlink(struct thread *td, struct unlink_args *uap)
 {
 
 	return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct unlinkat_args {
 	int	fd;
 	char	*path;
 	int	flag;
 };
 #endif
 int
 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 {
 	int flag = uap->flag;
 	int fd = uap->fd;
 	char *path = uap->path;
 
 	if (flag & ~AT_REMOVEDIR)
 		return (EINVAL);
 
 	if (flag & AT_REMOVEDIR)
 		return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
 	else
 		return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
 }
 
 int
 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
     ino_t oldinum)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	struct nameidata nd;
 	struct stat sb;
 	cap_rights_t rights;
 	int error;
 
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 	    pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 	if ((error = namei(&nd)) != 0)
 		return (error == EINVAL ? EPERM : error);
 	vp = nd.ni_vp;
 	if (vp->v_type == VDIR && oldinum == 0) {
 		error = EPERM;		/* POSIX */
 	} else if (oldinum != 0 &&
 		  ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 		  sb.st_ino != oldinum) {
 			error = EIDRM;	/* Identifier removed */
 	} else {
 		/*
 		 * The root of a mounted filesystem cannot be deleted.
 		 *
 		 * XXX: can this only be a VDIR case?
 		 */
 		if (vp->v_vflag & VV_ROOT)
 			error = EBUSY;
 	}
 	if (error == 0) {
 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 			vput(nd.ni_dvp);
 			if (vp == nd.ni_dvp)
 				vrele(vp);
 			else
 				vput(vp);
 			if ((error = vn_start_write(NULL, &mp,
 			    V_XSLEEP | PCATCH)) != 0)
 				return (error);
 			goto restart;
 		}
 #ifdef MAC
 		error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 		    &nd.ni_cnd);
 		if (error != 0)
 			goto out;
 #endif
 		vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 #ifdef MAC
 out:
 #endif
 		vn_finished_write(mp);
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
 	if (vp == nd.ni_dvp)
 		vrele(vp);
 	else
 		vput(vp);
 	return (error);
 }
 
 /*
  * Reposition read/write file offset.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lseek_args {
 	int	fd;
 	int	pad;
 	off_t	offset;
 	int	whence;
 };
 #endif
 int
 sys_lseek(struct thread *td, struct lseek_args *uap)
 {
 
 	return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 }
 
 int
 kern_lseek(struct thread *td, int fd, off_t offset, int whence)
 {
 	struct file *fp;
 	cap_rights_t rights;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 	error = fget(td, fd, cap_rights_init(&rights, CAP_SEEK), &fp);
 	if (error != 0)
 		return (error);
 	error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
 	    fo_seek(fp, offset, whence, td) : ESPIPE;
 	fdrop(fp, td);
 	return (error);
 }
 
 #if defined(COMPAT_43)
 /*
  * Reposition read/write file offset.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct olseek_args {
 	int	fd;
 	long	offset;
 	int	whence;
 };
 #endif
 int
 olseek(struct thread *td, struct olseek_args *uap)
 {
 
 	return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_FREEBSD6)
 /* Version with the 'pad' argument */
 int
 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap)
 {
 
 	return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 }
 #endif
 
 /*
  * Check access permissions using passed credentials.
  */
 static int
 vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
      struct thread *td)
 {
 	accmode_t accmode;
 	int error;
 
 	/* Flags == 0 means only check for existence. */
 	if (user_flags == 0)
 		return (0);
 
 	accmode = 0;
 	if (user_flags & R_OK)
 		accmode |= VREAD;
 	if (user_flags & W_OK)
 		accmode |= VWRITE;
 	if (user_flags & X_OK)
 		accmode |= VEXEC;
 #ifdef MAC
 	error = mac_vnode_check_access(cred, vp, accmode);
 	if (error != 0)
 		return (error);
 #endif
 	if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 		error = VOP_ACCESS(vp, accmode, cred, td);
 	return (error);
 }
 
 /*
  * Check access permissions using "real" credentials.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct access_args {
 	char	*path;
 	int	amode;
 };
 #endif
 int
 sys_access(struct thread *td, struct access_args *uap)
 {
 
 	return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    0, uap->amode));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct faccessat_args {
 	int	dirfd;
 	char	*path;
 	int	amode;
 	int	flag;
 }
 #endif
 int
 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 {
 
 	return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 	    uap->amode));
 }
 
 int
 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
     int flag, int amode)
 {
 	struct ucred *cred, *usecred;
 	struct vnode *vp;
 	struct nameidata nd;
 	cap_rights_t rights;
 	int error;
 
 	if (flag & ~AT_EACCESS)
 		return (EINVAL);
 	if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0)
 		return (EINVAL);
 
 	/*
 	 * Create and modify a temporary credential instead of one that
 	 * is potentially shared (if we need one).
 	 */
 	cred = td->td_ucred;
 	if ((flag & AT_EACCESS) == 0 &&
 	    ((cred->cr_uid != cred->cr_ruid ||
 	    cred->cr_rgid != cred->cr_groups[0]))) {
 		usecred = crdup(cred);
 		usecred->cr_uid = cred->cr_ruid;
 		usecred->cr_groups[0] = cred->cr_rgid;
 		td->td_ucred = usecred;
 	} else
 		usecred = cred;
 	AUDIT_ARG_VALUE(amode);
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
 	    AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT),
 	    td);
 	if ((error = namei(&nd)) != 0)
 		goto out;
 	vp = nd.ni_vp;
 
 	error = vn_access(vp, amode, usecred, td);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(vp);
 out:
 	if (usecred != cred) {
 		td->td_ucred = cred;
 		crfree(usecred);
 	}
 	return (error);
 }
 
 /*
  * Check access permissions using "effective" credentials.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct eaccess_args {
 	char	*path;
 	int	amode;
 };
 #endif
 int
 sys_eaccess(struct thread *td, struct eaccess_args *uap)
 {
 
 	return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    AT_EACCESS, uap->amode));
 }
 
 #if defined(COMPAT_43)
 /*
  * Get file status; this version follows links.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct ostat_args {
 	char	*path;
 	struct ostat *ub;
 };
 #endif
 int
 ostat(struct thread *td, struct ostat_args *uap)
 {
 	struct stat sb;
 	struct ostat osb;
 	int error;
 
 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    &sb, NULL);
 	if (error != 0)
 		return (error);
 	cvtstat(&sb, &osb);
 	return (copyout(&osb, uap->ub, sizeof (osb)));
 }
 
 /*
  * Get file status; this version does not follow links.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct olstat_args {
 	char	*path;
 	struct ostat *ub;
 };
 #endif
 int
 olstat(struct thread *td, struct olstat_args *uap)
 {
 	struct stat sb;
 	struct ostat osb;
 	int error;
 
 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error != 0)
 		return (error);
 	cvtstat(&sb, &osb);
 	return (copyout(&osb, uap->ub, sizeof (osb)));
 }
 
 /*
  * Convert from an old to a new stat structure.
  */
 void
 cvtstat(struct stat *st, struct ostat *ost)
 {
 
 	bzero(ost, sizeof(*ost));
 	ost->st_dev = st->st_dev;
 	ost->st_ino = st->st_ino;
 	ost->st_mode = st->st_mode;
 	ost->st_nlink = st->st_nlink;
 	ost->st_uid = st->st_uid;
 	ost->st_gid = st->st_gid;
 	ost->st_rdev = st->st_rdev;
 	if (st->st_size < (quad_t)1 << 32)
 		ost->st_size = st->st_size;
 	else
 		ost->st_size = -2;
 	ost->st_atim = st->st_atim;
 	ost->st_mtim = st->st_mtim;
 	ost->st_ctim = st->st_ctim;
 	ost->st_blksize = st->st_blksize;
 	ost->st_blocks = st->st_blocks;
 	ost->st_flags = st->st_flags;
 	ost->st_gen = st->st_gen;
 }
 #endif /* COMPAT_43 */
 
-/*
- * Get file status; this version follows links.
- */
-#ifndef _SYS_SYSPROTO_H_
-struct stat_args {
-	char	*path;
-	struct stat *ub;
-};
-#endif
+#if defined(COMPAT_FREEBSD11)
+void
+freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost)
+{
+	/* Fill *ost from *st member by member; *ost is not pre-zeroed. */
+	ost->st_dev = st->st_dev;
+	ost->st_ino = st->st_ino;		/* truncate */
+	ost->st_mode = st->st_mode;
+	ost->st_nlink = st->st_nlink;		/* truncate */
+	ost->st_uid = st->st_uid;
+	ost->st_gid = st->st_gid;
+	ost->st_rdev = st->st_rdev;
+	ost->st_atim = st->st_atim;
+	ost->st_mtim = st->st_mtim;
+	ost->st_ctim = st->st_ctim;
+	ost->st_size = st->st_size;
+	ost->st_blocks = st->st_blocks;
+	ost->st_blksize = st->st_blksize;
+	ost->st_flags = st->st_flags;
+	ost->st_gen = st->st_gen;
+	ost->st_lspare = 0;
+	ost->st_birthtim = st->st_birthtim;
+	bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim),
+	    sizeof(*ost) - offsetof(struct freebsd11_stat,
+	    st_birthtim) - sizeof(ost->st_birthtim));	/* zero rest of *ost */
+}
+
 int
-sys_stat(struct thread *td, struct stat_args *uap)
+freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap)
 {
 	struct stat sb;
+	struct freebsd11_stat osb;	/* converted copy sent to userland */
 	int error;
 
 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    &sb, NULL);
-	if (error == 0)
-		error = copyout(&sb, uap->ub, sizeof (sb));
+	if (error != 0)
+		return (error);
+	freebsd11_cvtstat(&sb, &osb);	/* sb -> struct freebsd11_stat */
+	error = copyout(&osb, uap->ub, sizeof(osb));
 	return (error);
 }
 
+int
+freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap)
+{
+	struct stat sb;
+	struct freebsd11_stat osb;	/* converted copy sent to userland */
+	int error;
+
+	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
+	    UIO_USERSPACE, &sb, NULL);	/* NOFOLLOW lookup; no hook */
+	if (error != 0)
+		return (error);
+	freebsd11_cvtstat(&sb, &osb);	/* sb -> struct freebsd11_stat */
+	error = copyout(&osb, uap->ub, sizeof(osb));
+	return (error);
+}
+
+int
+freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap)
+{
+	struct fhandle fh;		/* kernel copy of the user's handle */
+	struct stat sb;
+	struct freebsd11_stat osb;	/* converted copy sent to userland */
+	int error;
+
+	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
+	if (error != 0)
+		return (error);
+	error = kern_fhstat(td, fh, &sb);	/* fh is passed by value */
+	if (error != 0)
+		return (error);
+	freebsd11_cvtstat(&sb, &osb);	/* sb -> struct freebsd11_stat */
+	error = copyout(&osb, uap->sb, sizeof(osb));
+	return (error);
+}
+
+int
+freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap)
+{
+	struct stat sb;
+	struct freebsd11_stat osb;	/* converted copy sent to userland */
+	int error;
+
+	error = kern_statat(td, uap->flag, uap->fd, uap->path,
+	    UIO_USERSPACE, &sb, NULL);	/* kern_statat validates flag */
+	if (error != 0)
+		return (error);
+	freebsd11_cvtstat(&sb, &osb);	/* sb -> struct freebsd11_stat */
+	error = copyout(&osb, uap->buf, sizeof(osb));
+	return (error);
+}
+#endif	/* COMPAT_FREEBSD11 */
+
+/*
+ * Get file status
+ */
 #ifndef _SYS_SYSPROTO_H_
 struct fstatat_args {
 	int	fd;
 	char	*path;
 	struct stat	*buf;
 	int	flag;
 }
 #endif
 int
 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 {
 	struct stat sb;
 	int error;
 
 	error = kern_statat(td, uap->flag, uap->fd, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error == 0)
 		error = copyout(&sb, uap->buf, sizeof (sb));
 	return (error);
 }
 
 int
 kern_statat(struct thread *td, int flag, int fd, char *path,
     enum uio_seg pathseg, struct stat *sbp,
     void (*hook)(struct vnode *vp, struct stat *sbp))
 {
 	struct nameidata nd;
 	struct stat sb;
 	cap_rights_t rights;
 	int error;
 
 	if (flag & ~AT_SYMLINK_NOFOLLOW)
 		return (EINVAL);
 
 	NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 	    FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd,
 	    cap_rights_init(&rights, CAP_FSTAT), td);
 
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
 	if (error == 0) {
 		SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode);
 		if (S_ISREG(sb.st_mode))
 			SDT_PROBE2(vfs, , stat, reg, path, pathseg);
 		if (__predict_false(hook != NULL))
 			hook(nd.ni_vp, &sb);
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_vp);
 	if (error != 0)
 		return (error);
+#ifdef __STAT_TIME_T_EXT
+	sb.st_atim_ext = 0;	/* clear *_ext members before sb is copied */
+	sb.st_mtim_ext = 0;
+	sb.st_ctim_ext = 0;
+	sb.st_btim_ext = 0;
+#endif /* __STAT_TIME_T_EXT */
 	*sbp = sb;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktrstat(&sb);
 #endif
 	return (0);
 }
 
+#if defined(COMPAT_FREEBSD11)
 /*
- * Get file status; this version does not follow links.
- */
-#ifndef _SYS_SYSPROTO_H_
-struct lstat_args {
-	char	*path;
-	struct stat *ub;
-};
-#endif
-int
-sys_lstat(struct thread *td, struct lstat_args *uap)
-{
-	struct stat sb;
-	int error;
-
-	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
-	    UIO_USERSPACE, &sb, NULL);
-	if (error == 0)
-		error = copyout(&sb, uap->ub, sizeof (sb));
-	return (error);
-}
-
-/*
  * Implementation of the NetBSD [l]stat() functions.
  */
 void
-cvtnstat( struct stat *sb, struct nstat *nsb)
+freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb)
 {
 
-	bzero(nsb, sizeof *nsb);
+	bzero(nsb, sizeof(*nsb));
 	nsb->st_dev = sb->st_dev;
 	nsb->st_ino = sb->st_ino;
 	nsb->st_mode = sb->st_mode;
 	nsb->st_nlink = sb->st_nlink;
 	nsb->st_uid = sb->st_uid;
 	nsb->st_gid = sb->st_gid;
 	nsb->st_rdev = sb->st_rdev;
 	nsb->st_atim = sb->st_atim;
 	nsb->st_mtim = sb->st_mtim;
 	nsb->st_ctim = sb->st_ctim;
 	nsb->st_size = sb->st_size;
 	nsb->st_blocks = sb->st_blocks;
 	nsb->st_blksize = sb->st_blksize;
 	nsb->st_flags = sb->st_flags;
 	nsb->st_gen = sb->st_gen;
 	nsb->st_birthtim = sb->st_birthtim;
 }
 
 #ifndef _SYS_SYSPROTO_H_
-struct nstat_args {
+struct freebsd11_nstat_args {
 	char	*path;
 	struct nstat *ub;
 };
 #endif
 int
-sys_nstat(struct thread *td, struct nstat_args *uap)
+freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap)
 {
 	struct stat sb;
 	struct nstat nsb;
 	int error;
 
 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    &sb, NULL);
 	if (error != 0)
 		return (error);
-	cvtnstat(&sb, &nsb);
+	freebsd11_cvtnstat(&sb, &nsb);
 	return (copyout(&nsb, uap->ub, sizeof (nsb)));
 }
 
 /*
  * NetBSD lstat.  Get file status; this version does not follow links.
  */
 #ifndef _SYS_SYSPROTO_H_
-struct lstat_args {
+struct freebsd11_nlstat_args {
 	char	*path;
-	struct stat *ub;
+	struct nstat *ub;
 };
 #endif
 int
-sys_nlstat(struct thread *td, struct nlstat_args *uap)
+freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap)
 {
 	struct stat sb;
 	struct nstat nsb;
 	int error;
 
 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error != 0)
 		return (error);
-	cvtnstat(&sb, &nsb);
+	freebsd11_cvtnstat(&sb, &nsb);
 	return (copyout(&nsb, uap->ub, sizeof (nsb)));
 }
+#endif /* COMPAT_FREEBSD11 */
 
 /*
  * Get configurable pathname variables.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct pathconf_args {
 	char	*path;
 	int	name;
 };
 #endif
 int
 sys_pathconf(struct thread *td, struct pathconf_args *uap)
 {
 
 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct lpathconf_args {
 	char	*path;
 	int	name;
 };
 #endif
 int
 sys_lpathconf(struct thread *td, struct lpathconf_args *uap)
 {
 
 	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name,
 	    NOFOLLOW));
 }
 
 int
 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name,
     u_long flags)
 {
 	struct nameidata nd;
 	int error;
 
 	NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
 	    pathseg, path, td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 
 	error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
 	vput(nd.ni_vp);
 	return (error);
 }
 
 /*
  * Return target name of a symbolic link.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct readlink_args {
 	char	*path;
 	char	*buf;
 	size_t	count;
 };
 #endif
 int
 sys_readlink(struct thread *td, struct readlink_args *uap)
 {
 
 	return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->buf, UIO_USERSPACE, uap->count));
 }
 #ifndef _SYS_SYSPROTO_H_
 struct readlinkat_args {
 	int	fd;
 	char	*path;
 	char	*buf;
 	size_t	bufsize;
 };
 #endif
 int
 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 {
 
 	return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->buf, UIO_USERSPACE, uap->bufsize));
 }
 
 int
 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
     char *buf, enum uio_seg bufseg, size_t count)
 {
 	struct vnode *vp;
 	struct iovec aiov;
 	struct uio auio;
 	struct nameidata nd;
 	int error;
 
 	if (count > IOSIZE_MAX)
 		return (EINVAL);
 
 	NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 	    pathseg, path, fd, td);
 
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
 #ifdef MAC
 	error = mac_vnode_check_readlink(td->td_ucred, vp);
 	if (error != 0) {
 		vput(vp);
 		return (error);
 	}
 #endif
 	if (vp->v_type != VLNK)
 		error = EINVAL;
 	else {
 		aiov.iov_base = buf;
 		aiov.iov_len = count;
 		auio.uio_iov = &aiov;
 		auio.uio_iovcnt = 1;
 		auio.uio_offset = 0;
 		auio.uio_rw = UIO_READ;
 		auio.uio_segflg = bufseg;
 		auio.uio_td = td;
 		auio.uio_resid = count;
 		error = VOP_READLINK(vp, &auio, td->td_ucred);
 		td->td_retval[0] = count - auio.uio_resid;
 	}
 	vput(vp);
 	return (error);
 }
 
 /*
  * Common implementation code for chflags() and fchflags().
  */
 static int
 setfflags(struct thread *td, struct vnode *vp, u_long flags)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	int error;
 
 	/* We can't support the value matching VNOVAL. */
 	if (flags == VNOVAL)
 		return (EOPNOTSUPP);
 
 	/*
 	 * Prevent non-root users from setting flags on devices.  When
 	 * a device is reused, users can retain ownership of the device
 	 * if they are allowed to set flags and programs assume that
 	 * chown can't fail when done as root.
 	 */
 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
 		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 		if (error != 0)
 			return (error);
 	}
 
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	VATTR_NULL(&vattr);
 	vattr.va_flags = flags;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 #ifdef MAC
 	error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 	if (error == 0)
 #endif
 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Change flags of a file given a path name.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chflags_args {
 	const char *path;
 	u_long	flags;
 };
 #endif
 int
 sys_chflags(struct thread *td, struct chflags_args *uap)
 {
 
 	return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->flags, 0));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct chflagsat_args {
 	int	fd;
 	const char *path;
 	u_long	flags;
 	int	atflag;
 }
 #endif
 int
 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
 {
 	int fd = uap->fd;
 	const char *path = uap->path;
 	u_long flags = uap->flags;
 	int atflag = uap->atflag;
 
 	if (atflag & ~AT_SYMLINK_NOFOLLOW)
 		return (EINVAL);
 
 	return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag));
 }
 
 /*
  * Same as chflags() but doesn't follow symlinks.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lchflags_args {
 	const char *path;
 	u_long flags;
 };
 #endif
 int
 sys_lchflags(struct thread *td, struct lchflags_args *uap)
 {
 
 	return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->flags, AT_SYMLINK_NOFOLLOW));
 }
 
 static int
 kern_chflagsat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, u_long flags, int atflag)
 {
 	struct nameidata nd;
 	cap_rights_t rights;
 	int error, follow;
 
 	AUDIT_ARG_FFLAGS(flags);
 	follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 	NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 	    cap_rights_init(&rights, CAP_FCHFLAGS), td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	error = setfflags(td, nd.ni_vp, flags);
 	vrele(nd.ni_vp);
 	return (error);
 }
 
 /*
  * Change flags of a file given a file descriptor.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchflags_args {
 	int	fd;
 	u_long	flags;
 };
 #endif
 int
 sys_fchflags(struct thread *td, struct fchflags_args *uap)
 {
 	struct file *fp;
 	cap_rights_t rights;
 	int error;
 
 	AUDIT_ARG_FD(uap->fd);
 	AUDIT_ARG_FFLAGS(uap->flags);
 	error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS),
 	    &fp);
 	if (error != 0)
 		return (error);
 #ifdef AUDIT
 	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 	AUDIT_ARG_VNODE1(fp->f_vnode);
 	VOP_UNLOCK(fp->f_vnode, 0);
 #endif
 	error = setfflags(td, fp->f_vnode, uap->flags);
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * Common implementation code for chmod(), lchmod() and fchmod().
  */
 int
 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	int error;
 
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	VATTR_NULL(&vattr);
 	vattr.va_mode = mode & ALLPERMS;
 #ifdef MAC
 	error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 	if (error == 0)
 #endif
 		error = VOP_SETATTR(vp, &vattr, cred);
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Change mode of a file given path name.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chmod_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 sys_chmod(struct thread *td, struct chmod_args *uap)
 {
 
 	return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->mode, 0));
 }
 
 /*
  * fchmodat(2): change the mode of a file named by 'path', resolved
  * relative to the directory descriptor 'fd'.  AT_SYMLINK_NOFOLLOW is the
  * only bit accepted in 'flag'; any other bit yields EINVAL.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchmodat_args {
 	int	fd;
 	char	*path;
 	mode_t	mode;
 	int	flag;
 };
 #endif
 int
 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 {
 	int flag = uap->flag;
 	int fd = uap->fd;
 	char *path = uap->path;
 	mode_t mode = uap->mode;
 
 	/* Reject flag bits this call does not understand. */
 	if (flag & ~AT_SYMLINK_NOFOLLOW)
 		return (EINVAL);
 
 	return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag));
 }
 
 /*
  * lchmod(2): change the mode of the file named by 'path'; the lookup is
  * requested with AT_SYMLINK_NOFOLLOW.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lchmod_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 sys_lchmod(struct thread *td, struct lchmod_args *uap)
 {
 	int rv;
 
 	rv = kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->mode, AT_SYMLINK_NOFOLLOW);
 	return (rv);
 }
 
 /*
  * Look up 'path' relative to 'fd' (requiring CAP_FCHMOD) and apply 'mode'
  * to the resulting vnode through setfmode().  AT_SYMLINK_NOFOLLOW in
  * 'flag' selects a NOFOLLOW lookup.  Returns 0 or an errno value.
  */
 int
 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
     mode_t mode, int flag)
 {
 	struct nameidata lookup;
 	cap_rights_t caprights;
 	int lookupflags, rv;
 
 	AUDIT_ARG_MODE(mode);
 	if ((flag & AT_SYMLINK_NOFOLLOW) != 0)
 		lookupflags = NOFOLLOW;
 	else
 		lookupflags = FOLLOW;
 	NDINIT_ATRIGHTS(&lookup, LOOKUP, lookupflags | AUDITVNODE1, pathseg,
 	    path, fd, cap_rights_init(&caprights, CAP_FCHMOD), td);
 	rv = namei(&lookup);
 	if (rv != 0)
 		return (rv);
 	NDFREE(&lookup, NDF_ONLY_PNBUF);
 	rv = setfmode(td, td->td_ucred, lookup.ni_vp, mode);
 	vrele(lookup.ni_vp);
 	return (rv);
 }
 
 /*
  * fchmod(2): change the mode of an open file.  The work is delegated to
  * the file's fo_chmod() method.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchmod_args {
 	int	fd;
 	int	mode;
 };
 #endif
 int
 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 {
 	cap_rights_t caprights;
 	struct file *filep;
 	int rv;
 
 	AUDIT_ARG_FD(uap->fd);
 	AUDIT_ARG_MODE(uap->mode);
 
 	rv = fget(td, uap->fd, cap_rights_init(&caprights, CAP_FCHMOD),
 	    &filep);
 	if (rv != 0)
 		return (rv);
 	rv = fo_chmod(filep, uap->mode, td->td_ucred, td);
 	fdrop(filep, td);
 	return (rv);
 }
 
 /*
  * Shared back end for chown(), lchown() and fchown(): set the owner and
  * group of 'vp' to 'uid' and 'gid' using credential 'cred'.  Returns 0 or
  * an errno value.
  */
 int
 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid,
     gid_t gid)
 {
 	struct vattr va;
 	struct mount *wrmp;
 	int rv;
 
 	rv = vn_start_write(vp, &wrmp, V_WAIT | PCATCH);
 	if (rv != 0)
 		return (rv);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	/* Only the ownership fields are filled in after VATTR_NULL(). */
 	VATTR_NULL(&va);
 	va.va_uid = uid;
 	va.va_gid = gid;
 #ifdef MAC
 	rv = mac_vnode_check_setowner(cred, vp, va.va_uid, va.va_gid);
 	if (rv == 0)
 #endif
 		rv = VOP_SETATTR(vp, &va, cred);
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(wrmp);
 	return (rv);
 }
 
 /*
  * chown(2): set the ownership of the file named by 'path', resolved
  * relative to the current working directory with no lookup flags.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chown_args {
 	char	*path;
 	int	uid;
 	int	gid;
 };
 #endif
 int
 sys_chown(struct thread *td, struct chown_args *uap)
 {
 	int rv;
 
 	rv = kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid,
 	    uap->gid, 0);
 	return (rv);
 }
 
 /*
  * fchownat(2): set the ownership of a file named relative to a directory
  * descriptor.  AT_SYMLINK_NOFOLLOW is the only bit accepted in 'flag'.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchownat_args {
 	int fd;
 	const char * path;
 	uid_t uid;
 	gid_t gid;
 	int flag;
 };
 #endif
 int
 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 {
 
 	/* Reject flag bits this call does not understand. */
 	if ((uap->flag & ~AT_SYMLINK_NOFOLLOW) != 0)
 		return (EINVAL);
 
 	return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 	    uap->gid, uap->flag));
 }
 
 int
 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
     int uid, int gid, int flag)
 {
 	struct nameidata nd;
 	cap_rights_t rights;
 	int error, follow;
 
 	AUDIT_ARG_OWNER(uid, gid);
 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
 	NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd,
 	    cap_rights_init(&rights, CAP_FCHOWN), td);
 
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 	vrele(nd.ni_vp);
 	return (error);
 }
 
 /*
  * lchown(2): set the ownership of the file named by 'path'; the lookup is
  * requested with AT_SYMLINK_NOFOLLOW.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lchown_args {
 	char	*path;
 	int	uid;
 	int	gid;
 };
 #endif
 int
 sys_lchown(struct thread *td, struct lchown_args *uap)
 {
 	int rv;
 
 	rv = kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW);
 	return (rv);
 }
 
 /*
  * fchown(2): set the ownership of an open file.  The work is delegated to
  * the file's fo_chown() method.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchown_args {
 	int	fd;
 	int	uid;
 	int	gid;
 };
 #endif
 int
 sys_fchown(struct thread *td, struct fchown_args *uap)
 {
 	cap_rights_t caprights;
 	struct file *filep;
 	int rv;
 
 	AUDIT_ARG_FD(uap->fd);
 	AUDIT_ARG_OWNER(uap->uid, uap->gid);
 	rv = fget(td, uap->fd, cap_rights_init(&caprights, CAP_FCHOWN),
 	    &filep);
 	if (rv != 0)
 		return (rv);
 	rv = fo_chown(filep, uap->uid, uap->gid, td->td_ucred, td);
 	fdrop(filep, td);
 	return (rv);
 }
 
 /*
  * Fetch the timeval pair used by utimes(), lutimes() and futimes() and
  * convert it into the two-element timespec array 'tsp'.
  *
  * A NULL 'usrtvp' fills both slots with the current vfs timestamp.
  * Otherwise the pair is read in place (UIO_SYSSPACE) or copied in, and
  * EINVAL is returned when either tv_usec lies outside [0, 1000000).
  */
 static int
 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg,
     struct timespec *tsp)
 {
 	struct timeval kbuf[2];
 	const struct timeval *src;
 	int rv;
 
 	/* No times supplied: stamp both slots with "now". */
 	if (usrtvp == NULL) {
 		vfs_timestamp(&tsp[0]);
 		tsp[1] = tsp[0];
 		return (0);
 	}
 
 	src = usrtvp;
 	if (tvpseg != UIO_SYSSPACE) {
 		rv = copyin(usrtvp, kbuf, sizeof(kbuf));
 		if (rv != 0)
 			return (rv);
 		src = kbuf;
 	}
 
 	if (src[0].tv_usec < 0 || src[0].tv_usec >= 1000000 ||
 	    src[1].tv_usec < 0 || src[1].tv_usec >= 1000000)
 		return (EINVAL);
 	TIMEVAL_TO_TIMESPEC(&src[0], &tsp[0]);
 	TIMEVAL_TO_TIMESPEC(&src[1], &tsp[1]);
 	return (0);
 }
 
 /*
  * Common implementation code for futimens(), utimensat().
  */
 #define	UTIMENS_NULL	0x1
 #define	UTIMENS_EXIT	0x2
 static int
 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg,
     struct timespec *tsp, int *retflags)
 {
 	struct timespec tsnow;
 	int error;
 
 	vfs_timestamp(&tsnow);
 	*retflags = 0;
 	if (usrtsp == NULL) {
 		tsp[0] = tsnow;
 		tsp[1] = tsnow;
 		*retflags |= UTIMENS_NULL;
 		return (0);
 	}
 	if (tspseg == UIO_SYSSPACE) {
 		tsp[0] = usrtsp[0];
 		tsp[1] = usrtsp[1];
 	} else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0)
 		return (error);
 	if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT)
 		*retflags |= UTIMENS_EXIT;
 	if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW)
 		*retflags |= UTIMENS_NULL;
 	if (tsp[0].tv_nsec == UTIME_OMIT)
 		tsp[0].tv_sec = VNOVAL;
 	else if (tsp[0].tv_nsec == UTIME_NOW)
 		tsp[0] = tsnow;
 	else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L)
 		return (EINVAL);
 	if (tsp[1].tv_nsec == UTIME_OMIT)
 		tsp[1].tv_sec = VNOVAL;
 	else if (tsp[1].tv_nsec == UTIME_NOW)
 		tsp[1] = tsnow;
 	else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L)
 		return (EINVAL);
 
 	return (0);
 }
 
 /*
  * Common implementation code for utimes(), lutimes(), futimes(), futimens(),
  * and utimensat().
  *
  * Applies ts[0] as the access time and ts[1] as the modification time of
  * 'vp'.  When 'numtimes' is greater than 2, ts[2] is applied as the birth
  * time.  A non-zero 'nullflag' sets VA_UTIMES_NULL in the attribute
  * flags.  Returns 0 or an errno value.
  */
 static int
 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts,
     int numtimes, int nullflag)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	int error, setbirthtime;
 
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	setbirthtime = 0;
 	/*
 	 * No explicit birth time was supplied: if the new modification time
 	 * is earlier than the current birth time, move the birth time back
 	 * to it as well.  A failing VOP_GETATTR() leaves it untouched.
 	 */
 	if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 		setbirthtime = 1;
 	/* vattr is reused from here on as the set-attribute request. */
 	VATTR_NULL(&vattr);
 	vattr.va_atime = ts[0];
 	vattr.va_mtime = ts[1];
 	if (setbirthtime)
 		vattr.va_birthtime = ts[1];
 	if (numtimes > 2)
 		vattr.va_birthtime = ts[2];
 	if (nullflag)
 		vattr.va_vaflags |= VA_UTIMES_NULL;
 #ifdef MAC
 	error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 	    vattr.va_mtime);
 #endif
 	/* Without MAC, error is still 0 from the vn_start_write() check. */
 	if (error == 0)
 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * utimes(2): set the access and modification times of the file named by
  * 'path', resolved relative to the current working directory.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct utimes_args {
 	char	*path;
 	struct	timeval *tptr;
 };
 #endif
 int
 sys_utimes(struct thread *td, struct utimes_args *uap)
 {
 	int rv;
 
 	rv = kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->tptr, UIO_USERSPACE);
 	return (rv);
 }
 
 /*
  * futimesat(2): set the access and modification times of a file named
  * relative to the directory descriptor 'fd'.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct futimesat_args {
 	int fd;
 	const char * path;
 	const struct timeval * times;
 };
 #endif
 int
 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 {
 	int rv;
 
 	rv = kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->times, UIO_USERSPACE);
 	return (rv);
 }
 
 int
 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
     struct timeval *tptr, enum uio_seg tptrseg)
 {
 	struct nameidata nd;
 	struct timespec ts[2];
 	cap_rights_t rights;
 	int error;
 
 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 		return (error);
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 	    cap_rights_init(&rights, CAP_FUTIMES), td);
 
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 	vrele(nd.ni_vp);
 	return (error);
 }
 
 /*
  * lutimes(2): set the access and modification times of the file named by
  * 'path'; the work is done by kern_lutimes().
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lutimes_args {
 	char	*path;
 	struct	timeval *tptr;
 };
 #endif
 int
 sys_lutimes(struct thread *td, struct lutimes_args *uap)
 {
 	int rv;
 
 	rv = kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 	    UIO_USERSPACE);
 	return (rv);
 }
 
 int
 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
     struct timeval *tptr, enum uio_seg tptrseg)
 {
 	struct timespec ts[2];
 	struct nameidata nd;
 	int error;
 
 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 		return (error);
 	NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 	vrele(nd.ni_vp);
 	return (error);
 }
 
 /*
  * futimes(2): set the access and modification times of an open file.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct futimes_args {
 	int	fd;
 	struct	timeval *tptr;
 };
 #endif
 int
 sys_futimes(struct thread *td, struct futimes_args *uap)
 {
 	int rv;
 
 	rv = kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE);
 	return (rv);
 }
 
 /*
  * Fetch the timeval pair and apply it to the vnode behind descriptor 'fd'
  * (which must carry CAP_FUTIMES).  A NULL 'tptr' is passed on as the
  * "null" flag.  Returns 0 or an errno value.
  */
 int
 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
     enum uio_seg tptrseg)
 {
 	struct timespec times[2];
 	cap_rights_t caprights;
 	struct file *filep;
 	int rv;
 
 	AUDIT_ARG_FD(fd);
 	rv = getutimes(tptr, tptrseg, times);
 	if (rv != 0)
 		return (rv);
 	rv = getvnode(td, fd, cap_rights_init(&caprights, CAP_FUTIMES),
 	    &filep);
 	if (rv != 0)
 		return (rv);
 #ifdef AUDIT
 	/* Record the vnode in the audit trail while it is share-locked. */
 	vn_lock(filep->f_vnode, LK_SHARED | LK_RETRY);
 	AUDIT_ARG_VNODE1(filep->f_vnode);
 	VOP_UNLOCK(filep->f_vnode, 0);
 #endif
 	rv = setutimes(td, filep->f_vnode, times, 2, tptr == NULL);
 	fdrop(filep, td);
 	return (rv);
 }
 
 /*
  * futimens(2): set file times with nanosecond precision on an open file.
  */
 int
 sys_futimens(struct thread *td, struct futimens_args *uap)
 {
 	int rv;
 
 	rv = kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE);
 	return (rv);
 }
 
 /*
  * Fetch the timespec pair and apply it to the vnode behind descriptor
  * 'fd' (which must carry CAP_FUTIMES).  When getutimens() reports
  * UTIMENS_EXIT the call succeeds without looking at the descriptor.
  * Returns 0 or an errno value.
  */
 int
 kern_futimens(struct thread *td, int fd, struct timespec *tptr,
     enum uio_seg tptrseg)
 {
 	struct timespec times[2];
 	cap_rights_t caprights;
 	struct file *filep;
 	int rv, tsflags;
 
 	AUDIT_ARG_FD(fd);
 	rv = getutimens(tptr, tptrseg, times, &tsflags);
 	if (rv != 0)
 		return (rv);
 	if ((tsflags & UTIMENS_EXIT) != 0)
 		return (0);
 	rv = getvnode(td, fd, cap_rights_init(&caprights, CAP_FUTIMES),
 	    &filep);
 	if (rv != 0)
 		return (rv);
 #ifdef AUDIT
 	/* Record the vnode in the audit trail while it is share-locked. */
 	vn_lock(filep->f_vnode, LK_SHARED | LK_RETRY);
 	AUDIT_ARG_VNODE1(filep->f_vnode);
 	VOP_UNLOCK(filep->f_vnode, 0);
 #endif
 	rv = setutimes(td, filep->f_vnode, times, 2, tsflags & UTIMENS_NULL);
 	fdrop(filep, td);
 	return (rv);
 }
 
 /*
  * utimensat(2): set file times with nanosecond precision on a file named
  * relative to a directory descriptor.
  */
 int
 sys_utimensat(struct thread *td, struct utimensat_args *uap)
 {
 	int rv;
 
 	rv = kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->times, UIO_USERSPACE, uap->flag);
 	return (rv);
 }
 
 int
 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
     struct timespec *tptr, enum uio_seg tptrseg, int flag)
 {
 	struct nameidata nd;
 	struct timespec ts[2];
 	cap_rights_t rights;
 	int error, flags;
 
 	if (flag & ~AT_SYMLINK_NOFOLLOW)
 		return (EINVAL);
 
 	if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0)
 		return (error);
 	NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 	    FOLLOW) | AUDITVNODE1, pathseg, path, fd,
 	    cap_rights_init(&rights, CAP_FUTIMES), td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	/*
 	 * We are allowed to call namei() regardless of 2xUTIME_OMIT.
 	 * POSIX states:
 	 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected."
 	 * "Search permission is denied by a component of the path prefix."
 	 */
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	if ((flags & UTIMENS_EXIT) == 0)
 		error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL);
 	vrele(nd.ni_vp);
 	return (error);
 }
 
 /*
  * truncate(2): set the length of the file named by 'path'.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct truncate_args {
 	char	*path;
 	int	pad;
 	off_t	length;
 };
 #endif
 int
 sys_truncate(struct thread *td, struct truncate_args *uap)
 {
 	int rv;
 
 	rv = kern_truncate(td, uap->path, UIO_USERSPACE, uap->length);
 	return (rv);
 }
 
 /*
  * Set the size of the file named by 'path' to 'length'.
  *
  * Returns EINVAL for a negative length, EISDIR when the path names a
  * directory, otherwise the first error from the lookup, the MAC check,
  * vn_writechk(), VOP_ACCESS(VWRITE) or VOP_SETATTR().  The whole file
  * range is write-locked for the duration of the operation.
  */
 int
 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	void *rl_cookie;
 	struct vattr vattr;
 	struct nameidata nd;
 	int error;
 
 	if (length < 0)
 		return (EINVAL);
 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
 	/*
 	 * Release the path buffer right after the lookup so that every
 	 * return path below, including the early vn_start_write() failure,
 	 * has done so.
 	 */
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 		vn_rangelock_unlock(vp, rl_cookie);
 		vrele(vp);
 		return (error);
 	}
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	if (vp->v_type == VDIR)
 		error = EISDIR;
 #ifdef MAC
 	/* The empty body only serves to capture the MAC error. */
 	else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 	}
 #endif
 	else if ((error = vn_writechk(vp)) == 0 &&
 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 		/* Only va_size is filled in after VATTR_NULL(). */
 		VATTR_NULL(&vattr);
 		vattr.va_size = length;
 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 	}
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 	vn_rangelock_unlock(vp, rl_cookie);
 	vrele(vp);
 	return (error);
 }
 
 #if defined(COMPAT_43)
 /*
  * COMPAT_43 truncate: same as truncate(2) but the length argument is a
  * long.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct otruncate_args {
 	char	*path;
 	long	length;
 };
 #endif
 int
 otruncate(struct thread *td, struct otruncate_args *uap)
 {
 	int rv;
 
 	rv = kern_truncate(td, uap->path, UIO_USERSPACE, uap->length);
 	return (rv);
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_FREEBSD6)
 /*
  * FreeBSD 6 entry points whose argument structures carry the pad
  * argument; both forward to the current kernel implementations.
  */
 int
 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 {
 	int rv;
 
 	rv = kern_truncate(td, uap->path, UIO_USERSPACE, uap->length);
 	return (rv);
 }
 
 int
 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 {
 	int rv;
 
 	rv = kern_ftruncate(td, uap->fd, uap->length);
 	return (rv);
 }
 #endif
 
 /*
  * Common implementation of fsync() and fdatasync() for descriptor 'fd'
  * (which must carry CAP_FSYNC).  With 'fullsync' true VOP_FSYNC() is
  * called with MNT_WAIT; otherwise VOP_FDATASYNC() is used.  Returns 0 or
  * an errno value.
  */
 int
 kern_fsync(struct thread *td, int fd, bool fullsync)
 {
 	struct vnode *vp;
 	struct mount *mp;
 	struct file *fp;
 	cap_rights_t rights;
 	int error, lock_flags;
 
 	AUDIT_ARG_FD(fd);
 	error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp);
 	if (error != 0)
 		return (error);
 	vp = fp->f_vnode;
 #if 0
 	if (!fullsync)
 		/* XXXKIB: compete outstanding aio writes */;
 #endif
 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 	if (error != 0)
 		goto drop;
 	/*
 	 * A shared vnode lock is enough when the mount (or, if
 	 * vn_start_write() returned no mount, the vnode's own mount)
 	 * reports MNT_SHARED_WRITES; otherwise lock exclusively.
 	 */
 	if (MNT_SHARED_WRITES(mp) ||
 	    ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 		lock_flags = LK_SHARED;
 	} else {
 		lock_flags = LK_EXCLUSIVE;
 	}
 	vn_lock(vp, lock_flags | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	/* Clean the pages of the backing VM object, if there is one. */
 	if (vp->v_object != NULL) {
 		VM_OBJECT_WLOCK(vp->v_object);
 		vm_object_page_clean(vp->v_object, 0, 0, 0);
 		VM_OBJECT_WUNLOCK(vp->v_object);
 	}
 	error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td);
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 drop:
 	/* Common exit: release the file reference taken by getvnode(). */
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * fsync(2): sync an open file; requests the full variant of kern_fsync().
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fsync_args {
 	int	fd;
 };
 #endif
 int
 sys_fsync(struct thread *td, struct fsync_args *uap)
 {
 	int rv;
 
 	rv = kern_fsync(td, uap->fd, true);
 	return (rv);
 }
 
 /*
  * fdatasync(2): sync an open file; requests the data-only variant of
  * kern_fsync().
  */
 int
 sys_fdatasync(struct thread *td, struct fdatasync_args *uap)
 {
 	int rv;
 
 	rv = kern_fsync(td, uap->fd, false);
 	return (rv);
 }
 
 /*
  * rename(2).  Source and destination must either both be directories, or
  * both not be directories.  If the target is a directory, it must be
  * empty.  Both paths are resolved relative to the current working
  * directory.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct rename_args {
 	char	*from;
 	char	*to;
 };
 #endif
 int
 sys_rename(struct thread *td, struct rename_args *uap)
 {
 	int rv;
 
 	rv = kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, uap->to,
 	    UIO_USERSPACE);
 	return (rv);
 }
 
 /*
  * renameat(2): rename with each path resolved relative to its own
  * directory descriptor.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct renameat_args {
 	int	oldfd;
 	char	*old;
 	int	newfd;
 	char	*new;
 };
 #endif
 int
 sys_renameat(struct thread *td, struct renameat_args *uap)
 {
 	int rv;
 
 	rv = kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 	    UIO_USERSPACE);
 	return (rv);
 }
 
 /*
  * Rename 'old' (resolved relative to 'oldfd') to 'new' (resolved relative
  * to 'newfd').
  *
  * Returns 0 or an errno value.  Internally error == -1 marks the case
  * where source and target are the same vnode; it suppresses VOP_RENAME()
  * and is reported to the caller as success.
  */
 int
 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
     enum uio_seg pathseg)
 {
 	struct mount *mp = NULL;
 	struct vnode *tvp, *fvp, *tdvp;
 	struct nameidata fromnd, tond;
 	cap_rights_t rights;
 	int error;
 
 	/* Restart point after waiting out a failed vn_start_write(). */
 again:
 	bwillwrite();
 #ifdef MAC
 	NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 	    AUDITVNODE1, pathseg, old, oldfd,
 	    cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td);
 #else
 	NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
 	    pathseg, old, oldfd,
 	    cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td);
 #endif
 
 	if ((error = namei(&fromnd)) != 0)
 		return (error);
 #ifdef MAC
 	/*
 	 * NOTE(review): the result of this check is stored in 'error' but
 	 * the next assignment to 'error' (namei(&tond) below) overwrites it
 	 * before it is tested -- confirm this is intended.
 	 */
 	error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
 	    fromnd.ni_vp, &fromnd.ni_cnd);
 	VOP_UNLOCK(fromnd.ni_dvp, 0);
 	if (fromnd.ni_dvp != fromnd.ni_vp)
 		VOP_UNLOCK(fromnd.ni_vp, 0);
 #endif
 	fvp = fromnd.ni_vp;
 	NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
 	    SAVESTART | AUDITVNODE2, pathseg, new, newfd,
 	    cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td);
 	if (fromnd.ni_vp->v_type == VDIR)
 		tond.ni_cnd.cn_flags |= WILLBEDIR;
 	if ((error = namei(&tond)) != 0) {
 		/* Translate error code for rename("dir1", "dir2/."). */
 		if (error == EISDIR && fvp->v_type == VDIR)
 			error = EINVAL;
 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
 		vrele(fromnd.ni_dvp);
 		vrele(fvp);
 		goto out1;
 	}
 	tdvp = tond.ni_dvp;
 	tvp = tond.ni_vp;
 	error = vn_start_write(fvp, &mp, V_NOWAIT);
 	if (error != 0) {
 		/*
 		 * Could not start the write without sleeping: drop everything
 		 * both lookups returned, wait with V_XSLEEP, then redo the
 		 * whole operation from the top.
 		 */
 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
 		NDFREE(&tond, NDF_ONLY_PNBUF);
 		if (tvp != NULL)
 			vput(tvp);
 		if (tdvp == tvp)
 			vrele(tdvp);
 		else
 			vput(tdvp);
 		vrele(fromnd.ni_dvp);
 		vrele(fvp);
 		vrele(tond.ni_startdir);
 		if (fromnd.ni_startdir != NULL)
 			vrele(fromnd.ni_startdir);
 		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
 		if (error != 0)
 			return (error);
 		goto again;
 	}
 	if (tvp != NULL) {
 		/* An existing target must be of the same kind as the source. */
 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 			error = ENOTDIR;
 			goto out;
 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 			error = EISDIR;
 			goto out;
 		}
 #ifdef CAPABILITIES
 		if (newfd != AT_FDCWD) {
 			/*
 			 * If the target already exists we require CAP_UNLINKAT
 			 * from 'newfd'.
 			 */
 			error = cap_check(&tond.ni_filecaps.fc_rights,
 			    cap_rights_init(&rights, CAP_UNLINKAT));
 			if (error != 0)
 				goto out;
 		}
 #endif
 	}
 	/* The source may not be the directory that is to hold the target. */
 	if (fvp == tdvp) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * If the source is the same as the destination (that is, if they
 	 * are links to the same vnode), then there is nothing to do.
 	 */
 	if (fvp == tvp)
 		error = -1;
 #ifdef MAC
 	else
 		error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 #endif
 out:
 	if (error == 0) {
 		/* No vnode releases here, unlike the error branch below. */
 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 		    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
 		NDFREE(&tond, NDF_ONLY_PNBUF);
 	} else {
 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
 		NDFREE(&tond, NDF_ONLY_PNBUF);
 		if (tvp != NULL)
 			vput(tvp);
 		if (tdvp == tvp)
 			vrele(tdvp);
 		else
 			vput(tdvp);
 		vrele(fromnd.ni_dvp);
 		vrele(fvp);
 	}
 	vrele(tond.ni_startdir);
 	vn_finished_write(mp);
 out1:
 	if (fromnd.ni_startdir)
 		vrele(fromnd.ni_startdir);
 	/* Map the internal "same vnode" marker to success. */
 	if (error == -1)
 		return (0);
 	return (error);
 }
 
 /*
  * mkdir(2): create a directory named by 'path', resolved relative to the
  * current working directory.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct mkdir_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 sys_mkdir(struct thread *td, struct mkdir_args *uap)
 {
 	int rv;
 
 	rv = kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode);
 	return (rv);
 }
 
 /*
  * mkdirat(2): create a directory named relative to the directory
  * descriptor 'fd'.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct mkdirat_args {
 	int	fd;
 	char	*path;
 	mode_t	mode;
 };
 #endif
 int
 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 {
 	int rv;
 
 	rv = kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode);
 	return (rv);
 }
 
 /*
  * Create a directory named 'path', resolved relative to 'fd' (requiring
  * CAP_MKDIRAT).  The new directory's mode is 'mode' limited to
  * ACCESSPERMS with the process creation mask cleared.  Returns EEXIST if
  * the name already exists, otherwise 0 or an errno value.
  */
 int
 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
     int mode)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	struct vattr vattr;
 	struct nameidata nd;
 	cap_rights_t rights;
 	int error;
 
 	AUDIT_ARG_MODE(mode);
 	/* Restart point after waiting out a failed vn_start_write(). */
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 	    NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT),
 	    td);
 	nd.ni_cnd.cn_flags |= WILLBEDIR;
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
 	/* A non-NULL leaf vnode means the name is already taken. */
 	if (vp != NULL) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		/*
 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
 		 * the strange behaviour of leaving the vnode unlocked
 		 * if the target is the same vnode as the parent.
 		 */
 		if (vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		vrele(vp);
 		return (EEXIST);
 	}
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		/*
 		 * Could not start the write without sleeping: release the
 		 * lookup result, wait with V_XSLEEP, then retry from the top.
 		 */
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			return (error);
 		goto restart;
 	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VDIR;
 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
 #ifdef MAC
 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 	    &vattr);
 	if (error != 0)
 		goto out;
 #endif
 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 #ifdef MAC
 out:
 #endif
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
 	/* On success VOP_MKDIR() stored the new vnode in nd.ni_vp. */
 	if (error == 0)
 		vput(nd.ni_vp);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * rmdir(2): remove the directory named by 'path', resolved relative to
  * the current working directory.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct rmdir_args {
 	char	*path;
 };
 #endif
 int
 sys_rmdir(struct thread *td, struct rmdir_args *uap)
 {
 	int rv;
 
 	rv = kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE);
 	return (rv);
 }
 
 /*
  * Remove the directory named 'path', resolved relative to 'fd' (requiring
  * CAP_UNLINKAT).
  *
  * Returns ENOTDIR if the name is not a directory, EINVAL if it resolves
  * to its own parent ("."), EBUSY if it is the root of a mounted
  * filesystem, otherwise 0 or the error from the lookup, the MAC check or
  * VOP_RMDIR().
  */
 int
 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	struct nameidata nd;
 	cap_rights_t rights;
 	int error;
 
 	/* Restart point after waiting out a failed vn_start_write(). */
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
 	    pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
 	if (vp->v_type != VDIR) {
 		error = ENOTDIR;
 		goto out;
 	}
 	/*
 	 * No rmdir "." please.
 	 */
 	if (nd.ni_dvp == vp) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * The root of a mounted filesystem cannot be deleted.
 	 */
 	if (vp->v_vflag & VV_ROOT) {
 		error = EBUSY;
 		goto out;
 	}
 #ifdef MAC
 	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 	    &nd.ni_cnd);
 	if (error != 0)
 		goto out;
 #endif
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		/*
 		 * Could not start the write without sleeping: release the
 		 * lookup result, wait with V_XSLEEP, then retry from the top.
 		 */
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		vput(vp);
 		if (nd.ni_dvp == vp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			return (error);
 		goto restart;
 	}
 	vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 	vn_finished_write(mp);
 out:
 	/* Common exit: release the path buffer and both vnodes. */
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(vp);
 	if (nd.ni_dvp == vp)
 		vrele(nd.ni_dvp);
 	else
 		vput(nd.ni_dvp);
 	return (error);
 }
 
+#if defined(COMPAT_43) || defined(COMPAT_FREEBSD11)
+/*
+ * Read directory entries from 'fd' into a kernel buffer, convert each one
+ * to the struct freebsd11_dirent layout and copy it out to 'ubuf'.
+ *
+ * 'count' is clamped to 64KB.  If 'basep' is not NULL it receives the
+ * directory offset reported by kern_getdirentries().  If 'func' is not
+ * NULL it is called on every converted entry just before the copyout.
+ * Entries whose name does not fit in the old d_name array are skipped.
+ * On success td_retval[0] is the number of bytes written to 'ubuf'.
+ */
+int
+freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count,
+    long *basep, void (*func)(struct freebsd11_dirent *))
+{
+	struct freebsd11_dirent dstdp;
+	struct dirent *dp, *edp;
+	char *dirbuf;
+	off_t base;
+	ssize_t resid, ucount;
+	int error;
+
+	/* XXX arbitrary sanity limit on `count'. */
+	count = min(count, 64 * 1024);
+
+	dirbuf = malloc(count, M_TEMP, M_WAITOK);
+
+	error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid,
+	    UIO_SYSSPACE);
+	if (error != 0)
+		goto done;
+	if (basep != NULL)
+		*basep = base;
+
+	ucount = 0;
+	for (dp = (struct dirent *)dirbuf,
+	    edp = (struct dirent *)&dirbuf[count - resid];
+	    ucount < count && dp < edp; ) {
+		if (dp->d_reclen == 0)
+			break;
+		MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0));
+		if (dp->d_namlen >= sizeof(dstdp.d_name)) {
+			/*
+			 * The name cannot be represented in the old layout.
+			 * Step over the entry; a bare "continue" would
+			 * re-examine the same record forever.
+			 */
+			dp = (struct dirent *)((char *)dp + dp->d_reclen);
+			continue;
+		}
+		dstdp.d_type = dp->d_type;
+		dstdp.d_namlen = dp->d_namlen;
+		dstdp.d_fileno = dp->d_fileno;		/* truncate */
+		/* Header plus the name and its NUL, rounded up to 4 bytes. */
+		dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) +
+		    ((dp->d_namlen + 1 + 3) &~ 3);
+		bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen);
+		/* Zero the terminator and padding so no stale bytes leave. */
+		bzero(dstdp.d_name + dstdp.d_namlen,
+		    dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) -
+		    dstdp.d_namlen);
+		MPASS(dstdp.d_reclen <= dp->d_reclen);
+		MPASS(ucount + dstdp.d_reclen <= count);
+		if (func != NULL)
+			func(&dstdp);
+		error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen);
+		if (error != 0)
+			break;
+		dp = (struct dirent *)((char *)dp + dp->d_reclen);
+		ucount += dstdp.d_reclen;
+	}
+
+done:
+	free(dirbuf, M_TEMP);
+	if (error == 0)
+		td->td_retval[0] = ucount;
+	return (error);
+}
+#endif /* COMPAT */
+
 #ifdef COMPAT_43
+/*
+ * Per-entry hook for the COMPAT_43 getdirentries: rewrite the d_type and
+ * d_namlen bytes of a freebsd11_dirent so that, read together, they form
+ * the name length field the old interface expects.
+ */
+static void
+ogetdirentries_cvt(struct freebsd11_dirent *dp)
+{
+#if (BYTE_ORDER != LITTLE_ENDIAN)
+	/*
+	 * The dp->d_type is the high byte of the expected dp->d_namlen,
+	 * so must be zero'ed.
+	 */
+	dp->d_type = 0;
+#else
+	/*
+	 * The expected low byte of dp->d_namlen is our dp->d_type.
+	 * The high MBZ byte of dp->d_namlen is our dp->d_namlen.
+	 */
+	dp->d_type = dp->d_namlen;
+	dp->d_namlen = 0;
+#endif
+}
+
 /*
  * COMPAT_43 getdirentries: read a block of directory entries in a
  * filesystem independent format and copy the starting offset out to
  * 'basep'.
  *
  * NOTE(review): 'loff' is written only by kern_ogetdirentries(); confirm
  * that it stores through its third argument on every successful return,
  * otherwise the copyout below exports an uninitialized value.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct ogetdirentries_args {
 	int	fd;
 	char	*buf;
 	u_int	count;
 	long	*basep;
 };
 #endif
 int
 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 {
 	long loff;
 	int rv;
 
 	rv = kern_ogetdirentries(td, uap, &loff);
 	if (rv != 0)
 		return (rv);
 	return (copyout(&loff, uap->basep, sizeof(long)));
 }
 
 int
 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
     long *ploff)
 {
-	struct vnode *vp;
-	struct file *fp;
-	struct uio auio, kuio;
-	struct iovec aiov, kiov;
-	struct dirent *dp, *edp;
-	cap_rights_t rights;
-	caddr_t dirbuf;
-	int error, eofflag, readcnt;
-	long loff;
-	off_t foffset;
+	long base;
+	int error;
 
 	/* XXX arbitrary sanity limit on `count'. */
 	if (uap->count > 64 * 1024)
 		return (EINVAL);
-	error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp);
-	if (error != 0)
-		return (error);
-	if ((fp->f_flag & FREAD) == 0) {
-		fdrop(fp, td);
-		return (EBADF);
-	}
-	vp = fp->f_vnode;
-	foffset = foffset_lock(fp, 0);
-unionread:
-	if (vp->v_type != VDIR) {
-		foffset_unlock(fp, foffset, 0);
-		fdrop(fp, td);
-		return (EINVAL);
-	}
-	aiov.iov_base = uap->buf;
-	aiov.iov_len = uap->count;
-	auio.uio_iov = &aiov;
-	auio.uio_iovcnt = 1;
-	auio.uio_rw = UIO_READ;
-	auio.uio_segflg = UIO_USERSPACE;
-	auio.uio_td = td;
-	auio.uio_resid = uap->count;
-	vn_lock(vp, LK_SHARED | LK_RETRY);
-	loff = auio.uio_offset = foffset;
-#ifdef MAC
-	error = mac_vnode_check_readdir(td->td_ucred, vp);
-	if (error != 0) {
-		VOP_UNLOCK(vp, 0);
-		foffset_unlock(fp, foffset, FOF_NOUPDATE);
-		fdrop(fp, td);
-		return (error);
-	}
-#endif
-#	if (BYTE_ORDER != LITTLE_ENDIAN)
-		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
-			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
-			    NULL, NULL);
-			foffset = auio.uio_offset;
-		} else
-#	endif
-	{
-		kuio = auio;
-		kuio.uio_iov = &kiov;
-		kuio.uio_segflg = UIO_SYSSPACE;
-		kiov.iov_len = uap->count;
-		dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
-		kiov.iov_base = dirbuf;
-		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
-			    NULL, NULL);
-		foffset = kuio.uio_offset;
-		if (error == 0) {
-			readcnt = uap->count - kuio.uio_resid;
-			edp = (struct dirent *)&dirbuf[readcnt];
-			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
-#				if (BYTE_ORDER == LITTLE_ENDIAN)
-					/*
-					 * The expected low byte of
-					 * dp->d_namlen is our dp->d_type.
-					 * The high MBZ byte of dp->d_namlen
-					 * is our dp->d_namlen.
-					 */
-					dp->d_type = dp->d_namlen;
-					dp->d_namlen = 0;
-#				else
-					/*
-					 * The dp->d_type is the high byte
-					 * of the expected dp->d_namlen,
-					 * so must be zero'ed.
-					 */
-					dp->d_type = 0;
-#				endif
-				if (dp->d_reclen > 0) {
-					dp = (struct dirent *)
-					    ((char *)dp + dp->d_reclen);
-				} else {
-					error = EIO;
-					break;
-				}
-			}
-			if (dp >= edp)
-				error = uiomove(dirbuf, readcnt, &auio);
-		}
-		free(dirbuf, M_TEMP);
-	}
-	if (error != 0) {
-		VOP_UNLOCK(vp, 0);
-		foffset_unlock(fp, foffset, 0);
-		fdrop(fp, td);
-		return (error);
-	}
-	if (uap->count == auio.uio_resid &&
-	    (vp->v_vflag & VV_ROOT) &&
-	    (vp->v_mount->mnt_flag & MNT_UNION)) {
-		struct vnode *tvp = vp;
-		vp = vp->v_mount->mnt_vnodecovered;
-		VREF(vp);
-		fp->f_vnode = vp;
-		fp->f_data = vp;
-		foffset = 0;
-		vput(tvp);
-		goto unionread;
-	}
-	VOP_UNLOCK(vp, 0);
-	foffset_unlock(fp, foffset, 0);
-	fdrop(fp, td);
-	td->td_retval[0] = uap->count - auio.uio_resid;
-	if (error == 0)
-		*ploff = loff;
+
+	error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count,
+	    &base, ogetdirentries_cvt);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Report the starting offset through 'ploff' as the previous
+	 * implementation did; ogetdirentries() copies that value out, so
+	 * leaving it unset would export uninitialized kernel stack.
+	 */
+	*ploff = base;
+	if (uap->basep != NULL)
+		error = copyout(&base, uap->basep, sizeof(long));
+
 	return (error);
 }
 #endif /* COMPAT_43 */
 
-/*
- * Read a block of directory entries in a filesystem independent format.
- */
+#if defined(COMPAT_FREEBSD11)	/* entry points returning struct freebsd11_dirent records */
 #ifndef _SYS_SYSPROTO_H_
-struct getdirentries_args {
+struct freebsd11_getdirentries_args {
 	int	fd;
 	char	*buf;
 	u_int	count;
 	long	*basep;
 };
 #endif
 int
-sys_getdirentries(struct thread *td, struct getdirentries_args *uap)
+freebsd11_getdirentries(struct thread *td,
+    struct freebsd11_getdirentries_args *uap)
 {
 	long base;
 	int error;
 
+	error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count,
+	    &base, NULL);	/* NULL: no per-entry conversion hook */
+
+	if (error == 0 && uap->basep != NULL)	/* basep is optional */
+		error = copyout(&base, uap->basep, sizeof(long));
+	return (error);
+}
+
+int
+freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap)
+{
+	struct freebsd11_getdirentries_args ap;	/* repackaged arguments */
+
+	ap.fd = uap->fd;
+	ap.buf = uap->buf;
+	ap.count = uap->count;
+	ap.basep = NULL;	/* getdents has no base offset to report */
+	return (freebsd11_getdirentries(td, &ap));
+}
+#endif /* COMPAT_FREEBSD11 */
+
+/*
+ * Read a block of directory entries in a filesystem independent format.
+ *
+ * The entries are read straight into the user buffer.  If 'basep' is not
+ * NULL, the directory offset at which the read started is copied out to
+ * it as an off_t.
+ */
+int
+sys_getdirentries(struct thread *td, struct getdirentries_args *uap)
+{
+	off_t base;
+	int error;
+
 	error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 	    NULL, UIO_USERSPACE);	/* NULL: residual count not needed */
 	if (error != 0)
 		return (error);
 	if (uap->basep != NULL)
-		error = copyout(&base, uap->basep, sizeof(long));
+		error = copyout(&base, uap->basep, sizeof(off_t));
 	return (error);
 }
 
 /*
  * Read up to 'count' bytes of directory entries from descriptor 'fd' into
  * 'buf', which lives in the address space named by 'bufseg'.
  *
  * On success '*basep' is the file offset the read started at, '*residp'
  * (if not NULL) is the unused part of the buffer, and td_retval[0] is the
  * number of bytes produced.  Returns EINVAL if 'count' exceeds IOSIZE_MAX
  * or the vnode is not a directory, EBADF if the file is not open for
  * reading, otherwise the error from the MAC check or VOP_READDIR().
  */
 int
-kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
-    long *basep, ssize_t *residp, enum uio_seg bufseg)
+kern_getdirentries(struct thread *td, int fd, char *buf, size_t count,
+    off_t *basep, ssize_t *residp, enum uio_seg bufseg)
 {
 	struct vnode *vp;
 	struct file *fp;
 	struct uio auio;
 	struct iovec aiov;
 	cap_rights_t rights;
-	long loff;
+	off_t loff;
 	int error, eofflag;
 	off_t foffset;
 
 	AUDIT_ARG_FD(fd);
 	if (count > IOSIZE_MAX)
 		return (EINVAL);
 	auio.uio_resid = count;
 	error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp);
 	if (error != 0)
 		return (error);
 	if ((fp->f_flag & FREAD) == 0) {
 		fdrop(fp, td);
 		return (EBADF);
 	}
 	vp = fp->f_vnode;
 	foffset = foffset_lock(fp, 0);
 	/* Re-entered after switching to the covered vnode of a union mount. */
 unionread:
 	if (vp->v_type != VDIR) {
 		error = EINVAL;
 		goto fail;
 	}
 	aiov.iov_base = buf;
 	aiov.iov_len = count;
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	auio.uio_rw = UIO_READ;
 	auio.uio_segflg = bufseg;
 	auio.uio_td = td;
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	/* Remember the starting offset; it is what *basep reports. */
 	loff = auio.uio_offset = foffset;
 #ifdef MAC
 	error = mac_vnode_check_readdir(td->td_ucred, vp);
 	if (error == 0)
 #endif
 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 		    NULL);
 	foffset = auio.uio_offset;
 	if (error != 0) {
 		VOP_UNLOCK(vp, 0);
 		goto fail;
 	}
 	/*
 	 * Nothing was read from the root of a MNT_UNION mount: switch the
 	 * file over to the vnode this mount covers and read again from
 	 * offset 0.
 	 */
 	if (count == auio.uio_resid &&
 	    (vp->v_vflag & VV_ROOT) &&
 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
 		struct vnode *tvp = vp;
 
 		vp = vp->v_mount->mnt_vnodecovered;
 		VREF(vp);
 		fp->f_vnode = vp;
 		fp->f_data = vp;
 		foffset = 0;
 		vput(tvp);
 		goto unionread;
 	}
 	VOP_UNLOCK(vp, 0);
 	*basep = loff;
 	if (residp != NULL)
 		*residp = auio.uio_resid;
 	td->td_retval[0] = count - auio.uio_resid;
 	/* Success falls through here as well, with error == 0. */
 fail:
 	foffset_unlock(fp, foffset, 0);
 	fdrop(fp, td);
 	return (error);
-}
-
-#ifndef _SYS_SYSPROTO_H_
-struct getdents_args {
-	int fd;
-	char *buf;
-	size_t count;
-};
-#endif
-int
-sys_getdents(struct thread *td, struct getdents_args *uap)
-{
-	struct getdirentries_args ap;
-
-	ap.fd = uap->fd;
-	ap.buf = uap->buf;
-	ap.count = uap->count;
-	ap.basep = NULL;
-	return (sys_getdirentries(td, &ap));
 }
 
 /*
  * umask(2): install a new file-creation mode mask for the process and
  * report the previous mask as the syscall return value.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct umask_args {
 	int	newmask;
 };
 #endif
 int
 sys_umask(struct thread *td, struct umask_args *uap)
 {
 	struct filedesc *fdesc = td->td_proc->p_fd;
 	int newmask = uap->newmask & ALLPERMS;
 
 	/* Read the old mask and store the new one under the exclusive lock. */
 	FILEDESC_XLOCK(fdesc);
 	td->td_retval[0] = fdesc->fd_cmask;
 	fdesc->fd_cmask = newmask;
 	FILEDESC_XUNLOCK(fdesc);
 	return (0);
 }
 
 /*
  * Void all references to file by ripping underlying filesystem away from
  * vnode.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct revoke_args {
 	char	*path;
 };
 #endif
 int
 sys_revoke(struct thread *td, struct revoke_args *uap)
 {
 	struct vnode *vp;
 	struct vattr vattr;
 	struct nameidata nd;
 	int error;
 
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 	    uap->path, td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 		error = EINVAL;
 		goto out;
 	}
 #ifdef MAC
 	error = mac_vnode_check_revoke(td->td_ucred, vp);
 	if (error != 0)
 		goto out;
 #endif
 	error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 	if (error != 0)
 		goto out;
 	if (td->td_ucred->cr_uid != vattr.va_uid) {
 		error = priv_check(td, PRIV_VFS_ADMIN);
 		if (error != 0)
 			goto out;
 	}
 	if (vcount(vp) > 1)
 		VOP_REVOKE(vp, REVOKEALL);
 out:
 	vput(vp);
 	return (error);
 }
 
 /*
  * Translate a user file descriptor into a referenced kernel file entry
  * whose f_vnode is set, applying the capability rights in rightsp.
  * On success *fpp holds the file and the caller owns one reference.
  */
 int
 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
 {
 	struct file *filep;
 	int rv;
 
 	rv = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &filep, NULL);
 	if (rv != 0)
 		return (rv);
 
 	/*
 	 * Accept the file only if it has a vnode and is fully set up.
 	 * A file may have f_vnode assigned while f_ops is still
 	 * badfileops: devfs_open() creates that state transiently for
 	 * csw d_fdopen(), and the dupfdopen() handling in kern_openat()
 	 * can publish such a half-initialized file in the descriptor
 	 * table where another thread may find it.  Testing f_ops closes
 	 * that race.
 	 */
 	if (filep->f_vnode != NULL && filep->f_ops != &badfileops) {
 		*fpp = filep;
 		return (0);
 	}
 	fdrop(filep, td);
 	return (EINVAL);
 }
 
 
 /*
  * Get an (NFS) file handle.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lgetfh_args {
 	char	*fname;
 	fhandle_t *fhp;
 };
 #endif
 int
 sys_lgetfh(struct thread *td, struct lgetfh_args *uap)
 {
 	struct nameidata nd;
 	fhandle_t fh;
 	struct vnode *vp;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_GETFH);
 	if (error != 0)
 		return (error);
 	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 	    uap->fname, td);
 	error = namei(&nd);
 	if (error != 0)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
 	bzero(&fh, sizeof(fh));
 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 	error = VOP_VPTOFH(vp, &fh.fh_fid);
 	vput(vp);
 	if (error == 0)
 		error = copyout(&fh, uap->fhp, sizeof (fh));
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct getfh_args {
 	char	*fname;
 	fhandle_t *fhp;
 };
 #endif
 int
 sys_getfh(struct thread *td, struct getfh_args *uap)
 {
 	struct nameidata nd;
 	fhandle_t fh;
 	struct vnode *vp;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_GETFH);
 	if (error != 0)
 		return (error);
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 	    uap->fname, td);
 	error = namei(&nd);
 	if (error != 0)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
 	bzero(&fh, sizeof(fh));
 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 	error = VOP_VPTOFH(vp, &fh.fh_fid);
 	vput(vp);
 	if (error == 0)
 		error = copyout(&fh, uap->fhp, sizeof (fh));
 	return (error);
 }
 
 /*
  * syscall for the rpc.lockd to use to translate a NFS file handle into an
  * open descriptor.
  *
  * warning: do not remove the priv_check() call or this becomes one giant
  * security hole.
  *
  * On return td->td_retval[0] holds the new descriptor index, or -1 if no
  * descriptor was installed.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fhopen_args {
 	const struct fhandle *u_fhp;
 	int flags;
 };
 #endif
 int
 sys_fhopen(struct thread *td, struct fhopen_args *uap)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	struct fhandle fhp;
 	struct file *fp;
 	int fmode, error;
 	int indx;
 
 	error = priv_check(td, PRIV_VFS_FHOPEN);
 	if (error != 0)
 		return (error);
 	indx = -1;
 	fmode = FFLAGS(uap->flags);
 	/* why not allow a non-read/write open for our lockd? */
 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 		return (EINVAL);
 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 	if (error != 0)
 		return(error);
 	/* find the mount point */
 	mp = vfs_busyfs(&fhp.fh_fsid);
 	if (mp == NULL)
 		return (ESTALE);
 	/* now give me my vnode, it gets returned to me locked */
 	error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 	vfs_unbusy(mp);
 	if (error != 0)
 		return (error);
 
 	error = falloc_noinstall(td, &fp);
 	if (error != 0) {
 		vput(vp);
 		return (error);
 	}
 	/*
 	 * An extra reference on `fp' has been held for us by
 	 * falloc_noinstall().
 	 */
 
 #ifdef INVARIANTS
 	/* Sentinel for the KASSERT below: fdopen() must not run here. */
 	td->td_dupfd = -1;
 #endif
 	error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
 	if (error != 0) {
 		KASSERT(fp->f_ops == &badfileops,
 		    ("VOP_OPEN in fhopen() set f_ops"));
 		KASSERT(td->td_dupfd < 0,
 		    ("fhopen() encountered fdopen()"));
 
 		/* Open failed: release the locked vnode, then drop fp. */
 		vput(vp);
 		goto bad;
 	}
 #ifdef INVARIANTS
 	td->td_dupfd = 0;
 #endif
 	/* Bind the opened vnode to the file before unlocking it. */
 	fp->f_vnode = vp;
 	fp->f_seqcount = 1;
 	finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp,
 	    &vnops);
 	VOP_UNLOCK(vp, 0);
 	if ((fmode & O_TRUNC) != 0) {
 		error = fo_truncate(fp, 0, td->td_ucred, td);
 		if (error != 0)
 			goto bad;
 	}
 
 	error = finstall(td, fp, &indx, fmode, NULL);
 bad:
 	/* Drop the falloc_noinstall() reference on every path. */
 	fdrop(fp, td);
 	td->td_retval[0] = indx;
 	return (error);
 }
 
 /*
  * Stat an (NFS) file handle.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fhstat_args {
 	struct fhandle *u_fhp;
 	struct stat *sb;
 };
 #endif
 int
 sys_fhstat(struct thread *td, struct fhstat_args *uap)
 {
 	struct stat sb;
 	struct fhandle fh;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fh));
 	if (error != 0)
 		return (error);
 	error = kern_fhstat(td, fh, &sb);
 	if (error == 0)
 		error = copyout(&sb, uap->sb, sizeof(sb));
 	return (error);
 }
 
 /*
  * Kernel-side fhstat: after a PRIV_VFS_FHSTAT check, resolve the file
  * handle to a vnode and fill *sb via vn_stat().  Returns ESTALE when no
  * mount matches the handle's fsid.
  */
 int
 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
 {
 	struct vnode *vp;
 	struct mount *mp;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_FHSTAT);
 	if (error != 0)
 		return (error);
 	mp = vfs_busyfs(&fh.fh_fsid);
 	if (mp == NULL)
 		return (ESTALE);
 	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 	/* The mount is only needed for the handle lookup itself. */
 	vfs_unbusy(mp);
 	if (error == 0) {
 		error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
 		vput(vp);
 	}
 	return (error);
 }
 
 /*
  * Implement fstatfs() for (NFS) file handles.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fhstatfs_args {
 	struct fhandle *u_fhp;
 	struct statfs *buf;
 };
 #endif
 int
 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap)
 {
 	struct statfs *sfp;
 	fhandle_t fh;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 	if (error != 0)
 		return (error);
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fhstatfs(td, fh, sfp);
 	if (error == 0)
 		error = copyout(sfp, uap->buf, sizeof(*sfp));
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Kernel-side fhstatfs: after a PRIV_VFS_FHSTATFS check, locate the mount
  * named by the handle's fsid, verify that the handle resolves to a vnode,
  * and copy the mount's statistics into *buf.
  *
  * Returns 0 on success, ESTALE when no mount matches the fsid, or the
  * error from the privilege, handle, jail-visibility, MAC or VFS_STATFS
  * step that failed.
  */
 int
 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 {
 	struct statfs *sp;
 	struct mount *mp;
 	struct vnode *vp;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_FHSTATFS);
 	if (error != 0)
 		return (error);
 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 		return (ESTALE);
 	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 	if (error != 0) {
 		vfs_unbusy(mp);
 		return (error);
 	}
 	/* The vnode is not used further; only the lookup result matters. */
 	vput(vp);
 	error = prison_canseemount(td->td_ucred, mp);
 	if (error != 0)
 		goto out;
 #ifdef MAC
 	error = mac_mount_check_stat(td->td_ucred, mp);
 	if (error != 0)
 		goto out;
 #endif
 	/*
 	 * Set these in case the underlying filesystem fails to do so.
 	 */
 	sp = &mp->mnt_stat;
 	sp->f_version = STATFS_VERSION;
 	sp->f_namemax = NAME_MAX;
 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 	error = VFS_STATFS(mp, sp);
 	if (error == 0)
 		*buf = *sp;
 out:
 	/* The mount stays busy until its statistics have been copied. */
 	vfs_unbusy(mp);
 	return (error);
 }
 
 /*
  * Kernel-side posix_fallocate(): ask the filesystem to allocate backing
  * store for the byte range [offset, offset + len) of the file open on fd.
  *
  * Returns 0 on success, otherwise an errno value:
  *   EINVAL  offset is negative or len is not positive;
  *   EFBIG   offset + len would exceed OFF_MAX;
  *   ESPIPE  the descriptor is not seekable;
  *   EBADF   the descriptor is not open for writing;
  *   ENODEV  the descriptor is not a vnode, or not a regular file;
  * or whatever fget(), vn_start_write(), vn_lock(), the MAC write check or
  * VOP_ALLOCATE() reports.
  */
 int
 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 {
 	struct file *fp;
 	struct mount *mp;
 	struct vnode *vp;
 	cap_rights_t rights;
 	off_t olen, ooffset;
 	int error;
 #ifdef AUDIT
 	int audited_vnode1 = 0;
 #endif
 
 	/* Audit the descriptor once, before any early return. */
 	AUDIT_ARG_FD(fd);
 	if (offset < 0 || len <= 0)
 		return (EINVAL);
 	/* Check for wrap. */
 	if (offset > OFF_MAX - len)
 		return (EFBIG);
 	error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp);
 	if (error != 0)
 		return (error);
 	AUDIT_ARG_FILE(td->td_proc, fp);
 	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 		error = ESPIPE;
 		goto out;
 	}
 	if ((fp->f_flag & FWRITE) == 0) {
 		error = EBADF;
 		goto out;
 	}
 	if (fp->f_type != DTYPE_VNODE) {
 		error = ENODEV;
 		goto out;
 	}
 	vp = fp->f_vnode;
 	if (vp->v_type != VREG) {
 		error = ENODEV;
 		goto out;
 	}
 
 	/* Allocating blocks may take a long time, so iterate. */
 	for (;;) {
 		olen = len;
 		ooffset = offset;
 
 		bwillwrite();
 		mp = NULL;
 		error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 		if (error != 0)
 			break;
 		error = vn_lock(vp, LK_EXCLUSIVE);
 		if (error != 0) {
 			vn_finished_write(mp);
 			break;
 		}
 #ifdef AUDIT
 		/* Record the vnode only on the first pass. */
 		if (!audited_vnode1) {
 			AUDIT_ARG_VNODE1(vp);
 			audited_vnode1 = 1;
 		}
 #endif
 #ifdef MAC
 		error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 		if (error == 0)
 #endif
 			error = VOP_ALLOCATE(vp, &offset, &len);
 		VOP_UNLOCK(vp, 0);
 		vn_finished_write(mp);
 
 		/*
 		 * VOP_ALLOCATE() may advance offset and shrink len, but the
 		 * end of the range must stay put.  %jx takes uintmax_t, so
 		 * cast the off_t arguments explicitly.
 		 */
 		if (olen + ooffset != offset + len) {
 			panic("offset + len changed from %jx/%jx to %jx/%jx",
 			    (uintmax_t)ooffset, (uintmax_t)olen,
 			    (uintmax_t)offset, (uintmax_t)len);
 		}
 		if (error != 0 || len == 0)
 			break;
 		KASSERT(olen > len, ("Iteration did not make progress?"));
 		maybe_yield();
 	}
  out:
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * posix_fallocate(2) entry point: run the kernel implementation and pass
  * its result through kern_posix_error().
  */
 int
 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 {
 
 	return (kern_posix_error(td,
 	    kern_posix_fallocate(td, uap->fd, uap->offset, uap->len)));
 }
 
 /*
  * Unlike madvise(2), we do not make a best effort to remember every
  * possible caching hint.  Instead, we remember the last setting with
  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
  * region of any current setting.
  *
  * A len of 0 means "through OFF_MAX".  Returns 0 on success, EINVAL for a
  * bad range or unknown advice, ESPIPE for a non-seekable descriptor,
  * ENODEV for anything that is not a regular-file vnode, or the error from
  * fget() or VOP_ADVISE().
  */
 int
 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
     int advice)
 {
 	struct fadvise_info *fa, *new;
 	struct file *fp;
 	struct vnode *vp;
 	cap_rights_t rights;
 	off_t end;
 	int error;
 
 	if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 		return (EINVAL);
 	AUDIT_ARG_VALUE(advice);
 	/*
 	 * Allocate a record up front for the advice kinds that store one;
 	 * the M_WAITOK allocation happens before the mutex below is taken.
 	 */
 	switch (advice) {
 	case POSIX_FADV_SEQUENTIAL:
 	case POSIX_FADV_RANDOM:
 	case POSIX_FADV_NOREUSE:
 		new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 		break;
 	case POSIX_FADV_NORMAL:
 	case POSIX_FADV_WILLNEED:
 	case POSIX_FADV_DONTNEED:
 		new = NULL;
 		break;
 	default:
 		return (EINVAL);
 	}
 	/* XXX: CAP_POSIX_FADVISE? */
 	AUDIT_ARG_FD(fd);
 	/*
 	 * NOTE(review): the out: path tests fp, so this relies on fget()
 	 * storing NULL in fp when it fails -- confirm against fget().
 	 */
 	error = fget(td, fd, cap_rights_init(&rights), &fp);
 	if (error != 0)
 		goto out;
 	AUDIT_ARG_FILE(td->td_proc, fp);
 	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 		error = ESPIPE;
 		goto out;
 	}
 	if (fp->f_type != DTYPE_VNODE) {
 		error = ENODEV;
 		goto out;
 	}
 	vp = fp->f_vnode;
 	if (vp->v_type != VREG) {
 		error = ENODEV;
 		goto out;
 	}
 	/* end is inclusive. */
 	if (len == 0)
 		end = OFF_MAX;
 	else
 		end = offset + len - 1;
 	switch (advice) {
 	case POSIX_FADV_SEQUENTIAL:
 	case POSIX_FADV_RANDOM:
 	case POSIX_FADV_NOREUSE:
 		/*
 		 * Try to merge any existing non-standard region with
 		 * this new region if possible, otherwise create a new
 		 * non-standard region for this request.
 		 */
 		mtx_pool_lock(mtxpool_sleep, fp);
 		fa = fp->f_advice;
 		/* Merge when same advice and ranges overlap or are adjacent. */
 		if (fa != NULL && fa->fa_advice == advice &&
 		    ((fa->fa_start <= end && fa->fa_end >= offset) ||
 		    (end != OFF_MAX && fa->fa_start == end + 1) ||
 		    (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 			if (offset < fa->fa_start)
 				fa->fa_start = offset;
 			if (end > fa->fa_end)
 				fa->fa_end = end;
 		} else {
 			new->fa_advice = advice;
 			new->fa_start = offset;
 			new->fa_end = end;
 			fp->f_advice = new;
 			/* Hand the replaced record to the free() at out:. */
 			new = fa;
 		}
 		mtx_pool_unlock(mtxpool_sleep, fp);
 		break;
 	case POSIX_FADV_NORMAL:
 		/*
 		 * If the "normal" region overlaps with an existing
 		 * non-standard region, trim or remove the
 		 * non-standard region.
 		 */
 		mtx_pool_lock(mtxpool_sleep, fp);
 		fa = fp->f_advice;
 		if (fa != NULL) {
 			if (offset <= fa->fa_start && end >= fa->fa_end) {
 				/* Fully covered: detach; freed at out:. */
 				new = fa;
 				fp->f_advice = NULL;
 			} else if (offset <= fa->fa_start &&
 			    end >= fa->fa_start)
 				fa->fa_start = end + 1;
 			else if (offset <= fa->fa_end && end >= fa->fa_end)
 				fa->fa_end = offset - 1;
 			else if (offset >= fa->fa_start && end <= fa->fa_end) {
 				/*
 				 * If the "normal" region is a middle
 				 * portion of the existing
 				 * non-standard region, just remove
 				 * the whole thing rather than picking
 				 * one side or the other to
 				 * preserve.
 				 */
 				new = fa;
 				fp->f_advice = NULL;
 			}
 		}
 		mtx_pool_unlock(mtxpool_sleep, fp);
 		break;
 	case POSIX_FADV_WILLNEED:
 	case POSIX_FADV_DONTNEED:
 		/* These are not recorded; pass them straight to the vnode. */
 		error = VOP_ADVISE(vp, offset, end, advice);
 		break;
 	}
 out:
 	if (fp != NULL)
 		fdrop(fp, td);
 	/* new is an unused allocation, a displaced record, or NULL. */
 	free(new, M_FADVISE);
 	return (error);
 }
 
 /*
  * posix_fadvise(2) entry point: run the kernel implementation and pass
  * its result through kern_posix_error().
  */
 int
 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 {
 
 	return (kern_posix_error(td, kern_posix_fadvise(td, uap->fd,
 	    uap->offset, uap->len, uap->advice)));
 }
Index: head/sys/kern/vfs_vnops.c
===================================================================
--- head/sys/kern/vfs_vnops.c	(revision 318735)
+++ head/sys/kern/vfs_vnops.c	(revision 318736)
@@ -1,2491 +1,2495 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
  * Copyright (c) 2013, 2014 The FreeBSD Foundation
  *
  * Portions of this software were developed by Konstantin Belousov
  * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_vnops.c	8.2 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_hwpmc_hooks.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/disk.h>
 #include <sys/fail.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/kdb.h>
 #include <sys/stat.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/vnode.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/filio.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/ttycom.h>
 #include <sys/conf.h>
 #include <sys/syslog.h>
 #include <sys/unistd.h>
 #include <sys/user.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vnode_pager.h>
 
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 #endif
 
 /* Forward declarations of the file-private vnode file operations. */
 static fo_rdwr_t	vn_read;
 static fo_rdwr_t	vn_write;
 static fo_rdwr_t	vn_io_fault;
 static fo_truncate_t	vn_truncate;
 static fo_ioctl_t	vn_ioctl;
 static fo_poll_t	vn_poll;
 static fo_kqfilter_t	vn_kqfilter;
 static fo_stat_t	vn_statfile;
 static fo_close_t	vn_closefile;
 static fo_mmap_t	vn_mmap;
 
 /*
  * File operations vector for vnode-backed files.  Both the read and the
  * write slot point at vn_io_fault; vn_read and vn_write are not installed
  * here directly.
  */
 struct 	fileops vnops = {
 	.fo_read = vn_io_fault,
 	.fo_write = vn_io_fault,
 	.fo_truncate = vn_truncate,
 	.fo_ioctl = vn_ioctl,
 	.fo_poll = vn_poll,
 	.fo_kqfilter = vn_kqfilter,
 	.fo_stat = vn_statfile,
 	.fo_close = vn_closefile,
 	.fo_chmod = vn_chmod,
 	.fo_chown = vn_chown,
 	.fo_sendfile = vn_sendfile,
 	.fo_seek = vn_seek,
 	.fo_fill_kinfo = vn_fill_kinfo,
 	.fo_mmap = vn_mmap,
 	.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
 };
 
 /* NOTE(review): presumably the page-hold batch size for vn_io_fault -- confirm at its use. */
 static const int io_hold_cnt = 16;
 /* debug.vn_io_fault_enable: read-write switch tested by do_vn_io_fault(). */
 static int vn_io_fault_enable = 1;
 SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_enable, CTLFLAG_RW,
     &vn_io_fault_enable, 0, "Enable vn_io_fault lock avoidance");
 /* debug.vn_io_fault_prefault: read-write switch, off by default. */
 static int vn_io_fault_prefault = 0;
 SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_prefault, CTLFLAG_RW,
     &vn_io_fault_prefault, 0, "Enable vn_io_fault prefaulting");
 /* debug.vn_io_faults: read-only counter exported for inspection. */
 static u_long vn_io_faults_cnt;
 SYSCTL_ULONG(_debug, OID_AUTO, vn_io_faults, CTLFLAG_RD,
     &vn_io_faults_cnt, 0, "Count of vn_io_fault lock avoidance triggers");
 
 /*
  * Returns true if vn_io_fault mode of handling the i/o request should
  * be used.
  */
 static bool
 do_vn_io_fault(struct vnode *vp, struct uio *uio)
 {
 	struct mount *mp;
 
 	return (uio->uio_segflg == UIO_USERSPACE && vp->v_type == VREG &&
 	    (mp = vp->v_mount) != NULL &&
 	    (mp->mnt_kern_flag & MNTK_NO_IOPF) != 0 && vn_io_fault_enable);
 }
 
 /*
  * Structure used to pass arguments to vn_io_fault1(), to do either
  * file- or vnode-based I/O calls.
  */
 struct vn_io_fault_args {
 	/* Selects which member of the args union is valid. */
 	enum {
 		VN_IO_FAULT_FOP,
 		VN_IO_FAULT_VOP
 	} kind;
 	struct ucred *cred;	/* credentials used for the i/o */
 	int flags;		/* i/o flags forwarded to the operation */
 	union {
 		/* VN_IO_FAULT_FOP: file plus the fo_rdwr_t to invoke. */
 		struct fop_args_tag {
 			struct file *fp;
 			fo_rdwr_t *doio;
 		} fop_args;
 		/* VN_IO_FAULT_VOP: vnode to operate on directly. */
 		struct vop_args_tag {
 			struct vnode *vp;
 		} vop_args;
 	} args;
 };
 
 /* Forward declaration; vn_rdwr() calls this before its definition. */
 static int vn_io_fault1(struct vnode *vp, struct uio *uio,
     struct vn_io_fault_args *args, struct thread *td);
 
 /*
  * Open a vnode by name using the credentials of the thread recorded in
  * the nameidata.  Thin wrapper around vn_open_cred() with no extra
  * vn_open_flags.
  *
  * ndp    initialized lookup state; ndp->ni_cnd.cn_thread must be set
  * flagp  in/out open flags, updated by vn_open_cred()
  * cmode  creation mode, used when the file is created
  * fp     file being opened, passed through to vn_open_cred()
  *
  * Returns 0 or the errno value from vn_open_cred().
  */
 int
 vn_open(struct nameidata *ndp, int *flagp, int cmode, struct file *fp)
 {
 	struct thread *td = ndp->ni_cnd.cn_thread;
 
 	return (vn_open_cred(ndp, flagp, cmode, 0, td->td_ucred, fp));
 }
 
 /*
  * Common code for vnode open operations via a name lookup.
  * Lookup the vnode and invoke VOP_CREATE if needed.
  * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
  *
  * Note that this does NOT free nameidata for the successful case,
  * due to the NDINIT being done elsewhere.
  *
  * *flagp is read as the requested open flags and written back with the
  * flags actually used (O_TRUNC is cleared after a create, O_CREAT when
  * the file already existed).  Returns 0 or an errno value; the
  * O_CREAT | O_EXCL | O_DIRECTORY combination yields EINVAL.
  */
 int
 vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, u_int vn_open_flags,
     struct ucred *cred, struct file *fp)
 {
 	struct vnode *vp;
 	struct mount *mp;
 	struct thread *td = ndp->ni_cnd.cn_thread;
 	struct vattr vat;
 	struct vattr *vap = &vat;
 	int fmode, error;
 
 restart:
 	fmode = *flagp;
 	if ((fmode & (O_CREAT | O_EXCL | O_DIRECTORY)) == (O_CREAT |
 	    O_EXCL | O_DIRECTORY))
 		return (EINVAL);
 	else if ((fmode & (O_CREAT | O_DIRECTORY)) == O_CREAT) {
 		/* Create path: look up with the parent directory locked. */
 		ndp->ni_cnd.cn_nameiop = CREATE;
 		/*
 		 * Set NOCACHE to avoid flushing the cache when
 		 * rolling in many files at once.
 		 */
 		ndp->ni_cnd.cn_flags = ISOPEN | LOCKPARENT | LOCKLEAF | NOCACHE;
 		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
 			ndp->ni_cnd.cn_flags |= FOLLOW;
 		if (!(vn_open_flags & VN_OPEN_NOAUDIT))
 			ndp->ni_cnd.cn_flags |= AUDITVNODE1;
 		if (vn_open_flags & VN_OPEN_NOCAPCHECK)
 			ndp->ni_cnd.cn_flags |= NOCAPCHECK;
 		bwillwrite();
 		if ((error = namei(ndp)) != 0)
 			return (error);
 		if (ndp->ni_vp == NULL) {
 			/* The name does not exist yet: create a regular file. */
 			VATTR_NULL(vap);
 			vap->va_type = VREG;
 			vap->va_mode = cmode;
 			if (fmode & O_EXCL)
 				vap->va_vaflags |= VA_EXCLUSIVE;
 			/*
 			 * If write access cannot start without sleeping,
 			 * drop the lookup state, wait for it to become
 			 * possible, and redo the lookup from the top.
 			 */
 			if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
 				NDFREE(ndp, NDF_ONLY_PNBUF);
 				vput(ndp->ni_dvp);
 				if ((error = vn_start_write(NULL, &mp,
 				    V_XSLEEP | PCATCH)) != 0)
 					return (error);
 				goto restart;
 			}
 			if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0)
 				ndp->ni_cnd.cn_flags |= MAKEENTRY;
 #ifdef MAC
 			error = mac_vnode_check_create(cred, ndp->ni_dvp,
 			    &ndp->ni_cnd, vap);
 			if (error == 0)
 #endif
 				error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
 						   &ndp->ni_cnd, vap);
 			vput(ndp->ni_dvp);
 			vn_finished_write(mp);
 			if (error) {
 				NDFREE(ndp, NDF_ONLY_PNBUF);
 				return (error);
 			}
 			/* A freshly created file has nothing to truncate. */
 			fmode &= ~O_TRUNC;
 			vp = ndp->ni_vp;
 		} else {
 			/*
 			 * The name already exists.  Release the parent:
 			 * vrele() when it is the same vnode as the leaf,
 			 * vput() otherwise.
 			 */
 			if (ndp->ni_dvp == ndp->ni_vp)
 				vrele(ndp->ni_dvp);
 			else
 				vput(ndp->ni_dvp);
 			ndp->ni_dvp = NULL;
 			vp = ndp->ni_vp;
 			if (fmode & O_EXCL) {
 				error = EEXIST;
 				goto bad;
 			}
 			fmode &= ~O_CREAT;
 		}
 	} else {
 		/* Plain lookup; a read-only open takes a shared leaf lock. */
 		ndp->ni_cnd.cn_nameiop = LOOKUP;
 		ndp->ni_cnd.cn_flags = ISOPEN |
 		    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
 		if (!(fmode & FWRITE))
 			ndp->ni_cnd.cn_flags |= LOCKSHARED;
 		if (!(vn_open_flags & VN_OPEN_NOAUDIT))
 			ndp->ni_cnd.cn_flags |= AUDITVNODE1;
 		if (vn_open_flags & VN_OPEN_NOCAPCHECK)
 			ndp->ni_cnd.cn_flags |= NOCAPCHECK;
 		if ((error = namei(ndp)) != 0)
 			return (error);
 		vp = ndp->ni_vp;
 	}
 	error = vn_open_vnode(vp, fmode, cred, td, fp);
 	if (error)
 		goto bad;
 	*flagp = fmode;
 	return (0);
 bad:
 	/* Failure: free the path buffer and release the locked leaf vnode. */
 	NDFREE(ndp, NDF_ONLY_PNBUF);
 	vput(vp);
 	*flagp = fmode;
 	ndp->ni_vp = NULL;
 	return (error);
 }
 
 /*
  * Common code for vnode open operations once a vnode is located.
  * Check permissions, and call the VOP_OPEN routine.
  *
  * The vnode lock is held on return (asserted at the end).  fp must be
  * non-NULL when O_EXLOCK or O_SHLOCK is requested.  Returns 0 or an errno
  * value: EMLINK for a symlink, EOPNOTSUPP for a socket, ENOTDIR for
  * O_DIRECTORY on a non-directory, EISDIR for a write or truncate open of
  * a directory, or the error from the MAC, access, open or locking step.
  */
 int
 vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
     struct thread *td, struct file *fp)
 {
 	accmode_t accmode;
 	struct flock lf;
 	int error, lock_flags, type;
 
 	if (vp->v_type == VLNK)
 		return (EMLINK);
 	if (vp->v_type == VSOCK)
 		return (EOPNOTSUPP);
 	if (vp->v_type != VDIR && fmode & O_DIRECTORY)
 		return (ENOTDIR);
 	/* Translate the open flags into the access mode to check. */
 	accmode = 0;
 	if (fmode & (FWRITE | O_TRUNC)) {
 		if (vp->v_type == VDIR)
 			return (EISDIR);
 		accmode |= VWRITE;
 	}
 	if (fmode & FREAD)
 		accmode |= VREAD;
 	if (fmode & FEXEC)
 		accmode |= VEXEC;
 	if ((fmode & O_APPEND) && (fmode & FWRITE))
 		accmode |= VAPPEND;
 #ifdef MAC
 	/* VCREAT and VVERIFY are for the MAC check only; cleared below. */
 	if (fmode & O_CREAT)
 		accmode |= VCREAT;
 	if (fmode & O_VERIFY)
 		accmode |= VVERIFY;
 	error = mac_vnode_check_open(cred, vp, accmode);
 	if (error)
 		return (error);
 
 	accmode &= ~(VCREAT | VVERIFY);
 #endif
 	/* The write and access checks are skipped when O_CREAT is set. */
 	if ((fmode & O_CREAT) == 0) {
 		if (accmode & VWRITE) {
 			error = vn_writechk(vp);
 			if (error)
 				return (error);
 		}
 		if (accmode) {
 		        error = VOP_ACCESS(vp, accmode, cred, td);
 			if (error)
 				return (error);
 		}
 	}
 	/* A FIFO is opened with its vnode lock held exclusively. */
 	if (vp->v_type == VFIFO && VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
 		vn_lock(vp, LK_UPGRADE | LK_RETRY);
 	if ((error = VOP_OPEN(vp, fmode, cred, td, fp)) != 0)
 		return (error);
 
 	/*
 	 * Every path through this loop ends in break, so it runs at most
 	 * once; the loop form only provides early exits.
 	 */
 	while ((fmode & (O_EXLOCK | O_SHLOCK)) != 0) {
 		KASSERT(fp != NULL, ("open with flock requires fp"));
 		if (fp->f_type != DTYPE_NONE && fp->f_type != DTYPE_VNODE) {
 			error = EOPNOTSUPP;
 			break;
 		}
 		/* Drop the vnode lock around the advisory lock request. */
 		lock_flags = VOP_ISLOCKED(vp);
 		VOP_UNLOCK(vp, 0);
 		lf.l_whence = SEEK_SET;
 		lf.l_start = 0;
 		lf.l_len = 0;
 		if (fmode & O_EXLOCK)
 			lf.l_type = F_WRLCK;
 		else
 			lf.l_type = F_RDLCK;
 		type = F_FLOCK;
 		if ((fmode & FNONBLOCK) == 0)
 			type |= F_WAIT;
 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
 		if (error == 0)
 			fp->f_flag |= FHASLOCK;
 		/* Retake the vnode lock in the mode it was held before. */
 		vn_lock(vp, lock_flags | LK_RETRY);
 		if (error != 0)
 			break;
 		if ((vp->v_iflag & VI_DOOMED) != 0) {
 			error = ENOENT;
 			break;
 		}
 
 		/*
 		 * Another thread might have used this vnode as an
 		 * executable while the vnode lock was dropped.
 		 * Ensure the vnode is still able to be opened for
 		 * writing after the lock has been obtained.
 		 */
 		if ((accmode & VWRITE) != 0)
 			error = vn_writechk(vp);
 		break;
 	}
 
 	if (error != 0) {
 		/*
 		 * VOP_OPEN succeeded but locking failed.  Mark the file
 		 * FOPENFAILED, attach the vnode with an extra reference,
 		 * and install vnops if no file ops were set.
 		 * NOTE(review): presumably so the caller's final fdrop()
 		 * closes the vnode -- confirm in vn_closefile().
 		 */
 		fp->f_flag |= FOPENFAILED;
 		fp->f_vnode = vp;
 		if (fp->f_ops == &badfileops) {
 			fp->f_type = DTYPE_VNODE;
 			fp->f_ops = &vnops;
 		}
 		vref(vp);
 	} else if  ((fmode & FWRITE) != 0) {
 		/* A successful write open bumps the writer count. */
 		VOP_ADD_WRITECOUNT(vp, 1);
 		CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
 		    __func__, vp, vp->v_writecount);
 	}
 	ASSERT_VOP_LOCKED(vp, "vn_open_vnode");
 	return (error);
 }
 
 /*
  * Check whether the (locked) vnode may be written.  A vnode that
  * VOP_IS_TEXT() reports as program text cannot be: ETXTBSY is returned
  * for it, 0 otherwise.
  */
 int
 vn_writechk(struct vnode *vp)
 {
 
 	ASSERT_VOP_LOCKED(vp, "vn_writechk");
 	return (VOP_IS_TEXT(vp) ? ETXTBSY : 0);
 }
 
 /*
  * Close a vnode.  When keep_ref is true only the vnode lock is dropped
  * and the caller keeps its reference; otherwise the reference is released
  * together with the lock.  Returns the result of VOP_CLOSE().
  */
 static int
 vn_close1(struct vnode *vp, int flags, struct ucred *file_cred,
     struct thread *td, bool keep_ref)
 {
 	struct mount *wmp;
 	bool shared_ok;
 	int lkflags, rv;
 
 	/*
 	 * A shared lock suffices for a non-FIFO vnode that was not open
 	 * for writing, provided the mount allows extended shared locking.
 	 */
 	shared_ok = vp->v_type != VFIFO && (flags & FWRITE) == 0 &&
 	    MNT_EXTENDED_SHARED(vp->v_mount);
 	lkflags = shared_ok ? LK_SHARED : LK_EXCLUSIVE;
 
 	vn_start_write(vp, &wmp, V_WAIT);
 	vn_lock(vp, lkflags | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	/* Undo the writer count only for write opens that succeeded. */
 	if ((flags & (FWRITE | FOPENFAILED)) == FWRITE) {
 		VNASSERT(vp->v_writecount > 0, vp,
 		    ("vn_close: negative writecount"));
 		VOP_ADD_WRITECOUNT(vp, -1);
 		CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
 		    __func__, vp, vp->v_writecount);
 	}
 	rv = VOP_CLOSE(vp, flags, file_cred, td);
 	if (!keep_ref)
 		vput(vp);
 	else
 		VOP_UNLOCK(vp, 0);
 	vn_finished_write(wmp);
 	return (rv);
 }
 
 int
 vn_close(struct vnode *vp, int flags, struct ucred *file_cred,
     struct thread *td)
 {
 
 	return (vn_close1(vp, flags, file_cred, td, false));
 }
 
 /*
  * Heuristic to detect sequential operation.
  */
 static int
 sequential_heuristic(struct uio *uio, struct file *fp)
 {
 
 	ASSERT_VOP_LOCKED(fp->f_vnode, __func__);
 	if (fp->f_flag & FRDAHEAD)
 		return (fp->f_seqcount << IO_SEQSHIFT);
 
 	/*
 	 * Offset 0 is handled specially.  open() sets f_seqcount to 1 so
 	 * that the first I/O is normally considered to be slightly
 	 * sequential.  Seeking to offset 0 doesn't change sequentiality
 	 * unless previous seeks have reduced f_seqcount to 0, in which
 	 * case offset 0 is not special.
 	 */
 	if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
 	    uio->uio_offset == fp->f_nextoff) {
 		/*
 		 * f_seqcount is in units of fixed-size blocks so that it
 		 * depends mainly on the amount of sequential I/O and not
 		 * much on the number of sequential I/O's.  The fixed size
 		 * of 16384 is hard-coded here since it is (not quite) just
 		 * a magic size that works well here.  This size is more
 		 * closely related to the best I/O size for real disks than
 		 * to any block size used by software.
 		 */
 		fp->f_seqcount += howmany(uio->uio_resid, 16384);
 		if (fp->f_seqcount > IO_SEQMAX)
 			fp->f_seqcount = IO_SEQMAX;
 		return (fp->f_seqcount << IO_SEQSHIFT);
 	}
 
 	/* Not sequential.  Quickly draw-down sequentiality. */
 	if (fp->f_seqcount > 1)
 		fp->f_seqcount = 1;
 	else
 		fp->f_seqcount = 0;
 	return (0);
 }
 
 /*
  * Package up an I/O request on a vnode into a uio and do it.
  *
  * rw           UIO_READ or UIO_WRITE
  * vp           vnode to transfer to or from
  * base, len    buffer address and byte count
  * offset       starting file offset
  * segflg       address space that base refers to
  * ioflg        IO_* flags; IO_NODELOCKED means the caller already holds
  *              the vnode lock, IO_RANGELOCKED skips the range lock, and
  *              IO_NOMACCHECK skips the MAC check
  * active_cred  credentials for the MAC check and, when file_cred is
  *              NULL, for the i/o itself
  * file_cred    credentials the file was opened with, or NULL
  * aresid       if non-NULL, receives the untransferred byte count; if
  *              NULL, a short transfer is reported as EIO
  * td           calling thread
  *
  * Returns 0 or an errno value.
  */
 int
 vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset,
     enum uio_seg segflg, int ioflg, struct ucred *active_cred,
     struct ucred *file_cred, ssize_t *aresid, struct thread *td)
 {
 	struct uio auio;
 	struct iovec aiov;
 	struct mount *mp;
 	struct ucred *cred;
 	void *rl_cookie;
 	struct vn_io_fault_args args;
 	int error, lock_flags;
 
 	/* Describe the single buffer with a one-element uio. */
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	aiov.iov_base = base;
 	aiov.iov_len = len;
 	auio.uio_resid = len;
 	auio.uio_offset = offset;
 	auio.uio_segflg = segflg;
 	auio.uio_rw = rw;
 	auio.uio_td = td;
 	error = 0;
 
 	if ((ioflg & IO_NODELOCKED) == 0) {
 		/* Take the range lock first, then the vnode lock. */
 		if ((ioflg & IO_RANGELOCKED) == 0) {
 			if (rw == UIO_READ) {
 				rl_cookie = vn_rangelock_rlock(vp, offset,
 				    offset + len);
 			} else {
 				rl_cookie = vn_rangelock_wlock(vp, offset,
 				    offset + len);
 			}
 		} else
 			rl_cookie = NULL;
 		mp = NULL;
 		if (rw == UIO_WRITE) { 
 			/* vn_start_write() is skipped for character devices. */
 			if (vp->v_type != VCHR &&
 			    (error = vn_start_write(vp, &mp, V_WAIT | PCATCH))
 			    != 0)
 				goto out;
 			if (MNT_SHARED_WRITES(mp) ||
 			    ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount)))
 				lock_flags = LK_SHARED;
 			else
 				lock_flags = LK_EXCLUSIVE;
 		} else
 			lock_flags = LK_SHARED;
 		vn_lock(vp, lock_flags | LK_RETRY);
 	} else
 		rl_cookie = NULL;
 
 	ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 #ifdef MAC
 	if ((ioflg & IO_NOMACCHECK) == 0) {
 		if (rw == UIO_READ)
 			error = mac_vnode_check_read(active_cred, file_cred,
 			    vp);
 		else
 			error = mac_vnode_check_write(active_cred, file_cred,
 			    vp);
 	}
 #endif
 	if (error == 0) {
 		/* The i/o runs with file_cred when one is supplied. */
 		if (file_cred != NULL)
 			cred = file_cred;
 		else
 			cred = active_cred;
 		if (do_vn_io_fault(vp, &auio)) {
 			args.kind = VN_IO_FAULT_VOP;
 			args.cred = cred;
 			args.flags = ioflg;
 			args.args.vop_args.vp = vp;
 			error = vn_io_fault1(vp, &auio, &args, td);
 		} else if (rw == UIO_READ) {
 			error = VOP_READ(vp, &auio, ioflg, cred);
 		} else /* if (rw == UIO_WRITE) */ {
 			error = VOP_WRITE(vp, &auio, ioflg, cred);
 		}
 	}
 	/* Without aresid a short transfer cannot be reported; fail it. */
 	if (aresid)
 		*aresid = auio.uio_resid;
 	else
 		if (auio.uio_resid && error == 0)
 			error = EIO;
 	/* Release only what this function acquired. */
 	if ((ioflg & IO_NODELOCKED) == 0) {
 		VOP_UNLOCK(vp, 0);
 		if (mp != NULL)
 			vn_finished_write(mp);
 	}
  out:
 	if (rl_cookie != NULL)
 		vn_rangelock_unlock(vp, rl_cookie);
 	return (error);
 }
 
 /*
  * Package up an I/O request on a vnode into a uio and do it.  The I/O
  * request is split up into smaller chunks and we try to avoid saturating
  * the buffer cache while potentially holding a vnode locked, so we
  * check bwillwrite() before calling vn_rdwr().  We also call kern_yield()
  * to give other processes a chance to lock the vnode (either other processes
  * core'ing the same binary, or unrelated processes scanning the directory).
  *
  * The arguments are passed through to vn_rdwr() one chunk at a time;
  * `len' and `*aresid' are size_t here.  On return `*aresid' (when
  * `aresid' is non-NULL) holds the number of bytes that were not
  * transferred.  Returns 0, or the errno value of the first vn_rdwr()
  * call that failed.
  */
 int
 vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, void *base, size_t len,
     off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred,
     struct ucred *file_cred, size_t *aresid, struct thread *td)
 {
 	int error = 0;
 	ssize_t iaresid;
 
 	do {
 		int chunk;
 
 		/*
 		 * Force `offset' to a multiple of MAXBSIZE except possibly
 		 * for the first chunk, so that filesystems only need to
 		 * write full blocks except possibly for the first and last
 		 * chunks.
 		 */
 		chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE;
 
 		if (chunk > len)
 			chunk = len;
 		/* Throttle regular-file writes before taking any vnode lock. */
 		if (rw != UIO_READ && vp->v_type == VREG)
 			bwillwrite();
 		iaresid = 0;
 		error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
 		    ioflg, active_cred, file_cred, &iaresid, td);
 		len -= chunk;	/* aresid calc already includes length */
 		if (error)
 			break;
 		offset += chunk;
 		base = (char *)base + chunk;
 		kern_yield(PRI_USER);
 	} while (len);
 	/* Untouched chunks plus whatever the last chunk left behind. */
 	if (aresid)
 		*aresid = len + iaresid;
 	return (error);
 }
 
 off_t
 foffset_lock(struct file *fp, int flags)
 {
 	struct mtx *mtxp;
 	off_t res;
 
 	KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed"));
 
 #if OFF_MAX <= LONG_MAX
 	/*
 	 * Caller only wants the current f_offset value.  Assume that
 	 * the long and shorter integer types reads are atomic.
 	 */
 	if ((flags & FOF_NOLOCK) != 0)
 		return (fp->f_offset);
 #endif
 
 	/*
 	 * According to McKusick the vn lock was protecting f_offset here.
 	 * It is now protected by the FOFFSET_LOCKED flag.
 	 */
 	mtxp = mtx_pool_find(mtxpool_sleep, fp);
 	mtx_lock(mtxp);
 	if ((flags & FOF_NOLOCK) == 0) {
 		while (fp->f_vnread_flags & FOFFSET_LOCKED) {
 			fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
 			msleep(&fp->f_vnread_flags, mtxp, PUSER -1,
 			    "vofflock", 0);
 		}
 		fp->f_vnread_flags |= FOFFSET_LOCKED;
 	}
 	res = fp->f_offset;
 	mtx_unlock(mtxp);
 	return (res);
 }
 
 /*
  * Counterpart of foffset_lock().  Stores `val' into f_offset (unless
  * FOF_NOUPDATE) and into f_nextoff (if FOF_NEXTOFF), then, unless
  * FOF_NOLOCK was given, drops the offset lock and wakes up any waiter.
  * FOF_OFFSET must not be passed.
  */
 void
 foffset_unlock(struct file *fp, off_t val, int flags)
 {
 	struct mtx *pool_mtx;
 	bool nolock, set_offset, set_nextoff;
 
 	KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed"));
 	nolock = (flags & FOF_NOLOCK) != 0;
 	set_offset = (flags & FOF_NOUPDATE) == 0;
 	set_nextoff = (flags & FOF_NEXTOFF) != 0;
 
 #if OFF_MAX <= LONG_MAX
 	/* Lockless variant: plain stores, no mutex. */
 	if (nolock) {
 		if (set_offset)
 			fp->f_offset = val;
 		if (set_nextoff)
 			fp->f_nextoff = val;
 		return;
 	}
 #endif
 
 	pool_mtx = mtx_pool_find(mtxpool_sleep, fp);
 	mtx_lock(pool_mtx);
 	if (set_offset)
 		fp->f_offset = val;
 	if (set_nextoff)
 		fp->f_nextoff = val;
 	if (!nolock) {
 		KASSERT((fp->f_vnread_flags & FOFFSET_LOCKED) != 0,
 		    ("Lost FOFFSET_LOCKED"));
 		if ((fp->f_vnread_flags & FOFFSET_LOCK_WAITING) != 0)
 			wakeup(&fp->f_vnread_flags);
 		/* Clears both FOFFSET_LOCKED and FOFFSET_LOCK_WAITING. */
 		fp->f_vnread_flags = 0;
 	}
 	mtx_unlock(pool_mtx);
 }
 
 /*
  * Load uio->uio_offset from the file offset via foffset_lock(), unless
  * the caller supplied an explicit offset (FOF_OFFSET), in which case
  * nothing is done.
  */
 void
 foffset_lock_uio(struct file *fp, struct uio *uio, int flags)
 {
 
 	if ((flags & FOF_OFFSET) != 0)
 		return;
 	uio->uio_offset = foffset_lock(fp, flags);
 }
 
 /*
  * Hand uio->uio_offset back to the file via foffset_unlock(), unless the
  * caller supplied an explicit offset (FOF_OFFSET), in which case nothing
  * is done.
  */
 void
 foffset_unlock_uio(struct file *fp, struct uio *uio, int flags)
 {
 
 	if ((flags & FOF_OFFSET) != 0)
 		return;
 	foffset_unlock(fp, uio->uio_offset, flags);
 }
 
 /*
  * Return the advice value recorded in fp->f_advice when the i/o range
  * described by `uio' lies entirely inside [fa_start, fa_end]; otherwise
  * (no advice recorded, not a regular file, or range not covered) return
  * POSIX_FADV_NORMAL.
  */
 static int
 get_advice(struct file *fp, struct uio *uio)
 {
 	struct mtx *pool_mtx;
 	int advice;
 
 	/* Cheap unlocked pre-check; f_advice is re-tested under the mutex. */
 	if (fp->f_advice == NULL || fp->f_vnode->v_type != VREG)
 		return (POSIX_FADV_NORMAL);
 
 	advice = POSIX_FADV_NORMAL;
 	pool_mtx = mtx_pool_find(mtxpool_sleep, fp);
 	mtx_lock(pool_mtx);
 	if (fp->f_advice != NULL) {
 		if (uio->uio_offset >= fp->f_advice->fa_start &&
 		    uio->uio_offset + uio->uio_resid <= fp->f_advice->fa_end)
 			advice = fp->f_advice->fa_advice;
 	}
 	mtx_unlock(pool_mtx);
 	return (advice);
 }
 
 /*
  * File table vnode read routine.
  *
  * Reads from the vnode behind `fp' at uio->uio_offset; the caller must
  * pass FOF_OFFSET in `flags' (asserted).  FNONBLOCK and O_DIRECT on the
  * file are translated to IO_NDELAY and IO_DIRECT, and the result of
  * sequential_heuristic() is or'ed into the i/o flags unless the advice
  * for the range is POSIX_FADV_RANDOM.  The vnode is locked shared around
  * VOP_READ() and f_nextoff is updated from the resulting uio offset.
  *
  * After a successful read under POSIX_FADV_NOREUSE that moved the
  * offset, VOP_ADVISE(POSIX_FADV_DONTNEED) is issued for the range just
  * read and its result becomes the return value.
  *
  * Returns 0 or an errno value.
  */
 static int
 vn_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	struct vnode *vp;
 	off_t orig_offset;
 	int error, ioflag;
 	int advice;
 
 	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
 	    uio->uio_td, td));
 	KASSERT(flags & FOF_OFFSET, ("No FOF_OFFSET"));
 	vp = fp->f_vnode;
 	ioflag = 0;
 	if (fp->f_flag & FNONBLOCK)
 		ioflag |= IO_NDELAY;
 	if (fp->f_flag & O_DIRECT)
 		ioflag |= IO_DIRECT;
 	advice = get_advice(fp, uio);
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 
 	switch (advice) {
 	case POSIX_FADV_NORMAL:
 	case POSIX_FADV_SEQUENTIAL:
 	case POSIX_FADV_NOREUSE:
 		ioflag |= sequential_heuristic(uio, fp);
 		break;
 	case POSIX_FADV_RANDOM:
 		/* Disable read-ahead for random I/O. */
 		break;
 	}
 	/* Remembered so the NOREUSE handling below knows what was read. */
 	orig_offset = uio->uio_offset;
 
 #ifdef MAC
 	error = mac_vnode_check_read(active_cred, fp->f_cred, vp);
 	if (error == 0)
 #endif
 		error = VOP_READ(vp, uio, ioflag, fp->f_cred);
 	fp->f_nextoff = uio->uio_offset;
 	VOP_UNLOCK(vp, 0);
 	if (error == 0 && advice == POSIX_FADV_NOREUSE &&
 	    orig_offset != uio->uio_offset)
 		/*
 		 * Use POSIX_FADV_DONTNEED to flush pages and buffers
 		 * for the backing file after a POSIX_FADV_NOREUSE
 		 * read(2).
 		 */
 		error = VOP_ADVISE(vp, orig_offset, uio->uio_offset - 1,
 		    POSIX_FADV_DONTNEED);
 	return (error);
 }
 
 /*
  * File table vnode write routine.
  */
 static int
 vn_write(fp, uio, active_cred, flags, td)
 	struct file *fp;
 	struct uio *uio;
 	struct ucred *active_cred;
 	int flags;
 	struct thread *td;
 {
 	struct vnode *vp;
 	struct mount *mp;
 	off_t orig_offset;
 	int error, ioflag, lock_flags;
 	int advice;
 
 	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
 	    uio->uio_td, td));
 	KASSERT(flags & FOF_OFFSET, ("No FOF_OFFSET"));
 	vp = fp->f_vnode;
 	if (vp->v_type == VREG)
 		bwillwrite();
 	ioflag = IO_UNIT;
 	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
 		ioflag |= IO_APPEND;
 	if (fp->f_flag & FNONBLOCK)
 		ioflag |= IO_NDELAY;
 	if (fp->f_flag & O_DIRECT)
 		ioflag |= IO_DIRECT;
 	if ((fp->f_flag & O_FSYNC) ||
 	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
 		ioflag |= IO_SYNC;
 	mp = NULL;
 	if (vp->v_type != VCHR &&
 	    (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		goto unlock;
 
 	advice = get_advice(fp, uio);
 
 	if (MNT_SHARED_WRITES(mp) ||
 	    (mp == NULL && MNT_SHARED_WRITES(vp->v_mount))) {
 		lock_flags = LK_SHARED;
 	} else {
 		lock_flags = LK_EXCLUSIVE;
 	}
 
 	vn_lock(vp, lock_flags | LK_RETRY);
 	switch (advice) {
 	case POSIX_FADV_NORMAL:
 	case POSIX_FADV_SEQUENTIAL:
 	case POSIX_FADV_NOREUSE:
 		ioflag |= sequential_heuristic(uio, fp);
 		break;
 	case POSIX_FADV_RANDOM:
 		/* XXX: Is this correct? */
 		break;
 	}
 	orig_offset = uio->uio_offset;
 
 #ifdef MAC
 	error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
 	if (error == 0)
 #endif
 		error = VOP_WRITE(vp, uio, ioflag, fp->f_cred);
 	fp->f_nextoff = uio->uio_offset;
 	VOP_UNLOCK(vp, 0);
 	if (vp->v_type != VCHR)
 		vn_finished_write(mp);
 	if (error == 0 && advice == POSIX_FADV_NOREUSE &&
 	    orig_offset != uio->uio_offset)
 		/*
 		 * Use POSIX_FADV_DONTNEED to flush pages and buffers
 		 * for the backing file after a POSIX_FADV_NOREUSE
 		 * write(2).
 		 */
 		error = VOP_ADVISE(vp, orig_offset, uio->uio_offset - 1,
 		    POSIX_FADV_DONTNEED);
 unlock:
 	return (error);
 }
 
 /*
  * The vn_io_fault() is a wrapper around vn_read() and vn_write() to
  * prevent the following deadlock:
  *
  * Assume that thread A reads from the vnode vp1 into userspace
  * buffer buf1 backed by the pages of vnode vp2.  If a page in buf1 is
  * currently not resident, then the system ends up with the call chain
  *   vn_read() -> VOP_READ(vp1) -> uiomove() -> [Page Fault] ->
  *     vm_fault(buf1) -> vnode_pager_getpages(vp2) -> VOP_GETPAGES(vp2)
  * which establishes lock order vp1->vn_lock, then vp2->vn_lock.
  * If, at the same time, thread B reads from vnode vp2 into buffer buf2
  * backed by the pages of vnode vp1, and some page in buf2 is not
  * resident, we get a reversed order vp2->vn_lock, then vp1->vn_lock.
  *
  * To prevent the lock order reversal and deadlock, vn_io_fault() does
  * not allow page faults to happen during VOP_READ() or VOP_WRITE().
  * Instead, it first tries to do the whole range i/o with pagefaults
  * disabled. If all pages in the i/o buffer are resident and mapped,
  * VOP will succeed (ignoring the genuine filesystem errors).
  * Otherwise, we get back EFAULT, and vn_io_fault() falls back to do
  * i/o in chunks, with all pages in the chunk prefaulted and held
  * using vm_fault_quick_hold_pages().
  *
  * Filesystems using this deadlock avoidance scheme should use the
  * array of the held pages from uio, saved in the curthread->td_ma,
  * instead of doing uiomove().  A helper function
  * vn_io_fault_uiomove() converts uiomove request into
  * uiomove_fromphys() over td_ma array.
  *
  * Since vnode locks do not cover the whole i/o anymore, rangelocks
  * make the current i/o request atomic with respect to other i/os and
  * truncations.
  */
 
 /*
  * Dispatch one i/o attempt according to the request encoded in `args':
  * either the file-level handler stored in fop_args, or VOP_READ() /
  * VOP_WRITE() on the vnode stored in vop_args, chosen by uio->uio_rw.
  * Any other combination panics.
  */
 static int
 vn_io_fault_doio(struct vn_io_fault_args *args, struct uio *uio,
     struct thread *td)
 {
 
 	if (args->kind == VN_IO_FAULT_FOP) {
 		return ((args->args.fop_args.doio)(args->args.fop_args.fp,
 		    uio, args->cred, args->flags, td));
 	}
 	if (args->kind == VN_IO_FAULT_VOP) {
 		if (uio->uio_rw == UIO_READ) {
 			return (VOP_READ(args->args.vop_args.vp, uio,
 			    args->flags, args->cred));
 		}
 		if (uio->uio_rw == UIO_WRITE) {
 			return (VOP_WRITE(args->args.vop_args.vp, uio,
 			    args->flags, args->cred));
 		}
 	}
 	panic("vn_io_fault_doio: unknown kind of io %d %d", args->kind,
 	    uio->uio_rw);
 }
 
 /*
  * Touch one byte of user memory at `base': fetch it with fubyte() and,
  * when the i/o is a read (which stores into the user buffer), store the
  * same value back with subyte().  Returns EFAULT if either access
  * fails, 0 otherwise.
  */
 static int
 vn_io_fault_touch(char *base, const struct uio *uio)
 {
 	int byte;
 
 	byte = fubyte(base);
 	if (byte == -1)
 		return (EFAULT);
 	if (uio->uio_rw == UIO_READ && subyte(base, byte) == -1)
 		return (EFAULT);
 	return (0);
 }
 
 /*
  * Walk the user buffers described by `uio' and touch them with
  * vn_io_fault_touch(): one byte per PAGE_SIZE step through each iovec,
  * plus the last byte of whatever remains once less than a page is left.
  * The uio must describe user space memory (asserted); it is not
  * modified.  Returns 0, or EFAULT from the first touch that fails.
  */
 static int
 vn_io_fault_prefault_user(const struct uio *uio)
 {
 	char *base;
 	const struct iovec *iov;
 	size_t len;
 	ssize_t resid;
 	int error, i;
 
 	KASSERT(uio->uio_segflg == UIO_USERSPACE,
 	    ("vn_io_fault_prefault userspace"));
 
 	error = i = 0;
 	iov = uio->uio_iov;
 	resid = uio->uio_resid;
 	base = iov->iov_base;
 	len = iov->iov_len;
 	while (resid > 0) {
 		error = vn_io_fault_touch(base, uio);
 		if (error != 0)
 			break;
 		if (len < PAGE_SIZE) {
 			/* Tail of this iovec: touch its last byte, then move on. */
 			if (len != 0) {
 				error = vn_io_fault_touch(base + len - 1, uio);
 				if (error != 0)
 					break;
 				resid -= len;
 			}
 			/* Stop once every iovec has been visited. */
 			if (++i >= uio->uio_iovcnt)
 				break;
 			iov = uio->uio_iov + i;
 			base = iov->iov_base;
 			len = iov->iov_len;
 		} else {
 			/* Step forward by one page within the current iovec. */
 			len -= PAGE_SIZE;
 			base += PAGE_SIZE;
 			resid -= PAGE_SIZE;
 		}
 	}
 	return (error);
 }
 
 /*
  * Common code for vn_io_fault(), agnostic to the kind of i/o request.
  * Uses vn_io_fault_doio() to make the call to an actual i/o function.
  * Used from vn_rdwr() and vn_io_fault(), which encode the i/o request
  * into args and call vn_io_fault1() to handle faults during the user
  * mode buffer accesses.
  *
  * The whole request is first attempted with page faults disabled.  Only
  * when that attempt returns EFAULT is the remainder redone in chunks of
  * at most io_hold_cnt pages, each chunk held with
  * vm_fault_quick_hold_pages() and published to the filesystem through
  * td->td_ma / td->td_ma_cnt with TDP_UIOHELD set.
  *
  * Returns 0 or an errno value; `uio' is advanced by the amount
  * transferred.
  */
 static int
 vn_io_fault1(struct vnode *vp, struct uio *uio, struct vn_io_fault_args *args,
     struct thread *td)
 {
 	vm_page_t ma[io_hold_cnt + 2];
 	struct uio *uio_clone, short_uio;
 	struct iovec short_iovec[1];
 	vm_page_t *prev_td_ma;
 	vm_prot_t prot;
 	vm_offset_t addr, end;
 	size_t len, resid;
 	ssize_t adv;
 	int error, cnt, save, saveheld, prev_td_ma_cnt;
 
 	/* Optional pre-touch of the user buffer, controlled by a tunable. */
 	if (vn_io_fault_prefault) {
 		error = vn_io_fault_prefault_user(uio);
 		if (error != 0)
 			return (error); /* Or ignore ? */
 	}
 
 	/* A file read stores into user memory, so it needs write access. */
 	prot = uio->uio_rw == UIO_READ ? VM_PROT_WRITE : VM_PROT_READ;
 
 	/*
 	 * The UFS follows IO_UNIT directive and replays back both
 	 * uio_offset and uio_resid if an error is encountered during the
 	 * operation.  But, since the iovec may be already advanced,
 	 * uio is still in an inconsistent state.
 	 *
 	 * Cache a copy of the original uio, which is advanced to the redo
 	 * point using UIO_NOCOPY below.
 	 */
 	uio_clone = cloneuio(uio);
 	resid = uio->uio_resid;
 
 	short_uio.uio_segflg = UIO_USERSPACE;
 	short_uio.uio_rw = uio->uio_rw;
 	short_uio.uio_td = uio->uio_td;
 
 	/* First attempt: the whole request, with page faults disabled. */
 	save = vm_fault_disable_pagefaults();
 	error = vn_io_fault_doio(args, uio, td);
 	if (error != EFAULT)
 		goto out;
 
 	/* Slow path: skip the clone past what the first attempt completed. */
 	atomic_add_long(&vn_io_faults_cnt, 1);
 	uio_clone->uio_segflg = UIO_NOCOPY;
 	uiomove(NULL, resid - uio->uio_resid, uio_clone);
 	uio_clone->uio_segflg = uio->uio_segflg;
 
 	/* Save the thread's held-page state so it can be restored on exit. */
 	saveheld = curthread_pflags_set(TDP_UIOHELD);
 	prev_td_ma = td->td_ma;
 	prev_td_ma_cnt = td->td_ma_cnt;
 
 	while (uio_clone->uio_resid != 0) {
 		len = uio_clone->uio_iov->iov_len;
 		if (len == 0) {
 			/* Current iovec is exhausted; step to the next one. */
 			KASSERT(uio_clone->uio_iovcnt >= 1,
 			    ("iovcnt underflow"));
 			uio_clone->uio_iov++;
 			uio_clone->uio_iovcnt--;
 			continue;
 		}
 		/* Bound the chunk so that the held pages fit into ma[]. */
 		if (len > io_hold_cnt * PAGE_SIZE)
 			len = io_hold_cnt * PAGE_SIZE;
 		addr = (uintptr_t)uio_clone->uio_iov->iov_base;
 		end = round_page(addr + len);
 		/* Address wrap-around. */
 		if (end < addr) {
 			error = EFAULT;
 			break;
 		}
 		/*
 		 * NOTE(review): the value stored here is overwritten by the
 		 * vm_fault_quick_hold_pages() call below before it is read.
 		 */
 		cnt = atop(end - trunc_page(addr));
 		/*
 		 * A perfectly misaligned address and length could cause
 		 * both the start and the end of the chunk to use partial
 		 * page.  +2 accounts for such a situation.
 		 */
 		cnt = vm_fault_quick_hold_pages(&td->td_proc->p_vmspace->vm_map,
 		    addr, len, prot, ma, io_hold_cnt + 2);
 		if (cnt == -1) {
 			error = EFAULT;
 			break;
 		}
 		/* Build a one-segment uio covering just this chunk. */
 		short_uio.uio_iov = &short_iovec[0];
 		short_iovec[0].iov_base = (void *)addr;
 		short_uio.uio_iovcnt = 1;
 		short_uio.uio_resid = short_iovec[0].iov_len = len;
 		short_uio.uio_offset = uio_clone->uio_offset;
 		td->td_ma = ma;
 		td->td_ma_cnt = cnt;
 
 		error = vn_io_fault_doio(args, &short_uio, td);
 		vm_page_unhold_pages(ma, cnt);
 		/* Bytes actually moved by this chunk. */
 		adv = len - short_uio.uio_resid;
 
 		uio_clone->uio_iov->iov_base =
 		    (char *)uio_clone->uio_iov->iov_base + adv;
 		uio_clone->uio_iov->iov_len -= adv;
 		uio_clone->uio_resid -= adv;
 		uio_clone->uio_offset += adv;
 
 		/* Mirror the progress into the caller's uio. */
 		uio->uio_resid -= adv;
 		uio->uio_offset += adv;
 
 		/* Stop on error, or when no progress was made. */
 		if (error != 0 || adv == 0)
 			break;
 	}
 	td->td_ma = prev_td_ma;
 	td->td_ma_cnt = prev_td_ma_cnt;
 	curthread_pflags_restore(saveheld);
 out:
 	vm_fault_enable_pagefaults(save);
 	free(uio_clone, M_IOV);
 	return (error);
 }
 
 static int
 vn_io_fault(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	fo_rdwr_t *doio;
 	struct vnode *vp;
 	void *rl_cookie;
 	struct vn_io_fault_args args;
 	int error;
 
 	doio = uio->uio_rw == UIO_READ ? vn_read : vn_write;
 	vp = fp->f_vnode;
 	foffset_lock_uio(fp, uio, flags);
 	if (do_vn_io_fault(vp, uio)) {
 		args.kind = VN_IO_FAULT_FOP;
 		args.args.fop_args.fp = fp;
 		args.args.fop_args.doio = doio;
 		args.cred = active_cred;
 		args.flags = flags | FOF_OFFSET;
 		if (uio->uio_rw == UIO_READ) {
 			rl_cookie = vn_rangelock_rlock(vp, uio->uio_offset,
 			    uio->uio_offset + uio->uio_resid);
 		} else if ((fp->f_flag & O_APPEND) != 0 ||
 		    (flags & FOF_OFFSET) == 0) {
 			/* For appenders, punt and lock the whole range. */
 			rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 		} else {
 			rl_cookie = vn_rangelock_wlock(vp, uio->uio_offset,
 			    uio->uio_offset + uio->uio_resid);
 		}
 		error = vn_io_fault1(vp, uio, &args, td);
 		vn_rangelock_unlock(vp, rl_cookie);
 	} else {
 		error = doio(fp, uio, active_cred, flags | FOF_OFFSET, td);
 	}
 	foffset_unlock_uio(fp, uio, flags);
 	return (error);
 }
 
 /*
  * Helper function to perform the requested uiomove operation using
  * the held pages for the uio->uio_iov[0].iov_base buffer instead of
  * copyin/copyout.  Access to the pages with uiomove_fromphys()
  * instead of iov_base prevents page faults that could occur due to
  * pmap_collect() invalidating the mapping created by
  * vm_fault_quick_hold_pages(), or pageout daemon, page laundry or
  * object cleanup revoking the write access from page mappings.
  *
  * Filesystems that specify MNTK_NO_IOPF shall use vn_io_fault_uiomove()
  * instead of plain uiomove().
  *
  * `data' is the kernel-side buffer and `xfersize' the number of bytes
  * to move; the transfer is clipped to uio->uio_resid.  When the current
  * thread does not have TDP_UIOHELD set, or `uio' does not describe user
  * space memory, this is simply uiomove().  Returns the result of
  * uiomove() or uiomove_fromphys().
  */
 int
 vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio)
 {
 	struct uio transp_uio;
 	struct iovec transp_iov[1];
 	struct thread *td;
 	size_t adv;
 	int error, pgadv;
 
 	td = curthread;
 	if ((td->td_pflags & TDP_UIOHELD) == 0 ||
 	    uio->uio_segflg != UIO_USERSPACE)
 		return (uiomove(data, xfersize, uio));
 
 	KASSERT(uio->uio_iovcnt == 1, ("uio_iovcnt %d", uio->uio_iovcnt));
 	/* Temporary kernel-space uio that describes `data'. */
 	transp_iov[0].iov_base = data;
 	transp_uio.uio_iov = &transp_iov[0];
 	transp_uio.uio_iovcnt = 1;
 	if (xfersize > uio->uio_resid)
 		xfersize = uio->uio_resid;
 	transp_uio.uio_resid = transp_iov[0].iov_len = xfersize;
 	transp_uio.uio_offset = 0;
 	transp_uio.uio_segflg = UIO_SYSSPACE;
 	/*
 	 * Since transp_iov points to data, and td_ma page array
 	 * corresponds to original uio->uio_iov, we need to invert the
 	 * direction of the i/o operation as passed to
 	 * uiomove_fromphys().
 	 */
 	switch (uio->uio_rw) {
 	case UIO_WRITE:
 		transp_uio.uio_rw = UIO_READ;
 		break;
 	case UIO_READ:
 		transp_uio.uio_rw = UIO_WRITE;
 		break;
 	}
 	transp_uio.uio_td = uio->uio_td;
 	/* The page-relative offset of the user address selects the start. */
 	error = uiomove_fromphys(td->td_ma,
 	    ((vm_offset_t)uio->uio_iov->iov_base) & PAGE_MASK,
 	    xfersize, &transp_uio);
 	adv = xfersize - transp_uio.uio_resid;
 	/* Number of page boundaries crossed by advancing iov_base by adv. */
 	pgadv =
 	    (((vm_offset_t)uio->uio_iov->iov_base + adv) >> PAGE_SHIFT) -
 	    (((vm_offset_t)uio->uio_iov->iov_base) >> PAGE_SHIFT);
 	td->td_ma += pgadv;
 	KASSERT(td->td_ma_cnt >= pgadv, ("consumed pages %d %d", td->td_ma_cnt,
 	    pgadv));
 	td->td_ma_cnt -= pgadv;
 	/* Advance the caller's uio by hand, as uiomove() would have. */
 	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + adv;
 	uio->uio_iov->iov_len -= adv;
 	uio->uio_resid -= adv;
 	uio->uio_offset += adv;
 	return (error);
 }
 
 /*
  * Page-array counterpart of vn_io_fault_uiomove(): move up to
  * `xfersize' bytes between the pages in ma[], starting at byte offset
  * `offset', and the buffer described by `uio'.
  *
  * When the current thread has TDP_UIOHELD set and `uio' describes user
  * space memory, the copy goes page-to-page between ma[] and td->td_ma
  * via pmap_copy_pages(), the transfer is clipped to uio->uio_resid, and
  * 0 is returned.  Otherwise the request is passed to uiomove_fromphys()
  * and its result is returned.
  */
 int
 vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize,
     struct uio *uio)
 {
 	struct thread *td;
 	vm_offset_t iov_base;
 	int cnt, pgadv;
 
 	td = curthread;
 	if ((td->td_pflags & TDP_UIOHELD) == 0 ||
 	    uio->uio_segflg != UIO_USERSPACE)
 		return (uiomove_fromphys(ma, offset, xfersize, uio));
 
 	KASSERT(uio->uio_iovcnt == 1, ("uio_iovcnt %d", uio->uio_iovcnt));
 	cnt = xfersize > uio->uio_resid ? uio->uio_resid : xfersize;
 	iov_base = (vm_offset_t)uio->uio_iov->iov_base;
 	switch (uio->uio_rw) {
 	case UIO_WRITE:
 		/* Source is the held user pages, destination is ma[]. */
 		pmap_copy_pages(td->td_ma, iov_base & PAGE_MASK, ma,
 		    offset, cnt);
 		break;
 	case UIO_READ:
 		/* Source is ma[], destination is the held user pages. */
 		pmap_copy_pages(ma, offset, td->td_ma, iov_base & PAGE_MASK,
 		    cnt);
 		break;
 	}
 	/* Number of page boundaries crossed by advancing iov_base by cnt. */
 	pgadv = ((iov_base + cnt) >> PAGE_SHIFT) - (iov_base >> PAGE_SHIFT);
 	td->td_ma += pgadv;
 	KASSERT(td->td_ma_cnt >= pgadv, ("consumed pages %d %d", td->td_ma_cnt,
 	    pgadv));
 	td->td_ma_cnt -= pgadv;
 	/* Advance the caller's uio by hand. */
 	uio->uio_iov->iov_base = (char *)(iov_base + cnt);
 	uio->uio_iov->iov_len -= cnt;
 	uio->uio_resid -= cnt;
 	uio->uio_offset += cnt;
 	return (0);
 }
 
 
 /*
  * File table truncate routine.
  *
  * Sets the size of the file behind `fp' to `length' through
  * VOP_SETATTR(), requesting VA_SYNC when the file was opened O_FSYNC.
  * Directories are refused with EISDIR.  Returns 0 or an errno value.
  */
 static int
 vn_truncate(struct file *fp, off_t length, struct ucred *active_cred,
     struct thread *td)
 {
 	struct vattr va;
 	struct mount *mp;
 	struct vnode *vp;
 	void *cookie;
 	int error;
 
 	vp = fp->f_vnode;
 
 	/*
 	 * The whole file range is write-locked for the truncation, so
 	 * that a split i/o cannot end up half before and half after it.
 	 */
 	cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 	if (error != 0)
 		goto rangeunlock;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	if (vp->v_type == VDIR) {
 		error = EISDIR;
 		goto vnunlock;
 	}
 #ifdef MAC
 	error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
 	if (error != 0)
 		goto vnunlock;
 #endif
 	error = vn_writechk(vp);
 	if (error != 0)
 		goto vnunlock;
 	VATTR_NULL(&va);
 	va.va_size = length;
 	if ((fp->f_flag & O_FSYNC) != 0)
 		va.va_vaflags |= VA_SYNC;
 	error = VOP_SETATTR(vp, &va, fp->f_cred);
 vnunlock:
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 rangeunlock:
 	vn_rangelock_unlock(vp, cookie);
 	return (error);
 }
 
 /*
  * File table vnode stat routine.
  *
  * Locks the vnode behind `fp' shared, fills `sb' through vn_stat() using
  * the file's credential as the file credential, and unlocks again.
  * Returns whatever vn_stat() returned.
  */
 static int
 vn_statfile(struct file *fp, struct stat *sb, struct ucred *active_cred,
     struct thread *td)
 {
 	struct vnode *vp = fp->f_vnode;
 	int error;
 
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
 	VOP_UNLOCK(vp, 0);
 
 	return (error);
 }
 
 /*
  * Stat a vnode; implementation for the stat syscall
  */
 int
 vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred,
     struct ucred *file_cred, struct thread *td)
 {
 	struct vattr vattr;
 	struct vattr *vap;
 	int error;
 	u_short mode;
 
 	AUDIT_ARG_VNODE1(vp);
 #ifdef MAC
 	error = mac_vnode_check_stat(active_cred, file_cred, vp);
 	if (error)
 		return (error);
 #endif
 
 	vap = &vattr;
 
 	/*
 	 * Initialize defaults for new and unusual fields, so that file
 	 * systems which don't support these fields don't need to know
 	 * about them.
 	 */
 	vap->va_birthtime.tv_sec = -1;
 	vap->va_birthtime.tv_nsec = 0;
 	vap->va_fsid = VNOVAL;
 	vap->va_rdev = NODEV;
 
 	error = VOP_GETATTR(vp, vap, active_cred);
 	if (error)
 		return (error);
 
 	/*
 	 * Zero the spare stat fields
 	 */
 	bzero(sb, sizeof *sb);
 
 	/*
 	 * Copy from vattr table
 	 */
 	if (vap->va_fsid != VNOVAL)
 		sb->st_dev = vap->va_fsid;
 	else
 		sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
 	sb->st_ino = vap->va_fileid;
 	mode = vap->va_mode;
 	switch (vap->va_type) {
 	case VREG:
 		mode |= S_IFREG;
 		break;
 	case VDIR:
 		mode |= S_IFDIR;
 		break;
 	case VBLK:
 		mode |= S_IFBLK;
 		break;
 	case VCHR:
 		mode |= S_IFCHR;
 		break;
 	case VLNK:
 		mode |= S_IFLNK;
 		break;
 	case VSOCK:
 		mode |= S_IFSOCK;
 		break;
 	case VFIFO:
 		mode |= S_IFIFO;
 		break;
 	default:
 		return (EBADF);
 	}
 	sb->st_mode = mode;
 	sb->st_nlink = vap->va_nlink;
 	sb->st_uid = vap->va_uid;
 	sb->st_gid = vap->va_gid;
 	sb->st_rdev = vap->va_rdev;
 	if (vap->va_size > OFF_MAX)
 		return (EOVERFLOW);
 	sb->st_size = vap->va_size;
 	sb->st_atim = vap->va_atime;
 	sb->st_mtim = vap->va_mtime;
 	sb->st_ctim = vap->va_ctime;
 	sb->st_birthtim = vap->va_birthtime;
 
         /*
 	 * According to www.opengroup.org, the meaning of st_blksize is 
 	 *   "a filesystem-specific preferred I/O block size for this 
 	 *    object.  In some filesystem types, this may vary from file
 	 *    to file"
 	 * Use miminum/default of PAGE_SIZE (e.g. for VCHR).
 	 */
 
 	sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
 	
 	sb->st_flags = vap->va_flags;
 	if (priv_check(td, PRIV_VFS_GENERATION))
 		sb->st_gen = 0;
 	else
 		sb->st_gen = vap->va_gen;
 
 	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
 	return (0);
 }
 
 /*
  * File table vnode ioctl routine.
  *
  * For directories and regular files FIONREAD is answered here (file
  * size minus current offset) and FIONBIO / FIOASYNC are accepted as
  * no-ops.  Every other request on those types, and every request on a
  * character device, goes to VOP_IOCTL().  All other vnode types get
  * ENOTTY.
  */
 static int
 vn_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred,
     struct thread *td)
 {
 	struct vattr vattr;
 	struct vnode *vp;
 	int error;
 
 	vp = fp->f_vnode;
 	switch (vp->v_type) {
 	case VCHR:
 		break;
 	case VDIR:
 	case VREG:
 		if (com == FIONREAD) {
 			vn_lock(vp, LK_SHARED | LK_RETRY);
 			error = VOP_GETATTR(vp, &vattr, active_cred);
 			VOP_UNLOCK(vp, 0);
 			if (error == 0)
 				*(int *)data = vattr.va_size - fp->f_offset;
 			return (error);
 		}
 		if (com == FIONBIO || com == FIOASYNC)
 			return (0);
 		break;
 	default:
 		return (ENOTTY);
 	}
 	/* Everything not handled above is passed down to the filesystem. */
 	return (VOP_IOCTL(vp, com, data, fp->f_flag, active_cred, td));
 }
 
 /*
  * File table vnode poll routine.
  *
  * With MAC compiled in, the vnode is locked exclusively for the audit
  * record and the MAC poll check, and a failed check is returned as is.
  * Otherwise (or when the check passes) the result of VOP_POLL() is
  * returned.
  */
 static int
 vn_poll(struct file *fp, int events, struct ucred *active_cred,
     struct thread *td)
 {
 	struct vnode *vp;
 #ifdef MAC
 	int error;
 #endif
 
 	vp = fp->f_vnode;
 #ifdef MAC
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	error = mac_vnode_check_poll(active_cred, fp->f_cred, vp);
 	VOP_UNLOCK(vp, 0);
 	if (error != 0)
 		return (error);
 #endif
 	return (VOP_POLL(vp, events, fp->f_cred, td));
 }
 
 /*
  * Acquire the requested lock and then check for validity.  LK_RETRY
  * permits vn_lock to return doomed vnodes.
  *
  * Without LK_RETRY, a successfully locked but doomed vnode is unlocked
  * again and ENOENT is returned.  With LK_RETRY, the lock attempt is
  * repeated for as long as it fails.
  */
 int
 _vn_lock(struct vnode *vp, int flags, char *file, int line)
 {
 	int error;
 
 	VNASSERT((flags & LK_TYPE_MASK) != 0, vp,
 	    ("vn_lock: no locktype"));
 	VNASSERT(vp->v_holdcnt != 0, vp, ("vn_lock: zero hold count"));
 	do {
 		error = VOP_LOCK1(vp, flags, file, line);
 		flags &= ~LK_INTERLOCK;	/* Interlock is always dropped. */
 		KASSERT((flags & LK_RETRY) == 0 || error == 0,
 		    ("vn_lock: error %d incompatible with flags %#x", error, flags));
 
 		/* LK_RETRY: let the loop condition decide about a retry. */
 		if ((flags & LK_RETRY) != 0)
 			continue;
 
 		/* One-shot attempt: reject vnodes that are being reclaimed. */
 		if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) {
 			VOP_UNLOCK(vp, 0);
 			error = ENOENT;
 		}
 		break;
 	} while (error != 0);
 	return (error);
 }
 
 /*
  * File table vnode close routine.
  */
 static int
 vn_closefile(struct file *fp, struct thread *td)
 {
 	struct vnode *vp;
 	struct flock lf;
 	int error;
 	bool ref;
 
 	vp = fp->f_vnode;
 	fp->f_ops = &badfileops;
 	ref= (fp->f_flag & FHASLOCK) != 0 && fp->f_type == DTYPE_VNODE;
 
 	error = vn_close1(vp, fp->f_flag, fp->f_cred, td, ref);
 
 	if (__predict_false(ref)) {
 		lf.l_whence = SEEK_SET;
 		lf.l_start = 0;
 		lf.l_len = 0;
 		lf.l_type = F_UNLCK;
 		(void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
 		vrele(vp);
 	}
 	return (error);
 }
 
 static bool
 vn_suspendable(struct mount *mp)
 {
 
 	return (mp->mnt_op->vfs_susp_clean != NULL);
 }
 
 /*
  * Preparing to start a filesystem write operation. If the operation is
  * permitted, then we bump the count of operations in progress and
  * proceed. If a suspend request is in progress, we wait until the
  * suspension is over, and then proceed.
  *
  * Must be called with the mount interlock held (asserted); the interlock
  * is always released before returning.  On error, or when V_XSLEEP is
  * given, a mount reference is dropped with MNT_REL() and the write
  * count is not incremented.
  *
  * Returns 0, EWOULDBLOCK (V_NOWAIT while a suspension is in progress),
  * or the error from msleep().
  */
 static int
 vn_start_write_locked(struct mount *mp, int flags)
 {
 	int error, mflags;
 
 	mtx_assert(MNT_MTX(mp), MA_OWNED);
 	error = 0;
 
 	/*
 	 * Check on status of suspension.
 	 */
 	/* The wait is skipped for the suspension owner with TDP_IGNSUSP set. */
 	if ((curthread->td_pflags & TDP_IGNSUSP) == 0 ||
 	    mp->mnt_susp_owner != curthread) {
 		/* PCATCH is honoured only for filesystems flagged VFCF_SBDRY. */
 		mflags = ((mp->mnt_vfc->vfc_flags & VFCF_SBDRY) != 0 ?
 		    (flags & PCATCH) : 0) | (PUSER - 1);
 		while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
 			if (flags & V_NOWAIT) {
 				error = EWOULDBLOCK;
 				goto unlock;
 			}
 			error = msleep(&mp->mnt_flag, MNT_MTX(mp), mflags,
 			    "suspfs", 0);
 			if (error)
 				goto unlock;
 		}
 	}
 	/* V_XSLEEP: the caller only wanted to wait, not to start a write. */
 	if (flags & V_XSLEEP)
 		goto unlock;
 	mp->mnt_writeopcount++;
 unlock:
 	if (error != 0 || (flags & V_XSLEEP) != 0)
 		MNT_REL(mp);
 	MNT_IUNLOCK(mp);
 	return (error);
 }
 
 /*
  * Announce the start of a write operation.
  *
  * With a non-NULL `vp', the mount point is obtained through
  * VOP_GETWRITEMOUNT() and returned in `*mpp'.  With a NULL `vp', the
  * mount point is taken from `*mpp'; V_MNTREF in `flags' then requires a
  * non-NULL `*mpp' (asserted).
  *
  * Returns 0 immediately when there is no mount point, when
  * VOP_GETWRITEMOUNT() fails with EOPNOTSUPP, or when the filesystem is
  * not suspendable; other VOP_GETWRITEMOUNT() errors are returned with
  * `*mpp' set to NULL.  Otherwise the result of vn_start_write_locked()
  * is returned.
  */
 int
 vn_start_write(struct vnode *vp, struct mount **mpp, int flags)
 {
 	struct mount *mp;
 	int error;
 
 	KASSERT((flags & V_MNTREF) == 0 || (*mpp != NULL && vp == NULL),
 	    ("V_MNTREF requires mp"));
 
 	error = 0;
 	/*
 	 * If a vnode is provided, get and return the mount point
 	 * to which it will write.
 	 */
 	if (vp != NULL) {
 		if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
 			*mpp = NULL;
 			if (error != EOPNOTSUPP)
 				return (error);
 			return (0);
 		}
 	}
 	if ((mp = *mpp) == NULL)
 		return (0);
 
 	/* Nothing to count for this mount; just drop any reference held. */
 	if (!vn_suspendable(mp)) {
 		if (vp != NULL || (flags & V_MNTREF) != 0)
 			vfs_rel(mp);
 		return (0);
 	}
 
 	/*
 	 * VOP_GETWRITEMOUNT() returns with the mp refcount held through
 	 * a vfs_ref().
 	 * As long as a vnode is not provided we need to acquire a
 	 * refcount for the provided mountpoint too, in order to
 	 * emulate a vfs_ref().
 	 */
 	MNT_ILOCK(mp);
 	if (vp == NULL && (flags & V_MNTREF) == 0)
 		MNT_REF(mp);
 
 	/* Releases the interlock taken above. */
 	return (vn_start_write_locked(mp, flags));
 }
 
 /*
  * Secondary suspension. Used by operations such as vop_inactive
  * routines that are needed by the higher level functions. These
  * are allowed to proceed until all the higher level functions have
  * completed (indicated by mnt_writeopcount dropping to zero). At that
  * time, these operations are halted until the suspension is over.
  *
  * The mount point is determined as in vn_start_write().  Returns 0 when
  * the secondary write may proceed (or there is nothing to account for),
  * EWOULDBLOCK when V_NOWAIT is given and the filesystem is suspended,
  * or the error from VOP_GETWRITEMOUNT() / msleep().
  */
 int
 vn_start_secondary_write(struct vnode *vp, struct mount **mpp, int flags)
 {
 	struct mount *mp;
 	int error;
 
 	KASSERT((flags & V_MNTREF) == 0 || (*mpp != NULL && vp == NULL),
 	    ("V_MNTREF requires mp"));
 
  retry:
 	if (vp != NULL) {
 		if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
 			*mpp = NULL;
 			if (error != EOPNOTSUPP)
 				return (error);
 			return (0);
 		}
 	}
 	/*
 	 * If we are not suspended or have not yet reached suspended
 	 * mode, then let the operation proceed.
 	 */
 	if ((mp = *mpp) == NULL)
 		return (0);
 
 	/* Nothing to count for this mount; just drop any reference held. */
 	if (!vn_suspendable(mp)) {
 		if (vp != NULL || (flags & V_MNTREF) != 0)
 			vfs_rel(mp);
 		return (0);
 	}
 
 	/*
 	 * VOP_GETWRITEMOUNT() returns with the mp refcount held through
 	 * a vfs_ref().
 	 * As long as a vnode is not provided we need to acquire a
 	 * refcount for the provided mountpoint too, in order to
 	 * emulate a vfs_ref().
 	 */
 	MNT_ILOCK(mp);
 	if (vp == NULL && (flags & V_MNTREF) == 0)
 		MNT_REF(mp);
 	/* Not (yet) suspended: account for the secondary write and go. */
 	if ((mp->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND2)) == 0) {
 		mp->mnt_secondary_writes++;
 		mp->mnt_secondary_accwrites++;
 		MNT_IUNLOCK(mp);
 		return (0);
 	}
 	if (flags & V_NOWAIT) {
 		MNT_REL(mp);
 		MNT_IUNLOCK(mp);
 		return (EWOULDBLOCK);
 	}
 	/*
 	 * Wait for the suspension to finish.
 	 */
 	/* PDROP: msleep() returns with the mount interlock released. */
 	error = msleep(&mp->mnt_flag, MNT_MTX(mp), (PUSER - 1) | PDROP |
 	    ((mp->mnt_vfc->vfc_flags & VFCF_SBDRY) != 0 ? (flags & PCATCH) : 0),
 	    "suspfs", 0);
 	vfs_rel(mp);
 	/* Start over from scratch once the sleep ended without error. */
 	if (error == 0)
 		goto retry;
 	return (error);
 }
 
 /*
  * A filesystem write operation has completed.  Drop the mount reference
  * and the write count taken when the write started; if a suspension is
  * pending and this was the last writer, wake up the suspender.  A NULL
  * or non-suspendable mount is ignored.
  */
 void
 vn_finished_write(struct mount *mp)
 {
 	if (mp == NULL)
 		return;
 	if (!vn_suspendable(mp))
 		return;
 
 	MNT_ILOCK(mp);
 	MNT_REL(mp);
 	if (--mp->mnt_writeopcount < 0)
 		panic("vn_finished_write: neg cnt");
 	if (mp->mnt_writeopcount <= 0 &&
 	    (mp->mnt_kern_flag & MNTK_SUSPEND) != 0)
 		wakeup(&mp->mnt_writeopcount);
 	MNT_IUNLOCK(mp);
 }
 
 
 /*
  * A filesystem secondary write operation has completed.  Drop the mount
  * reference and the secondary write count; if a suspension is pending
  * and this was the last secondary writer, wake up the suspender.  A
  * NULL or non-suspendable mount is ignored.
  */
 void
 vn_finished_secondary_write(struct mount *mp)
 {
 	if (mp == NULL)
 		return;
 	if (!vn_suspendable(mp))
 		return;
 
 	MNT_ILOCK(mp);
 	MNT_REL(mp);
 	if (--mp->mnt_secondary_writes < 0)
 		panic("vn_finished_secondary_write: neg cnt");
 	if (mp->mnt_secondary_writes <= 0 &&
 	    (mp->mnt_kern_flag & MNTK_SUSPEND) != 0)
 		wakeup(&mp->mnt_secondary_writes);
 	MNT_IUNLOCK(mp);
 }
 
 
 
 /*
  * Request a filesystem to suspend write operations.
  *
  * The filesystem must be suspendable (asserted with MPASS).  Returns
  * EALREADY when the calling thread already owns the suspension, EBUSY
  * when VS_SKIP_UNMOUNT is given and the mount is being unmounted, or
  * the result of VFS_SYNC(mp, MNT_SUSPEND).  If that sync fails, the
  * suspension is undone with vfs_write_resume() before returning.
  */
 int
 vfs_write_suspend(struct mount *mp, int flags)
 {
 	int error;
 
 	MPASS(vn_suspendable(mp));
 
 	MNT_ILOCK(mp);
 	if (mp->mnt_susp_owner == curthread) {
 		MNT_IUNLOCK(mp);
 		return (EALREADY);
 	}
 	/* Wait for any suspension owned by another thread to end. */
 	while (mp->mnt_kern_flag & MNTK_SUSPEND)
 		msleep(&mp->mnt_flag, MNT_MTX(mp), PUSER - 1, "wsuspfs", 0);
 
 	/*
 	 * Unmount holds a write reference on the mount point.  If we
 	 * own busy reference and drain for writers, we deadlock with
 	 * the reference draining in the unmount path.  Callers of
 	 * vfs_write_suspend() must specify VS_SKIP_UNMOUNT if
 	 * vfs_busy() reference is owned and caller is not in the
 	 * unmount context.
 	 */
 	if ((flags & VS_SKIP_UNMOUNT) != 0 &&
 	    (mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
 		MNT_IUNLOCK(mp);
 		return (EBUSY);
 	}
 
 	mp->mnt_kern_flag |= MNTK_SUSPEND;
 	mp->mnt_susp_owner = curthread;
 	/* Either branch leaves the mount interlock released (PDROP). */
 	if (mp->mnt_writeopcount > 0)
 		(void) msleep(&mp->mnt_writeopcount, 
 		    MNT_MTX(mp), (PUSER - 1)|PDROP, "suspwt", 0);
 	else
 		MNT_IUNLOCK(mp);
 	if ((error = VFS_SYNC(mp, MNT_SUSPEND)) != 0)
 		vfs_write_resume(mp, 0);
 	return (error);
 }
 
 /*
  * Request a filesystem to resume write operations.
  */
 void
 vfs_write_resume(struct mount *mp, int flags)
 {
 
 	MPASS(vn_suspendable(mp));
 
 	MNT_ILOCK(mp);
 	if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
 		KASSERT(mp->mnt_susp_owner == curthread, ("mnt_susp_owner"));
 		mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPEND2 |
 				       MNTK_SUSPENDED);
 		mp->mnt_susp_owner = NULL;
 		wakeup(&mp->mnt_writeopcount);
 		wakeup(&mp->mnt_flag);
 		curthread->td_pflags &= ~TDP_IGNSUSP;
 		if ((flags & VR_START_WRITE) != 0) {
 			MNT_REF(mp);
 			mp->mnt_writeopcount++;
 		}
 		MNT_IUNLOCK(mp);
 		if ((flags & VR_NO_SUSPCLR) == 0)
 			VFS_SUSP_CLEAN(mp);
 	} else if ((flags & VR_START_WRITE) != 0) {
 		MNT_REF(mp);
 		vn_start_write_locked(mp, 0);
 	} else {
 		MNT_IUNLOCK(mp);
 	}
 }
 
 /*
  * Helper loop around vfs_write_suspend() for filesystem unmount VFS
  * methods.
  */
 int
 vfs_write_suspend_umnt(struct mount *mp)
 {
 	int error;
 
 	MPASS(vn_suspendable(mp));
 	KASSERT((curthread->td_pflags & TDP_IGNSUSP) == 0,
 	    ("vfs_write_suspend_umnt: recursed"));
 
 	/* dounmount() already called vn_start_write(). */
 	for (;;) {
 		vn_finished_write(mp);
 		error = vfs_write_suspend(mp, 0);
 		if (error != 0) {
 			vn_start_write(NULL, &mp, V_WAIT);
 			return (error);
 		}
 		MNT_ILOCK(mp);
 		if ((mp->mnt_kern_flag & MNTK_SUSPENDED) != 0)
 			break;
 		MNT_IUNLOCK(mp);
 		vn_start_write(NULL, &mp, V_WAIT);
 	}
 	mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2);
 	wakeup(&mp->mnt_flag);
 	MNT_IUNLOCK(mp);
 	curthread->td_pflags |= TDP_IGNSUSP;
 	return (0);
 }
 
 /*
  * Implement kqueues for files by translating it to vnode operation.
  */
 static int
 vn_kqfilter(struct file *fp, struct knote *kn)
 {
 
 	return (VOP_KQFILTER(fp->f_vnode, kn));
 }
 
 /*
  * Simplified in-kernel wrapper calls for extended attribute access.
  * Both calls pass in a NULL credential, authorizing as "kernel" access.
  * Set IO_NODELOCKED in ioflg if the vnode is already locked.
  */
 int
 vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
     const char *attrname, int *buflen, char *buf, struct thread *td)
 {
 	struct uio	auio;
 	struct iovec	iov;
 	int	error;
 
 	iov.iov_len = *buflen;
 	iov.iov_base = buf;
 
 	auio.uio_iov = &iov;
 	auio.uio_iovcnt = 1;
 	auio.uio_rw = UIO_READ;
 	auio.uio_segflg = UIO_SYSSPACE;
 	auio.uio_td = td;
 	auio.uio_offset = 0;
 	auio.uio_resid = *buflen;
 
 	if ((ioflg & IO_NODELOCKED) == 0)
 		vn_lock(vp, LK_SHARED | LK_RETRY);
 
 	ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 
 	/* authorize attribute retrieval as kernel */
 	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL,
 	    td);
 
 	if ((ioflg & IO_NODELOCKED) == 0)
 		VOP_UNLOCK(vp, 0);
 
 	if (error == 0) {
 		*buflen = *buflen - auio.uio_resid;
 	}
 
 	return (error);
 }
 
 /*
  * XXX failure mode if partially written?
  */
 int
 vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
     const char *attrname, int buflen, char *buf, struct thread *td)
 {
 	struct uio	auio;
 	struct iovec	iov;
 	struct mount	*mp;
 	int	error;
 
 	iov.iov_len = buflen;
 	iov.iov_base = buf;
 
 	auio.uio_iov = &iov;
 	auio.uio_iovcnt = 1;
 	auio.uio_rw = UIO_WRITE;
 	auio.uio_segflg = UIO_SYSSPACE;
 	auio.uio_td = td;
 	auio.uio_offset = 0;
 	auio.uio_resid = buflen;
 
 	if ((ioflg & IO_NODELOCKED) == 0) {
 		if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
 			return (error);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	}
 
 	ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 
 	/* authorize attribute setting as kernel */
 	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td);
 
 	if ((ioflg & IO_NODELOCKED) == 0) {
 		vn_finished_write(mp);
 		VOP_UNLOCK(vp, 0);
 	}
 
 	return (error);
 }
 
 int
 vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
     const char *attrname, struct thread *td)
 {
 	struct mount	*mp;
 	int	error;
 
 	if ((ioflg & IO_NODELOCKED) == 0) {
 		if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
 			return (error);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	}
 
 	ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 
 	/* authorize attribute removal as kernel */
 	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL, td);
 	if (error == EOPNOTSUPP)
 		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
 		    NULL, td);
 
 	if ((ioflg & IO_NODELOCKED) == 0) {
 		vn_finished_write(mp);
 		VOP_UNLOCK(vp, 0);
 	}
 
 	return (error);
 }
 
 static int
 vn_get_ino_alloc_vget(struct mount *mp, void *arg, int lkflags,
     struct vnode **rvp)
 {
 
 	return (VFS_VGET(mp, *(ino_t *)arg, lkflags, rvp));
 }
 
 int
 vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags, struct vnode **rvp)
 {
 
 	return (vn_vget_ino_gen(vp, vn_get_ino_alloc_vget, &ino,
 	    lkflags, rvp));
 }
 
 int
 vn_vget_ino_gen(struct vnode *vp, vn_get_ino_t alloc, void *alloc_arg,
     int lkflags, struct vnode **rvp)
 {
 	struct mount *mp;
 	int ltype, error;
 
 	ASSERT_VOP_LOCKED(vp, "vn_vget_ino_get");
 	mp = vp->v_mount;
 	ltype = VOP_ISLOCKED(vp);
 	KASSERT(ltype == LK_EXCLUSIVE || ltype == LK_SHARED,
 	    ("vn_vget_ino: vp not locked"));
 	error = vfs_busy(mp, MBF_NOWAIT);
 	if (error != 0) {
 		vfs_ref(mp);
 		VOP_UNLOCK(vp, 0);
 		error = vfs_busy(mp, 0);
 		vn_lock(vp, ltype | LK_RETRY);
 		vfs_rel(mp);
 		if (error != 0)
 			return (ENOENT);
 		if (vp->v_iflag & VI_DOOMED) {
 			vfs_unbusy(mp);
 			return (ENOENT);
 		}
 	}
 	VOP_UNLOCK(vp, 0);
 	error = alloc(mp, alloc_arg, lkflags, rvp);
 	vfs_unbusy(mp);
 	if (*rvp != vp)
 		vn_lock(vp, ltype | LK_RETRY);
 	if (vp->v_iflag & VI_DOOMED) {
 		if (error == 0) {
 			if (*rvp == vp)
 				vunref(vp);
 			else
 				vput(*rvp);
 		}
 		error = ENOENT;
 	}
 	return (error);
 }
 
 int
 vn_rlimit_fsize(const struct vnode *vp, const struct uio *uio,
     struct thread *td)
 {
 
 	if (vp->v_type != VREG || td == NULL)
 		return (0);
 	if ((uoff_t)uio->uio_offset + uio->uio_resid >
 	    lim_cur(td, RLIMIT_FSIZE)) {
 		PROC_LOCK(td->td_proc);
 		kern_psignal(td->td_proc, SIGXFSZ);
 		PROC_UNLOCK(td->td_proc);
 		return (EFBIG);
 	}
 	return (0);
 }
 
 int
 vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
     struct thread *td)
 {
 	struct vnode *vp;
 
 	vp = fp->f_vnode;
 #ifdef AUDIT
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	VOP_UNLOCK(vp, 0);
 #endif
 	return (setfmode(td, active_cred, vp, mode));
 }
 
 int
 vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
     struct thread *td)
 {
 	struct vnode *vp;
 
 	vp = fp->f_vnode;
 #ifdef AUDIT
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	VOP_UNLOCK(vp, 0);
 #endif
 	return (setfown(td, active_cred, vp, uid, gid));
 }
 
 void
 vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end)
 {
 	vm_object_t object;
 
 	if ((object = vp->v_object) == NULL)
 		return;
 	VM_OBJECT_WLOCK(object);
 	vm_object_page_remove(object, start, end, 0);
 	VM_OBJECT_WUNLOCK(object);
 }
 
 int
 vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred)
 {
 	struct vattr va;
 	daddr_t bn, bnp;
 	uint64_t bsize;
 	off_t noff;
 	int error;
 
 	KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA,
 	    ("Wrong command %lu", cmd));
 
 	if (vn_lock(vp, LK_SHARED) != 0)
 		return (EBADF);
 	if (vp->v_type != VREG) {
 		error = ENOTTY;
 		goto unlock;
 	}
 	error = VOP_GETATTR(vp, &va, cred);
 	if (error != 0)
 		goto unlock;
 	noff = *off;
 	if (noff >= va.va_size) {
 		error = ENXIO;
 		goto unlock;
 	}
 	bsize = vp->v_mount->mnt_stat.f_iosize;
 	for (bn = noff / bsize; noff < va.va_size; bn++, noff += bsize) {
 		error = VOP_BMAP(vp, bn, NULL, &bnp, NULL, NULL);
 		if (error == EOPNOTSUPP) {
 			error = ENOTTY;
 			goto unlock;
 		}
 		if ((bnp == -1 && cmd == FIOSEEKHOLE) ||
 		    (bnp != -1 && cmd == FIOSEEKDATA)) {
 			noff = bn * bsize;
 			if (noff < *off)
 				noff = *off;
 			goto unlock;
 		}
 	}
 	if (noff > va.va_size)
 		noff = va.va_size;
 	/* noff == va.va_size. There is an implicit hole at the end of file. */
 	if (cmd == FIOSEEKDATA)
 		error = ENXIO;
 unlock:
 	VOP_UNLOCK(vp, 0);
 	if (error == 0)
 		*off = noff;
 	return (error);
 }
 
 int
 vn_seek(struct file *fp, off_t offset, int whence, struct thread *td)
 {
 	struct ucred *cred;
 	struct vnode *vp;
 	struct vattr vattr;
 	off_t foffset, size;
 	int error, noneg;
 
 	cred = td->td_ucred;
 	vp = fp->f_vnode;
 	foffset = foffset_lock(fp, 0);
 	noneg = (vp->v_type != VCHR);
 	error = 0;
 	switch (whence) {
 	case L_INCR:
 		if (noneg &&
 		    (foffset < 0 ||
 		    (offset > 0 && foffset > OFF_MAX - offset))) {
 			error = EOVERFLOW;
 			break;
 		}
 		offset += foffset;
 		break;
 	case L_XTND:
 		vn_lock(vp, LK_SHARED | LK_RETRY);
 		error = VOP_GETATTR(vp, &vattr, cred);
 		VOP_UNLOCK(vp, 0);
 		if (error)
 			break;
 
 		/*
 		 * If the file references a disk device, then fetch
 		 * the media size and use that to determine the ending
 		 * offset.
 		 */
 		if (vattr.va_size == 0 && vp->v_type == VCHR &&
 		    fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0)
 			vattr.va_size = size;
 		if (noneg &&
 		    (vattr.va_size > OFF_MAX ||
 		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
 			error = EOVERFLOW;
 			break;
 		}
 		offset += vattr.va_size;
 		break;
 	case L_SET:
 		break;
 	case SEEK_DATA:
 		error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td);
 		break;
 	case SEEK_HOLE:
 		error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td);
 		break;
 	default:
 		error = EINVAL;
 	}
 	if (error == 0 && noneg && offset < 0)
 		error = EINVAL;
 	if (error != 0)
 		goto drop;
 	VFS_KNOTE_UNLOCKED(vp, 0);
 	td->td_uretoff.tdu_off = offset;
 drop:
 	foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0);
 	return (error);
 }
 
 int
 vn_utimes_perm(struct vnode *vp, struct vattr *vap, struct ucred *cred,
     struct thread *td)
 {
 	int error;
 
 	/*
 	 * Grant permission if the caller is the owner of the file, or
 	 * the super-user, or has ACL_WRITE_ATTRIBUTES permission on
 	 * on the file.  If the time pointer is null, then write
 	 * permission on the file is also sufficient.
 	 *
 	 * From NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes:
 	 * A user having ACL_WRITE_DATA or ACL_WRITE_ATTRIBUTES
 	 * will be allowed to set the times [..] to the current
 	 * server time.
 	 */
 	error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td);
 	if (error != 0 && (vap->va_vaflags & VA_UTIMES_NULL) != 0)
 		error = VOP_ACCESS(vp, VWRITE, cred, td);
 	return (error);
 }
 
 int
 vn_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
 {
 	struct vnode *vp;
 	int error;
 
 	if (fp->f_type == DTYPE_FIFO)
 		kif->kf_type = KF_TYPE_FIFO;
 	else
 		kif->kf_type = KF_TYPE_VNODE;
 	vp = fp->f_vnode;
 	vref(vp);
 	FILEDESC_SUNLOCK(fdp);
 	error = vn_fill_kinfo_vnode(vp, kif);
 	vrele(vp);
 	FILEDESC_SLOCK(fdp);
 	return (error);
 }
 
 static inline void
 vn_fill_junk(struct kinfo_file *kif)
 {
 	size_t len, olen;
 
 	/*
 	 * Simulate vn_fullpath returning changing values for a given
 	 * vp during e.g. coredump.
 	 */
 	len = (arc4random() % (sizeof(kif->kf_path) - 2)) + 1;
 	olen = strlen(kif->kf_path);
 	if (len < olen)
 		strcpy(&kif->kf_path[len - 1], "$");
 	else
 		for (; olen < len; olen++)
 			strcpy(&kif->kf_path[olen], "A");
 }
 
 int
 vn_fill_kinfo_vnode(struct vnode *vp, struct kinfo_file *kif)
 {
 	struct vattr va;
 	char *fullpath, *freepath;
 	int error;
 
-	kif->kf_vnode_type = vntype_to_kinfo(vp->v_type);
+	kif->kf_un.kf_file.kf_file_type = vntype_to_kinfo(vp->v_type);
 	freepath = NULL;
 	fullpath = "-";
 	error = vn_fullpath(curthread, vp, &fullpath, &freepath);
 	if (error == 0) {
 		strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path));
 	}
 	if (freepath != NULL)
 		free(freepath, M_TEMP);
 
 	KFAIL_POINT_CODE(DEBUG_FP, fill_kinfo_vnode__random_path,
 		vn_fill_junk(kif);
 	);
 
 	/*
 	 * Retrieve vnode attributes.
 	 */
 	va.va_fsid = VNOVAL;
 	va.va_rdev = NODEV;
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	error = VOP_GETATTR(vp, &va, curthread->td_ucred);
 	VOP_UNLOCK(vp, 0);
 	if (error != 0)
 		return (error);
 	if (va.va_fsid != VNOVAL)
 		kif->kf_un.kf_file.kf_file_fsid = va.va_fsid;
 	else
 		kif->kf_un.kf_file.kf_file_fsid =
 		    vp->v_mount->mnt_stat.f_fsid.val[0];
+	kif->kf_un.kf_file.kf_file_fsid_freebsd11 =
+	    kif->kf_un.kf_file.kf_file_fsid; /* truncate */
 	kif->kf_un.kf_file.kf_file_fileid = va.va_fileid;
 	kif->kf_un.kf_file.kf_file_mode = MAKEIMODE(va.va_type, va.va_mode);
 	kif->kf_un.kf_file.kf_file_size = va.va_size;
 	kif->kf_un.kf_file.kf_file_rdev = va.va_rdev;
+	kif->kf_un.kf_file.kf_file_rdev_freebsd11 =
+	    kif->kf_un.kf_file.kf_file_rdev; /* truncate */
 	return (0);
 }
 
 int
 vn_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
     vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
     struct thread *td)
 {
 #ifdef HWPMC_HOOKS
 	struct pmckern_map_in pkm;
 #endif
 	struct mount *mp;
 	struct vnode *vp;
 	vm_object_t object;
 	vm_prot_t maxprot;
 	boolean_t writecounted;
 	int error;
 
 #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
     defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
 	/*
 	 * POSIX shared-memory objects are defined to have
 	 * kernel persistence, and are not defined to support
 	 * read(2)/write(2) -- or even open(2).  Thus, we can
 	 * use MAP_ASYNC to trade on-disk coherence for speed.
 	 * The shm_open(3) library routine turns on the FPOSIXSHM
 	 * flag to request this behavior.
 	 */
 	if ((fp->f_flag & FPOSIXSHM) != 0)
 		flags |= MAP_NOSYNC;
 #endif
 	vp = fp->f_vnode;
 
 	/*
 	 * Ensure that file and memory protections are
 	 * compatible.  Note that we only worry about
 	 * writability if mapping is shared; in this case,
 	 * current and max prot are dictated by the open file.
 	 * XXX use the vnode instead?  Problem is: what
 	 * credentials do we use for determination? What if
 	 * proc does a setuid?
 	 */
 	mp = vp->v_mount;
 	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) {
 		maxprot = VM_PROT_NONE;
 		if ((prot & VM_PROT_EXECUTE) != 0)
 			return (EACCES);
 	} else
 		maxprot = VM_PROT_EXECUTE;
 	if ((fp->f_flag & FREAD) != 0)
 		maxprot |= VM_PROT_READ;
 	else if ((prot & VM_PROT_READ) != 0)
 		return (EACCES);
 
 	/*
 	 * If we are sharing potential changes via MAP_SHARED and we
 	 * are trying to get write permission although we opened it
 	 * without asking for it, bail out.
 	 */
 	if ((flags & MAP_SHARED) != 0) {
 		if ((fp->f_flag & FWRITE) != 0)
 			maxprot |= VM_PROT_WRITE;
 		else if ((prot & VM_PROT_WRITE) != 0)
 			return (EACCES);
 	} else {
 		maxprot |= VM_PROT_WRITE;
 		cap_maxprot |= VM_PROT_WRITE;
 	}
 	maxprot &= cap_maxprot;
 
 	/*
 	 * For regular files and shared memory, POSIX requires that
 	 * the value of foff be a legitimate offset within the data
 	 * object.  In particular, negative offsets are invalid.
 	 * Blocking negative offsets and overflows here avoids
 	 * possible wraparound or user-level access into reserved
 	 * ranges of the data object later.  In contrast, POSIX does
 	 * not dictate how offsets are used by device drivers, so in
 	 * the case of a device mapping a negative offset is passed
 	 * on.
 	 */
 	if (
 #ifdef _LP64
 	    size > OFF_MAX ||
 #endif
 	    foff < 0 || foff > OFF_MAX - size)
 		return (EINVAL);
 
 	writecounted = FALSE;
 	error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, vp,
 	    &foff, &object, &writecounted);
 	if (error != 0)
 		return (error);
 	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
 	    foff, writecounted, td);
 	if (error != 0) {
 		/*
 		 * If this mapping was accounted for in the vnode's
 		 * writecount, then undo that now.
 		 */
 		if (writecounted)
 			vnode_pager_release_writecount(object, 0, size);
 		vm_object_deallocate(object);
 	}
 #ifdef HWPMC_HOOKS
 	/* Inform hwpmc(4) if an executable is being mapped. */
 	if (PMC_HOOK_INSTALLED(PMC_FN_MMAP)) {
 		if ((prot & VM_PROT_EXECUTE) != 0 && error == 0) {
 			pkm.pm_file = vp;
 			pkm.pm_address = (uintptr_t) *addr;
 			PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
 		}
 	}
 #endif
 	return (error);
 }
Index: head/sys/nlm/nlm_advlock.c
===================================================================
--- head/sys/nlm/nlm_advlock.c	(revision 318735)
+++ head/sys/nlm/nlm_advlock.c	(revision 318736)
@@ -1,1273 +1,1274 @@
 /*-
  * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
  * Authors: Doug Rabson <dfr@rabson.org>
  * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/lockf.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 
 #include <nfs/nfsproto.h>
 #include <nfsclient/nfs.h>
 #include <nfsclient/nfsmount.h>
 
 #include <nlm/nlm_prot.h>
 #include <nlm/nlm.h>
 
 /*
  * We need to keep track of the svid values used for F_FLOCK locks.
  */
 struct nlm_file_svid {
 	int		ns_refs;	/* thread count + 1 if active */
 	int		ns_svid;	/* on-the-wire SVID for this file */
 	struct ucred	*ns_ucred;	/* creds to use for lock recovery */
 	void		*ns_id;		/* local struct file pointer */
 	bool_t		ns_active;	/* TRUE if we own a lock */
 	LIST_ENTRY(nlm_file_svid) ns_link;
 };
 LIST_HEAD(nlm_file_svid_list, nlm_file_svid);
 
 #define NLM_SVID_HASH_SIZE	256
 struct nlm_file_svid_list nlm_file_svids[NLM_SVID_HASH_SIZE];
 
 struct mtx nlm_svid_lock;
 static struct unrhdr *nlm_svid_allocator;
 static volatile u_int nlm_xid = 1;
 
 static int nlm_setlock(struct nlm_host *host, struct rpc_callextra *ext,
     rpcvers_t vers, struct timeval *timo, int retries,
     struct vnode *vp, int op, struct flock *fl, int flags,
     int svid, size_t fhlen, void *fh, off_t size, bool_t reclaim);
 static int nlm_clearlock(struct nlm_host *host,  struct rpc_callextra *ext,
     rpcvers_t vers, struct timeval *timo, int retries,
     struct vnode *vp, int op, struct flock *fl, int flags,
     int svid, size_t fhlen, void *fh, off_t size);
 static int nlm_getlock(struct nlm_host *host, struct rpc_callextra *ext,
     rpcvers_t vers, struct timeval *timo, int retries,
     struct vnode *vp, int op, struct flock *fl, int flags,
     int svid, size_t fhlen, void *fh, off_t size);
 static int nlm_map_status(nlm4_stats stat);
 static struct nlm_file_svid *nlm_find_svid(void *id);
 static void nlm_free_svid(struct nlm_file_svid *nf);
 static int nlm_init_lock(struct flock *fl, int flags, int svid,
     rpcvers_t vers, size_t fhlen, void *fh, off_t size,
     struct nlm4_lock *lock, char oh_space[32]);
 
 static void
 nlm_client_init(void *dummy)
 {
 	int i;
 
 	mtx_init(&nlm_svid_lock, "NLM svid lock", NULL, MTX_DEF);
 	/* pid_max cannot be greater than PID_MAX */
 	nlm_svid_allocator = new_unrhdr(PID_MAX + 2, INT_MAX, &nlm_svid_lock);
 	for (i = 0; i < NLM_SVID_HASH_SIZE; i++)
 		LIST_INIT(&nlm_file_svids[i]);
 }
 SYSINIT(nlm_client_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_client_init, NULL);
 
 static int
 nlm_msg(struct thread *td, const char *server, const char *msg, int error)
 {
 	struct proc *p;
 
 	p = td ? td->td_proc : NULL;
 	if (error) {
 		tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server,
 		    msg, error);
 	} else {
 		tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg);
 	}
 	return (0);
 }
 
 struct nlm_feedback_arg {
 	bool_t	nf_printed;
 	struct nfsmount *nf_nmp;
 };
 
 static void
 nlm_down(struct nlm_feedback_arg *nf, struct thread *td,
     const char *msg, int error)
 {
 	struct nfsmount *nmp = nf->nf_nmp;
 
 	if (nmp == NULL)
 		return;
 	mtx_lock(&nmp->nm_mtx);
 	if (!(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
 		nmp->nm_state |= NFSSTA_LOCKTIMEO;
 		mtx_unlock(&nmp->nm_mtx);
 		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
 		    VQ_NOTRESPLOCK, 0);
 	} else {
 		mtx_unlock(&nmp->nm_mtx);
 	}
 
 	nf->nf_printed = TRUE;
 	nlm_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
 }
 
 static void
 nlm_up(struct nlm_feedback_arg *nf, struct thread *td,
     const char *msg)
 {
 	struct nfsmount *nmp = nf->nf_nmp;
 
 	if (!nf->nf_printed)
 		return;
 
 	nlm_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
 
 	mtx_lock(&nmp->nm_mtx);
 	if (nmp->nm_state & NFSSTA_LOCKTIMEO) {
 		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
 		mtx_unlock(&nmp->nm_mtx);
 		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
 		    VQ_NOTRESPLOCK, 1);
 	} else {
 		mtx_unlock(&nmp->nm_mtx);
 	}
 }
 
 static void
 nlm_feedback(int type, int proc, void *arg)
 {
 	struct thread *td = curthread;
 	struct nlm_feedback_arg *nf = (struct nlm_feedback_arg *) arg;
 
 	switch (type) {
 	case FEEDBACK_REXMIT2:
 	case FEEDBACK_RECONNECT:
 		nlm_down(nf, td, "lockd not responding", 0);
 		break;
 
 	case FEEDBACK_OK:
 		nlm_up(nf, td, "lockd is alive again");
 		break;
 	}
 }
 
 /*
  * nlm_advlock --
  *      NFS advisory byte-level locks.
  */
 static int
 nlm_advlock_internal(struct vnode *vp, void *id, int op, struct flock *fl,
     int flags, bool_t reclaim, bool_t unlock_vp)
 {
 	struct thread *td = curthread;
 	struct nfsmount *nmp;
 	off_t size;
 	size_t fhlen;
 	union nfsfh fh;
 	struct sockaddr *sa;
 	struct sockaddr_storage ss;
-	char servername[MNAMELEN];
+	char *servername;
 	struct timeval timo;
 	int retries;
 	rpcvers_t vers;
 	struct nlm_host *host;
 	struct rpc_callextra ext;
 	struct nlm_feedback_arg nf;
 	AUTH *auth;
 	struct ucred *cred, *cred1;
 	struct nlm_file_svid *ns;
 	int svid;
 	int error;
 	int is_v3;
 
 	ASSERT_VOP_LOCKED(vp, "nlm_advlock_1");
 
+	servername = malloc(MNAMELEN, M_TEMP, M_WAITOK); /* XXXKIB vp locked */
 	nmp = VFSTONFS(vp->v_mount);
 	/*
 	 * Push any pending writes to the server and flush our cache
 	 * so that if we are contending with another machine for a
 	 * file, we get whatever they wrote and vice-versa.
 	 */
 	if (op == F_SETLK || op == F_UNLCK)
 		nmp->nm_vinvalbuf(vp, V_SAVE, td, 1);
 
 	strcpy(servername, nmp->nm_hostname);
 	nmp->nm_getinfo(vp, fh.fh_bytes, &fhlen, &ss, &is_v3, &size, &timo);
 	sa = (struct sockaddr *) &ss;
 	if (is_v3 != 0)
 		vers = NLM_VERS4;
 	else
 		vers = NLM_VERS;
 
 	if (nmp->nm_flag & NFSMNT_SOFT)
 		retries = nmp->nm_retry;
 	else
 		retries = INT_MAX;
 
 	/*
 	 * We need to switch to mount-point creds so that we can send
 	 * packets from a privileged port.  Reference mnt_cred and
 	 * switch to them before unlocking the vnode, since mount
 	 * point could be unmounted right after unlock.
 	 */
 	cred = td->td_ucred;
 	td->td_ucred = vp->v_mount->mnt_cred;
 	crhold(td->td_ucred);
 	if (unlock_vp)
 		VOP_UNLOCK(vp, 0);
 
 	host = nlm_find_host_by_name(servername, sa, vers);
 	auth = authunix_create(cred);
 	memset(&ext, 0, sizeof(ext));
 
 	nf.nf_printed = FALSE;
 	nf.nf_nmp = nmp;
 	ext.rc_auth = auth;
 
 	ext.rc_feedback = nlm_feedback;
 	ext.rc_feedback_arg = &nf;
 	ext.rc_timers = NULL;
 
 	ns = NULL;
 	if (flags & F_FLOCK) {
 		ns = nlm_find_svid(id);
 		KASSERT(fl->l_start == 0 && fl->l_len == 0,
 		    ("F_FLOCK lock requests must be whole-file locks"));
 		if (!ns->ns_ucred) {
 			/*
 			 * Remember the creds used for locking in case
 			 * we need to recover the lock later.
 			 */
 			ns->ns_ucred = crdup(cred);
 		}
 		svid = ns->ns_svid;
 	} else if (flags & F_REMOTE) {
 		/*
 		 * If we are recovering after a server restart or
 		 * trashing locks on a force unmount, use the same
 		 * svid as last time.
 		 */
 		svid = fl->l_pid;
 	} else {
 		svid = ((struct proc *) id)->p_pid;
 	}
 
 	switch(op) {
 	case F_SETLK:
 		if ((flags & (F_FLOCK|F_WAIT)) == (F_FLOCK|F_WAIT)
 		    && fl->l_type == F_WRLCK) {
 			/*
 			 * The semantics for flock(2) require that any
 			 * shared lock on the file must be released
 			 * before an exclusive lock is granted. The
 			 * local locking code interprets this by
 			 * unlocking the file before sleeping on a
 			 * blocked exclusive lock request. We
 			 * approximate this by first attempting
 			 * non-blocking and if that fails, we unlock
 			 * the file and block.
 			 */
 			error = nlm_setlock(host, &ext, vers, &timo, retries,
 			    vp, F_SETLK, fl, flags & ~F_WAIT,
 			    svid, fhlen, &fh.fh_bytes, size, reclaim);
 			if (error == EAGAIN) {
 				fl->l_type = F_UNLCK;
 				error = nlm_clearlock(host, &ext, vers, &timo,
 				    retries, vp, F_UNLCK, fl, flags,
 				    svid, fhlen, &fh.fh_bytes, size);
 				fl->l_type = F_WRLCK;
 				if (!error) {
 					mtx_lock(&nlm_svid_lock);
 					if (ns->ns_active) {
 						ns->ns_refs--;
 						ns->ns_active = FALSE;
 					}
 					mtx_unlock(&nlm_svid_lock);
 					flags |= F_WAIT;
 					error = nlm_setlock(host, &ext, vers,
 					    &timo, retries, vp, F_SETLK, fl,
 					    flags, svid, fhlen, &fh.fh_bytes,
 					    size, reclaim);
 				}
 			}
 		} else {
 			error = nlm_setlock(host, &ext, vers, &timo, retries,
 			    vp, op, fl, flags, svid, fhlen, &fh.fh_bytes,
 			    size, reclaim);
 		}
 		if (!error && ns) {
 			mtx_lock(&nlm_svid_lock);
 			if (!ns->ns_active) {
 				/*
 				 * Add one to the reference count to
 				 * hold onto the SVID for the lifetime
 				 * of the lock. Note that since
 				 * F_FLOCK only supports whole-file
 				 * locks, there can only be one active
 				 * lock for this SVID.
 				 */
 				ns->ns_refs++;
 				ns->ns_active = TRUE;
 			}
 			mtx_unlock(&nlm_svid_lock);
 		}
 		break;
 
 	case F_UNLCK:
 		error = nlm_clearlock(host, &ext, vers, &timo, retries,
 		    vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size);
 		if (!error && ns) {
 			mtx_lock(&nlm_svid_lock);
 			if (ns->ns_active) {
 				ns->ns_refs--;
 				ns->ns_active = FALSE;
 			}
 			mtx_unlock(&nlm_svid_lock);
 		}
 		break;
 
 	case F_GETLK:
 		error = nlm_getlock(host, &ext, vers, &timo, retries,
 		    vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	if (ns)
 		nlm_free_svid(ns);
 
 	cred1 = td->td_ucred;
 	td->td_ucred = cred;
 	crfree(cred1);
 	AUTH_DESTROY(auth);
 
 	nlm_host_release(host);
-
+	free(servername, M_TEMP);
 	return (error);
 }
 
 int
 nlm_advlock(struct vop_advlock_args *ap)
 {
 
 	return (nlm_advlock_internal(ap->a_vp, ap->a_id, ap->a_op, ap->a_fl,
 		ap->a_flags, FALSE, TRUE));
 }
 
 /*
  * Set the creds of td to the creds of the given lock's owner. The new
  * creds reference count will be incremented via crhold. The caller is
  * responsible for calling crfree and restoring td's original creds.
  */
 static void
 nlm_set_creds_for_lock(struct thread *td, struct flock *fl)
 {
 	int i;
 	struct nlm_file_svid *ns;
 	struct proc *p;
 	struct ucred *cred;
 
 	cred = NULL;
 	if (fl->l_pid > PID_MAX) {
 		/*
 		 * If this was originally a F_FLOCK-style lock, we
 		 * recorded the creds used when it was originally
 		 * locked in the nlm_file_svid structure.
 		 */
 		mtx_lock(&nlm_svid_lock);
 		for (i = 0; i < NLM_SVID_HASH_SIZE; i++) {
 			for (ns = LIST_FIRST(&nlm_file_svids[i]); ns;
 			     ns = LIST_NEXT(ns, ns_link)) {
 				if (ns->ns_svid == fl->l_pid) {
 					cred = crhold(ns->ns_ucred);
 					break;
 				}
 			}
 		}
 		mtx_unlock(&nlm_svid_lock);
 	} else {
 		/*
 		 * This lock is owned by a process. Get a reference to
 		 * the process creds.
 		 */
 		p = pfind(fl->l_pid);
 		if (p) {
 			cred = crhold(p->p_ucred);
 			PROC_UNLOCK(p);
 		}
 	}
 
 	/*
 	 * If we can't find a cred, fall back on the recovery
 	 * thread's cred.
 	 */
 	if (!cred) {
 		cred = crhold(td->td_ucred);
 	}
 
 	td->td_ucred = cred;
 }
 
 static int
 nlm_reclaim_free_lock(struct vnode *vp, struct flock *fl, void *arg)
 {
 	struct flock newfl;
 	struct thread *td = curthread;
 	struct ucred *oldcred;
 	int error;
 
 	newfl = *fl;
 	newfl.l_type = F_UNLCK;
 
 	oldcred = td->td_ucred;
 	nlm_set_creds_for_lock(td, &newfl);
 
 	error = nlm_advlock_internal(vp, NULL, F_UNLCK, &newfl, F_REMOTE,
 	    FALSE, FALSE);
 
 	crfree(td->td_ucred);
 	td->td_ucred = oldcred;
 
 	return (error);
 }
 
 int
 nlm_reclaim(struct vop_reclaim_args *ap)
 {
 
 	nlm_cancel_wait(ap->a_vp);
 	lf_iteratelocks_vnode(ap->a_vp, nlm_reclaim_free_lock, NULL);
 	return (0);
 }
 
 struct nlm_recovery_context {
 	struct nlm_host	*nr_host;	/* host we are recovering */
 	int		nr_state;	/* remote NSM state for recovery */
 };
 
 static int
 nlm_client_recover_lock(struct vnode *vp, struct flock *fl, void *arg)
 {
 	struct nlm_recovery_context *nr = (struct nlm_recovery_context *) arg;
 	struct thread *td = curthread;
 	struct ucred *oldcred;
 	int state, error;
 
 	/*
 	 * If the remote NSM state changes during recovery, the host
 	 * must have rebooted a second time. In that case, we must
 	 * restart the recovery.
 	 */
 	state = nlm_host_get_state(nr->nr_host);
 	if (nr->nr_state != state)
 		return (ERESTART);
 
 	error = vn_lock(vp, LK_SHARED);
 	if (error)
 		return (error);
 
 	oldcred = td->td_ucred;
 	nlm_set_creds_for_lock(td, fl);
 
 	error = nlm_advlock_internal(vp, NULL, F_SETLK, fl, F_REMOTE,
 	    TRUE, TRUE);
 
 	crfree(td->td_ucred);
 	td->td_ucred = oldcred;
 
 	return (error);
 }
 
 void
 nlm_client_recovery(struct nlm_host *host)
 {
 	struct nlm_recovery_context nr;
 	int sysid, error;
 
 	sysid = NLM_SYSID_CLIENT | nlm_host_get_sysid(host);
 	do {
 		nr.nr_host = host;
 		nr.nr_state = nlm_host_get_state(host);
 		error = lf_iteratelocks_sysid(sysid,
 		    nlm_client_recover_lock, &nr);
 	} while (error == ERESTART);
 }
 
 static void
 nlm_convert_to_nlm_lock(struct nlm_lock *dst, struct nlm4_lock *src)
 {
 
 	dst->caller_name = src->caller_name;
 	dst->fh = src->fh;
 	dst->oh = src->oh;
 	dst->svid = src->svid;
 	dst->l_offset = src->l_offset;
 	dst->l_len = src->l_len;
 }
 
 /*
  * Fill in an NLM4 lock-holder description from the older-protocol
  * struct nlm_holder, member by member.
  */
 static void
 nlm_convert_to_nlm4_holder(struct nlm4_holder *dst, struct nlm_holder *src)
 {
 
 	/* Byte range held. */
 	dst->l_offset = src->l_offset;
 	dst->l_len = src->l_len;
 
 	/* Who holds it, and whether exclusively. */
 	dst->oh = src->oh;
 	dst->svid = src->svid;
 	dst->exclusive = src->exclusive;
 }
 
 /*
  * Fill in an NLM4 result from the older-protocol struct nlm_res: the
  * status is cast across to the NLM4 enum and the cookie is copied.
  */
 static void
 nlm_convert_to_nlm4_res(struct nlm4_res *dst, struct nlm_res *src)
 {
 
 	dst->stat.stat = (enum nlm4_stats) src->stat.stat;
 	dst->cookie = src->cookie;
 }
 
 /*
  * Issue an NLM TEST call in whichever protocol version the server
  * speaks.  Callers always work with the NLM4 argument/result types;
  * for anything other than NLM_VERS4 the request is translated down and
  * the reply translated back up.  Returns the RPC status; *res is only
  * filled in for the older protocol when the call succeeded.
  */
 static enum clnt_stat
 nlm_test_rpc(rpcvers_t vers, nlm4_testargs *args, nlm4_testres *res, CLIENT *client,
     struct rpc_callextra *ext, struct timeval timo)
 {
 	nlm_testargs v1_args;
 	nlm_testres v1_res;
 	enum clnt_stat rpc_stat;
 
 	/* NLM4 can use the caller's structures directly. */
 	if (vers == NLM_VERS4)
 		return (nlm4_test_4(args, res, client, ext, timo));
 
 	memset(&v1_res, 0, sizeof(v1_res));
 	v1_args.cookie = args->cookie;
 	v1_args.exclusive = args->exclusive;
 	nlm_convert_to_nlm_lock(&v1_args.alock, &args->alock);
 
 	rpc_stat = nlm_test_1(&v1_args, &v1_res, client, ext, timo);
 	if (rpc_stat != RPC_SUCCESS)
 		return (rpc_stat);
 
 	res->cookie = v1_res.cookie;
 	res->stat.stat = (enum nlm4_stats) v1_res.stat.stat;
 	/* Only a denied reply carries a holder description. */
 	if (v1_res.stat.stat == nlm_denied) {
 		nlm_convert_to_nlm4_holder(&res->stat.nlm4_testrply_u.holder,
 		    &v1_res.stat.nlm_testrply_u.holder);
 	}
 
 	return (rpc_stat);
 }
 
 /*
  * Issue an NLM LOCK call in whichever protocol version the server
  * speaks.  For anything other than NLM_VERS4 the NLM4 arguments are
  * translated down and, on RPC success, the reply is translated back
  * into *res.  Returns the RPC status.
  */
 static enum clnt_stat
 nlm_lock_rpc(rpcvers_t vers, nlm4_lockargs *args, nlm4_res *res, CLIENT *client,
     struct rpc_callextra *ext, struct timeval timo)
 {
 	nlm_lockargs v1_args;
 	nlm_res v1_res;
 	enum clnt_stat rpc_stat;
 
 	/* NLM4 can use the caller's structures directly. */
 	if (vers == NLM_VERS4)
 		return (nlm4_lock_4(args, res, client, ext, timo));
 
 	memset(&v1_res, 0, sizeof(v1_res));
 	v1_args.cookie = args->cookie;
 	v1_args.block = args->block;
 	v1_args.exclusive = args->exclusive;
 	v1_args.reclaim = args->reclaim;
 	v1_args.state = args->state;
 	nlm_convert_to_nlm_lock(&v1_args.alock, &args->alock);
 
 	rpc_stat = nlm_lock_1(&v1_args, &v1_res, client, ext, timo);
 	if (rpc_stat == RPC_SUCCESS)
 		nlm_convert_to_nlm4_res(res, &v1_res);
 
 	return (rpc_stat);
 }
 
 /*
  * Issue an NLM CANCEL call in whichever protocol version the server
  * speaks.  For anything other than NLM_VERS4 the NLM4 arguments are
  * translated down and, on RPC success, the reply is translated back
  * into *res.  Returns the RPC status.
  */
 static enum clnt_stat
 nlm_cancel_rpc(rpcvers_t vers, nlm4_cancargs *args, nlm4_res *res, CLIENT *client,
     struct rpc_callextra *ext, struct timeval timo)
 {
 	nlm_cancargs v1_args;
 	nlm_res v1_res;
 	enum clnt_stat rpc_stat;
 
 	/* NLM4 can use the caller's structures directly. */
 	if (vers == NLM_VERS4)
 		return (nlm4_cancel_4(args, res, client, ext, timo));
 
 	memset(&v1_res, 0, sizeof(v1_res));
 	v1_args.cookie = args->cookie;
 	v1_args.block = args->block;
 	v1_args.exclusive = args->exclusive;
 	nlm_convert_to_nlm_lock(&v1_args.alock, &args->alock);
 
 	rpc_stat = nlm_cancel_1(&v1_args, &v1_res, client, ext, timo);
 	if (rpc_stat == RPC_SUCCESS)
 		nlm_convert_to_nlm4_res(res, &v1_res);
 
 	return (rpc_stat);
 }
 
 /*
  * Issue an NLM UNLOCK call in whichever protocol version the server
  * speaks.  For anything other than NLM_VERS4 the NLM4 arguments are
  * translated down and, on RPC success, the reply is translated back
  * into *res.  Returns the RPC status.
  */
 static enum clnt_stat
 nlm_unlock_rpc(rpcvers_t vers, nlm4_unlockargs *args, nlm4_res *res, CLIENT *client,
     struct rpc_callextra *ext, struct timeval timo)
 {
 	nlm_unlockargs v1_args;
 	nlm_res v1_res;
 	enum clnt_stat rpc_stat;
 
 	/* NLM4 can use the caller's structures directly. */
 	if (vers == NLM_VERS4)
 		return (nlm4_unlock_4(args, res, client, ext, timo));
 
 	memset(&v1_res, 0, sizeof(v1_res));
 	v1_args.cookie = args->cookie;
 	nlm_convert_to_nlm_lock(&v1_args.alock, &args->alock);
 
 	rpc_stat = nlm_unlock_1(&v1_args, &v1_res, client, ext, timo);
 	if (rpc_stat == RPC_SUCCESS)
 		nlm_convert_to_nlm4_res(res, &v1_res);
 
 	return (rpc_stat);
 }
 
 /*
  * Called after a lock request (set or clear) succeeded. We record the
  * details in the local lock manager. Note that since the remote
  * server has granted the lock, we can be sure that it doesn't
  * conflict with any other locks we have in the local lock manager.
  *
  * Since it is possible that host may also make NLM client requests to
  * our NLM server, we use a different sysid value to record our own
  * client locks.
  *
  * Note that since it is possible for us to receive replies from the
  * server in a different order than the locks were granted (e.g. if
  * many local threads are contending for the same lock), we must use a
  * blocking operation when registering with the local lock manager.
  * We expect that any actual wait will be rare and short hence we
  * ignore signals for this.
  *
  * Arguments:
  *   vp     vnode whose v_lockf list receives the record
  *   op     lock operation passed through as a_op
  *   fl     range, type and whence of the lock; copied, not modified
  *   svid   recorded as the lock's l_pid
  *   sysid  host sysid; NLM_SYSID_CLIENT is or'ed in before recording
  *   size   passed to lf_advlockasync() as the file size
  *
  * There is no return value; an unexpected error trips the KASSERT.
  */
 static void
 nlm_record_lock(struct vnode *vp, int op, struct flock *fl,
     int svid, int sysid, off_t size)
 {
 	struct vop_advlockasync_args a;
 	struct flock newfl;
 	struct proc *p;
 	int error, stops_deferred;
 
 	/* Build a synchronous request (no task, no cookie). */
 	a.a_vp = vp;
 	a.a_id = NULL;
 	a.a_op = op;
 	a.a_fl = &newfl;
 	/* Blocking and not interruptible, per the comment above. */
 	a.a_flags = F_REMOTE|F_WAIT|F_NOINTR;
 	a.a_task = NULL;
 	a.a_cookiep = NULL;
 	/* Private copy of the caller's flock with our own owner identity. */
 	newfl.l_start = fl->l_start;
 	newfl.l_len = fl->l_len;
 	newfl.l_type = fl->l_type;
 	newfl.l_whence = fl->l_whence;
 	newfl.l_pid = svid;
 	newfl.l_sysid = NLM_SYSID_CLIENT | sysid;
 
 	/* Retry for as long as the local manager reports EDEADLK. */
 	for (;;) {
 		error = lf_advlockasync(&a, &vp->v_lockf, size);
 		if (error == EDEADLK) {
 			/*
 			 * Locks are associated with the processes and
 			 * not with threads.  Suppose we have two
 			 * threads A1 A2 in one process, A1 locked
 			 * file f1, A2 is locking file f2, and A1 is
 			 * unlocking f1. Then remote server may
 			 * already unlocked f1, while local still not
 			 * yet scheduled A1 to make the call to local
 			 * advlock manager. The process B owns lock on
 			 * f2 and issued the lock on f1.  Remote would
 			 * grant B the request on f1, but local would
 			 * return EDEADLK.
 			 */
 			/* Back off for one tick before trying again. */
 			pause("nlmdlk", 1);
 			p = curproc;
 			/*
 			 * Run a suspension check with stop deferral
 			 * switched off, then restore the previous
 			 * deferral setting.
 			 */
 			stops_deferred = sigdeferstop(SIGDEFERSTOP_OFF);
 			PROC_LOCK(p);
 			thread_suspend_check(0);
 			PROC_UNLOCK(p);
 			sigallowstop(stops_deferred);
 		} else if (error == EINTR) {
 			/*
 			 * lf_purgelocks() might wake up the lock
 			 * waiter and removed our lock graph edges.
 			 * There is no sense in re-trying recording
 			 * the lock to the local manager after
 			 * reclaim.
 			 */
 			error = 0;
 			break;
 		} else
 			break;
 	}
 	/* Only success and ENOENT are tolerated here. */
 	KASSERT(error == 0 || error == ENOENT,
 	    ("Failed to register NFS lock locally - error=%d", error));
 }
 
 /*
  * Ask the server for a lock and, on success, record it locally.
  *
  *   host, ext, vers, timo  where and how to send the RPC
  *   retries   extra attempts allowed after an RPC-level failure
  *   vp, op    vnode and operation handed to nlm_record_lock()
  *   fl        the lock wanted; F_WRLCK requests an exclusive lock
  *   flags     F_WAIT makes this a blocking request
  *   svid, fhlen, fh, size  used by nlm_init_lock() to build the request
  *   reclaim   TRUE when re-establishing a lock after a server reboot;
  *             such locks are not recorded locally again
  *
  * Returns 0 on success, ENOLCK when no RPC client is available, EINVAL
  * when the RPC keeps failing, an error from nlm_init_lock() or from an
  * interrupted sleep, or the errno that nlm_map_status() gives for the
  * server's answer.
  */
 static int
 nlm_setlock(struct nlm_host *host, struct rpc_callextra *ext,
     rpcvers_t vers, struct timeval *timo, int retries,
     struct vnode *vp, int op, struct flock *fl, int flags,
     int svid, size_t fhlen, void *fh, off_t size, bool_t reclaim)
 {
 	struct nlm4_lockargs args;
 	char oh_space[32];
 	struct nlm4_res res;
 	u_int xid;
 	CLIENT *client;
 	enum clnt_stat stat;
 	int retry, block, exclusive;
 	void *wait_handle = NULL;
 	int error;
 
 	memset(&args, 0, sizeof(args));
 	memset(&res, 0, sizeof(res));
 
 	block = (flags & F_WAIT) ? TRUE : FALSE;
 	exclusive = (fl->l_type == F_WRLCK);
 
 	error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size,
 	    &args.alock, oh_space);
 	if (error)
 		return (error);
 	args.block = block;
 	args.exclusive = exclusive;
 	args.reclaim = reclaim;
 	args.state = nlm_nsm_state;
 
 	/* Back-off interval: starts at 5s, doubles, capped at 30s. */
 	retry = 5*hz;
 	for (;;) {
 		client = nlm_host_get_rpc(host, FALSE);
 		if (!client)
 			return (ENOLCK); /* XXX retry? */
 
 		/*
 		 * For blocking requests, register the wait before
 		 * sending the request.
 		 */
 		if (block)
 			wait_handle = nlm_register_wait_lock(&args.alock, vp);
 
 		/* Each attempt carries a fresh xid as its cookie. */
 		xid = atomic_fetchadd_int(&nlm_xid, 1);
 		args.cookie.n_len = sizeof(xid);
 		args.cookie.n_bytes = (char*) &xid;
 
 		stat = nlm_lock_rpc(vers, &args, &res, client, ext, *timo);
 
 		CLNT_RELEASE(client);
 
 		if (stat != RPC_SUCCESS) {
 			if (block)
 				nlm_deregister_wait_lock(wait_handle);
 			if (retries) {
 				retries--;
 				continue;
 			}
 			return (EINVAL);
 		}
 
 		/*
 		 * Free res.cookie.
 		 */
 		xdr_free((xdrproc_t) xdr_nlm4_res, &res);
 
 		/* Only a "blocked" answer leaves us waiting for a grant. */
 		if (block && res.stat.stat != nlm4_blocked)
 			nlm_deregister_wait_lock(wait_handle);
 
 		if (res.stat.stat == nlm4_denied_grace_period) {
 			/*
 			 * The server has recently rebooted and is
 			 * giving old clients a chance to reclaim
 			 * their locks. Wait for a few seconds and try
 			 * again.
 			 */
 			error = tsleep(&args, PCATCH, "nlmgrace", retry);
 			if (error && error != EWOULDBLOCK)
 				return (error);
 			retry = 2*retry;
 			if (retry > 30*hz)
 				retry = 30*hz;
 			continue;
 		}
 
 		if (block && res.stat.stat == nlm4_blocked) {
 			/*
 			 * The server should call us back with a
 			 * granted message when the lock succeeds. In
 			 * order to deal with broken servers, lost
 			 * granted messages and server reboots, we
 			 * will also re-try every few seconds.
 			 */
 			error = nlm_wait_lock(wait_handle, retry);
 			if (error == EWOULDBLOCK) {
 				retry = 2*retry;
 				if (retry > 30*hz)
 					retry = 30*hz;
 				continue;
 			}
 			if (error) {
 				/*
 				 * We need to call the server to
 				 * cancel our lock request.
 				 */
 				nlm4_cancargs cancel;
 
 				memset(&cancel, 0, sizeof(cancel));
 
 				xid = atomic_fetchadd_int(&nlm_xid, 1);
 				cancel.cookie.n_len = sizeof(xid);
 				cancel.cookie.n_bytes = (char*) &xid;
 				cancel.block = block;
 				cancel.exclusive = exclusive;
 				cancel.alock = args.alock;
 
 				do {
 					client = nlm_host_get_rpc(host, FALSE);
 					if (!client)
 						/* XXX retry? */
 						return (ENOLCK);
 
 					stat = nlm_cancel_rpc(vers, &cancel,
 					    &res, client, ext, *timo);
 
 					CLNT_RELEASE(client);
 
 					if (stat != RPC_SUCCESS) {
 						/*
 						 * We need to cope
 						 * with temporary
 						 * network partitions
 						 * as well as server
 						 * reboots. This means
 						 * we have to keep
 						 * trying to cancel
 						 * until the server
 						 * wakes up again.
 						 */
 						pause("nlmcancel", 10*hz);
 					}
 				} while (stat != RPC_SUCCESS);
 
 				/*
 				 * Free res.cookie.
 				 */
 				xdr_free((xdrproc_t) xdr_nlm4_res, &res);
 
 				/*
 				 * res is an NLM4 result, so compare
 				 * against the nlm4_stats constants.
 				 */
 				switch (res.stat.stat) {
 				case nlm4_denied:
 					/*
 					 * There was nothing
 					 * to cancel. We are
 					 * going to go ahead
 					 * and assume we got
 					 * the lock.
 					 */
 					error = 0;
 					break;
 
 				case nlm4_denied_grace_period:
 					/*
 					 * The server has
 					 * recently rebooted -
 					 * treat this as a
 					 * successful
 					 * cancellation.
 					 */
 					break;
 
 				case nlm4_granted:
 					/*
 					 * We managed to
 					 * cancel.
 					 */
 					break;
 
 				default:
 					/*
 					 * Broken server
 					 * implementation -
 					 * can't really do
 					 * anything here.
 					 */
 					break;
 				}
 
 			}
 		} else {
 			error = nlm_map_status(res.stat.stat);
 		}
 
 		/*
 		 * A newly granted lock is recorded locally and the host
 		 * is monitored; reclaimed locks skip both steps.
 		 */
 		if (!error && !reclaim) {
 			nlm_record_lock(vp, op, fl, args.alock.svid,
 			    nlm_host_get_sysid(host), size);
 			nlm_host_monitor(host, 0);
 		}
 
 		return (error);
 	}
 }
 
 /*
  * Ask the server to release a lock and, unless called with F_REMOTE,
  * record the unlock locally.  Returns 0 on success, ENOLCK when no RPC
  * client is available, EINVAL when the RPC keeps failing after
  * `retries' extra attempts, or an error from nlm_init_lock() or from
  * an interrupted grace-period sleep.
  */
 static int
 nlm_clearlock(struct nlm_host *host, struct rpc_callextra *ext,
     rpcvers_t vers, struct timeval *timo, int retries,
     struct vnode *vp, int op, struct flock *fl, int flags,
     int svid, size_t fhlen, void *fh, off_t size)
 {
 	struct nlm4_unlockargs uargs;
 	struct nlm4_res reply;
 	char oh_space[32];
 	CLIENT *clnt;
 	enum clnt_stat rpc_stat;
 	u_int cookie_xid;
 	int rv;
 
 	memset(&uargs, 0, sizeof(uargs));
 	memset(&reply, 0, sizeof(reply));
 
 	rv = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size,
 	    &uargs.alock, oh_space);
 	if (rv != 0)
 		return (rv);
 
 	for (;;) {
 		clnt = nlm_host_get_rpc(host, FALSE);
 		if (clnt == NULL)
 			return (ENOLCK); /* XXX retry? */
 
 		/* Each attempt carries a fresh xid as its cookie. */
 		cookie_xid = atomic_fetchadd_int(&nlm_xid, 1);
 		uargs.cookie.n_bytes = (char*) &cookie_xid;
 		uargs.cookie.n_len = sizeof(cookie_xid);
 
 		rpc_stat = nlm_unlock_rpc(vers, &uargs, &reply, clnt, ext,
 		    *timo);
 		CLNT_RELEASE(clnt);
 
 		if (rpc_stat != RPC_SUCCESS) {
 			/* Out of attempts: give up. */
 			if (retries == 0)
 				return (EINVAL);
 			retries--;
 			continue;
 		}
 
 		/* Release the cookie held by the reply. */
 		xdr_free((xdrproc_t) xdr_nlm4_res, &reply);
 
 		if (reply.stat.stat == nlm4_denied_grace_period) {
 			/*
 			 * The server rebooted recently and is in its
 			 * grace period, giving old clients a chance
 			 * to reclaim their locks.  Sleep for a few
 			 * seconds and ask again.
 			 */
 			rv = tsleep(&uargs, PCATCH, "nlmgrace", 5*hz);
 			if (rv != 0 && rv != EWOULDBLOCK)
 				return (rv);
 			continue;
 		}
 
 		/*
 		 * Calls arriving via nlm_reclaim carry F_REMOTE; the
 		 * vnode is going away, so there is no point recording
 		 * the operation in the local lock manager for them.
 		 */
 		if ((flags & F_REMOTE) == 0) {
 			nlm_record_lock(vp, op, fl, uargs.alock.svid,
 			    nlm_host_get_sysid(host), size);
 		}
 
 		return (0);
 	}
 }
 
 /*
  * Ask the server whether the lock described by fl could be granted
  * (F_GETLK).  On success fl is rewritten: to the conflicting holder's
  * range, type and pid when the server answers "denied", otherwise to
  * F_UNLCK.  Returns 0 on success, ENOLCK when no RPC client is
  * available, EINVAL when the RPC keeps failing after `retries' extra
  * attempts, or an error from nlm_init_lock() or from an interrupted
  * grace-period sleep.
  */
 static int
 nlm_getlock(struct nlm_host *host, struct rpc_callextra *ext,
     rpcvers_t vers, struct timeval *timo, int retries,
     struct vnode *vp, int op, struct flock *fl, int flags,
     int svid, size_t fhlen, void *fh, off_t size)
 {
 	struct nlm4_testargs targs;
 	struct nlm4_testres reply;
 	struct nlm4_holder *holder;
 	char oh_space[32];
 	CLIENT *clnt;
 	enum clnt_stat rpc_stat;
 	u_int cookie_xid;
 	int want_excl;
 	int rv;
 
 	KASSERT(!(flags & F_FLOCK), ("unexpected F_FLOCK for F_GETLK"));
 
 	memset(&targs, 0, sizeof(targs));
 	memset(&reply, 0, sizeof(reply));
 
 	want_excl = (fl->l_type == F_WRLCK);
 
 	rv = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size,
 	    &targs.alock, oh_space);
 	if (rv != 0)
 		return (rv);
 	targs.exclusive = want_excl;
 
 	for (;;) {
 		clnt = nlm_host_get_rpc(host, FALSE);
 		if (clnt == NULL)
 			return (ENOLCK); /* XXX retry? */
 
 		/* Each attempt carries a fresh xid as its cookie. */
 		cookie_xid = atomic_fetchadd_int(&nlm_xid, 1);
 		targs.cookie.n_bytes = (char*) &cookie_xid;
 		targs.cookie.n_len = sizeof(cookie_xid);
 
 		rpc_stat = nlm_test_rpc(vers, &targs, &reply, clnt, ext,
 		    *timo);
 		CLNT_RELEASE(clnt);
 
 		if (rpc_stat != RPC_SUCCESS) {
 			/* Out of attempts: give up. */
 			if (retries == 0)
 				return (EINVAL);
 			retries--;
 			continue;
 		}
 
 		if (reply.stat.stat == nlm4_denied_grace_period) {
 			/*
 			 * The server rebooted recently and is in its
 			 * grace period, giving old clients a chance
 			 * to reclaim their locks.  Sleep for a few
 			 * seconds and ask again.
 			 */
 			xdr_free((xdrproc_t) xdr_nlm4_testres, &reply);
 			rv = tsleep(&targs, PCATCH, "nlmgrace", 5*hz);
 			if (rv != 0 && rv != EWOULDBLOCK)
 				return (rv);
 			continue;
 		}
 
 		if (reply.stat.stat != nlm4_denied) {
 			/* Nothing conflicts with the requested lock. */
 			fl->l_type = F_UNLCK;
 		} else {
 			/* Report the conflicting holder to the caller. */
 			holder = &reply.stat.nlm4_testrply_u.holder;
 			fl->l_start = holder->l_offset;
 			fl->l_len = holder->l_len;
 			fl->l_pid = holder->svid;
 			fl->l_type = holder->exclusive ? F_WRLCK : F_RDLCK;
 			fl->l_whence = SEEK_SET;
 			fl->l_sysid = 0;
 		}
 
 		xdr_free((xdrproc_t) xdr_nlm4_testres, &reply);
 
 		return (0);
 	}
 }
 
 /*
  * Translate an NLM4 status code into an errno value.  nlm4_granted
  * maps to 0; any status without an explicit case maps to EINVAL.
  */
 static int
 nlm_map_status(nlm4_stats stat)
 {
 	int error;
 
 	switch (stat) {
 	case nlm4_granted:
 		error = 0;
 		break;
 	case nlm4_denied:
 		error = EAGAIN;
 		break;
 	case nlm4_denied_nolocks:
 		error = ENOLCK;
 		break;
 	case nlm4_deadlck:
 		error = EDEADLK;
 		break;
 	case nlm4_rofs:
 		error = EROFS;
 		break;
 	case nlm4_stale_fh:
 		error = ESTALE;
 		break;
 	case nlm4_fbig:
 		error = EFBIG;
 		break;
 	case nlm4_failed:
 		error = EACCES;
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Find the svid record for `id', creating one if none exists, and
  * return it with its reference count raised by one.  Records live in
  * the nlm_file_svids hash table, which is protected by nlm_svid_lock.
  * The caller is expected to drop the reference with nlm_free_svid().
  */
 static struct nlm_file_svid *
 nlm_find_svid(void *id)
 {
 	struct nlm_file_svid *ns, *newns;
 	int h;
 
 	/* Hash bucket derived from the pointer value of id. */
 	h = (((uintptr_t) id) >> 7) % NLM_SVID_HASH_SIZE;
 
 	/* First pass: look for an existing record. */
 	mtx_lock(&nlm_svid_lock);
 	LIST_FOREACH(ns, &nlm_file_svids[h], ns_link) {
 		if (ns->ns_id == id) {
 			ns->ns_refs++;
 			break;
 		}
 	}
 	mtx_unlock(&nlm_svid_lock);
 	/* ns is NULL here when the loop ran to the end without a match. */
 	if (!ns) {
 		/*
 		 * Build a new record with the mutex dropped (the
 		 * allocation uses M_WAITOK).
 		 */
 		int svid = alloc_unr(nlm_svid_allocator);
 		newns = malloc(sizeof(struct nlm_file_svid), M_NLM,
 		    M_WAITOK);
 		newns->ns_refs = 1;
 		newns->ns_id = id;
 		newns->ns_svid = svid;
 		newns->ns_ucred = NULL;
 		newns->ns_active = FALSE;
 
 		/*
 		 * We need to check for a race with some other
 		 * thread allocating a svid for this file.
 		 */
 		mtx_lock(&nlm_svid_lock);
 		LIST_FOREACH(ns, &nlm_file_svids[h], ns_link) {
 			if (ns->ns_id == id) {
 				ns->ns_refs++;
 				break;
 			}
 		}
 		if (ns) {
 			/* Lost the race: discard ours, use the winner's. */
 			mtx_unlock(&nlm_svid_lock);
 			free_unr(nlm_svid_allocator, newns->ns_svid);
 			free(newns, M_NLM);
 		} else {
 			/* Still absent: publish the new record. */
 			LIST_INSERT_HEAD(&nlm_file_svids[h], newns,
 			    ns_link);
 			ns = newns;
 			mtx_unlock(&nlm_svid_lock);
 		}
 	}
 
 	return (ns);
 }
 
 static void
 nlm_free_svid(struct nlm_file_svid *ns)
 {
 
 	mtx_lock(&nlm_svid_lock);
 	ns->ns_refs--;
 	if (!ns->ns_refs) {
 		KASSERT(!ns->ns_active, ("Freeing active SVID"));
 		LIST_REMOVE(ns, ns_link);
 		mtx_unlock(&nlm_svid_lock);
 		free_unr(nlm_svid_allocator, ns->ns_svid);
 		if (ns->ns_ucred)
 			crfree(ns->ns_ucred);
 		free(ns, M_NLM);
 	} else {
 		mtx_unlock(&nlm_svid_lock);
 	}
 }
 
 /*
  * Build the NLM4 lock description for a request.
  *
  *   fl        byte range in flock form (l_whence, l_start, l_len)
  *   svid      owner id; stored in the lock and used in the owner handle
  *   vers      protocol version; NLM_VERS limits the range to 32 bits
  *   fhlen/fh  file handle, referenced (not copied) by the result
  *   size      file size, used to resolve SEEK_END
  *   lock      result; zeroed and then filled in
  *   oh_space  32-byte buffer that receives the owner handle text
  *             ("<svid>@" followed by what getcredhostname() supplies);
  *             lock->oh points into it, so it must outlive *lock
  *
  * Returns 0 on success, EINVAL for an unknown l_whence or a range that
  * starts before offset 0, and EOVERFLOW when the range cannot be
  * represented.
  */
 static int
 nlm_init_lock(struct flock *fl, int flags, int svid,
     rpcvers_t vers, size_t fhlen, void *fh, off_t size,
     struct nlm4_lock *lock, char oh_space[32])
 {
 	size_t oh_len;
 	off_t start, len;
 
 	/* Resolve the starting offset relative to l_whence. */
 	if (fl->l_whence == SEEK_END) {
 		if (size > OFF_MAX
 		    || (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
 			return (EOVERFLOW);
 		start = size + fl->l_start;
 	} else if (fl->l_whence == SEEK_SET || fl->l_whence == SEEK_CUR) {
 		start = fl->l_start;
 	} else {
 		return (EINVAL);
 	}
 	if (start < 0)
 		return (EINVAL);
 	if (fl->l_len < 0) {
 		/*
 		 * A negative length means the range ends at `start' and
 		 * extends backwards.  The most negative off_t cannot be
 		 * negated without overflow; such a range would begin
 		 * before offset 0 whatever `start' is, so reject it
 		 * with the same error as the check below.
 		 */
 		if (fl->l_len < -OFF_MAX)
 			return (EINVAL);
 		len = -fl->l_len;
 		start -= len;
 		if (start < 0)
 			return (EINVAL);
 	} else {
 		len = fl->l_len;
 	}
 
 	if (vers == NLM_VERS) {
 		/*
 		 * Enforce range limits on V1 locks
 		 */
 		if (start > 0xffffffffLL || len > 0xffffffffLL)
 			return (EOVERFLOW);
 	}
 
 	/* Owner handle: "<svid>@" plus the name from getcredhostname(). */
 	snprintf(oh_space, 32, "%d@", svid);
 	oh_len = strlen(oh_space);
 	getcredhostname(NULL, oh_space + oh_len, 32 - oh_len);
 	oh_len = strlen(oh_space);
 
 	memset(lock, 0, sizeof(*lock));
 	lock->caller_name = prison0.pr_hostname;
 	lock->fh.n_len = fhlen;
 	lock->fh.n_bytes = fh;
 	lock->oh.n_len = oh_len;
 	lock->oh.n_bytes = oh_space;
 	lock->svid = svid;
 	lock->l_offset = start;
 	lock->l_len = len;
 
 	return (0);
 }
Index: head/sys/security/audit/audit_private.h
===================================================================
--- head/sys/security/audit/audit_private.h	(revision 318735)
+++ head/sys/security/audit/audit_private.h	(revision 318736)
@@ -1,500 +1,500 @@
 /*-
  * Copyright (c) 1999-2009 Apple Inc.
  * Copyright (c) 2016-2017 Robert N. M. Watson
  * All rights reserved.
  *
  * Portions of this software were developed by BAE Systems, the University of
  * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL
  * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent
  * Computing (TC) research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1.  Redistributions of source code must retain the above copyright
  *     notice, this list of conditions and the following disclaimer.
  * 2.  Redistributions in binary form must reproduce the above copyright
  *     notice, this list of conditions and the following disclaimer in the
  *     documentation and/or other materials provided with the distribution.
  * 3.  Neither the name of Apple Inc. ("Apple") nor the names of
  *     its contributors may be used to endorse or promote products derived
  *     from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * This include file contains function prototypes and type definitions used
  * within the audit implementation.
  */
 
 #ifndef _SECURITY_AUDIT_PRIVATE_H_
 #define	_SECURITY_AUDIT_PRIVATE_H_
 
 #ifndef _KERNEL
 #error "no user-serviceable parts inside"
 #endif
 
 #include <sys/caprights.h>
 #include <sys/ipc.h>
 #include <sys/socket.h>
 #include <sys/ucred.h>
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_AUDITBSM);
 MALLOC_DECLARE(M_AUDITDATA);
 MALLOC_DECLARE(M_AUDITPATH);
 MALLOC_DECLARE(M_AUDITTEXT);
 MALLOC_DECLARE(M_AUDITGIDSET);
 #endif
 
 /*
  * Audit control variables that are usually set/read via system calls and
  * used to control various aspects of auditing.
  */
 extern struct au_qctrl		audit_qctrl;
 extern struct audit_fstat	audit_fstat;
 extern struct au_mask		audit_nae_mask;
 extern int			audit_panic_on_write_fail;
 extern int			audit_fail_stop;
 extern int			audit_argv;
 extern int			audit_arge;
 
 /*
  * Success/failure conditions for the conversion of a kernel audit record to
  * BSM format.
  */
 #define	BSM_SUCCESS	0
 #define	BSM_FAILURE	1
 #define	BSM_NOAUDIT	2
 
 /*
  * Defines for the kernel audit record k_ar_commit field.  Flags are set to
  * indicate what sort of record it is, and which preselection mechanism
  * selected it.
  */
 #define	AR_COMMIT_KERNEL	0x00000001U
 #define	AR_COMMIT_USER		0x00000010U
 
 #define	AR_PRESELECT_TRAIL	0x00001000U
 #define	AR_PRESELECT_PIPE	0x00002000U
 
 #define	AR_PRESELECT_USER_TRAIL	0x00004000U
 #define	AR_PRESELECT_USER_PIPE	0x00008000U
 
 #define	AR_PRESELECT_DTRACE	0x00010000U
 
 /*
  * Audit data is generated as a stream of struct audit_record structures,
  * linked by struct kaudit_record, and contain storage for possible audit so
  * that it will not need to be allocated during the processing of a system
  * call, both improving efficiency and avoiding sleeping at untimely moments.
  * This structure is converted to BSM format before being written to disk.
  */
 struct vnode_au_info {
 	mode_t	vn_mode;
 	uid_t	vn_uid;
 	gid_t	vn_gid;
-	dev_t	vn_dev;
-	long	vn_fsid;
-	long	vn_fileid;
+	u_int32_t vn_dev;		/* XXX dev_t compatibility */
+	long	vn_fsid;		/* XXX uint64_t compatibility */
+	long	vn_fileid;		/* XXX ino_t compatibility */
 	long	vn_gen;
 };
 
 /*
  * A set of group IDs, used for the ar_arg_groups member of struct
  * audit_record.
  * NOTE(review): whether gidset_size counts entries or bytes is not
  * visible in this header -- confirm against the code that fills it in.
  */
 struct groupset {
 	gid_t	*gidset;
 	u_int	 gidset_size;
 };
 
 /*
  * Description of a socket, used for the ar_arg_sockinfo member of
  * struct audit_record and accepted by kau_to_socket().
  */
 struct socket_au_info {
 	int		so_domain;
 	int		so_type;
 	int		so_protocol;
 	in_addr_t	so_raddr;	/* Remote address if INET socket. */
 	in_addr_t	so_laddr;	/* Local address if INET socket. */
 	u_short		so_rport;	/* Remote port. */
 	u_short		so_lport;	/* Local port. */
 };
 
 /*
  * The following is used for A_OLDSETQCTRL and A_OLDGETQCTRL and a 64-bit
  * userland.  Every member is 64 bits wide.
  */
 struct au_qctrl64 {
 	u_int64_t	aq64_hiwater;
 	u_int64_t	aq64_lowater;
 	u_int64_t	aq64_bufsz;
 	u_int64_t	aq64_delay;
 	u_int64_t	aq64_minfree;
 };
 typedef	struct au_qctrl64	au_qctrl64_t;
 
 /*
  * Union of the argument forms stored in the ar_arg_auditon member of
  * struct audit_record.
  * NOTE(review): presumably one member per auditon(2) command family;
  * which member is valid for which command is not visible here.
  */
 union auditon_udata {
 	char			*au_path;
 	int			au_cond;
 	int			au_flags;
 	int			au_policy;
 	int			au_trigger;
 	int64_t			au_cond64;
 	int64_t			au_policy64;
 	au_evclass_map_t	au_evclass;
 	au_mask_t		au_mask;
 	auditinfo_t		au_auinfo;
 	auditpinfo_t		au_aupinfo;
 	auditpinfo_addr_t	au_aupinfo_addr;
 	au_qctrl_t		au_qctrl;
 	au_qctrl64_t		au_qctrl64;
 	au_stat_t		au_stat;
 	au_fstat_t		au_fstat;
 	auditinfo_addr_t	au_kau_info;
 	au_evname_map_t		au_evname;
 };
 
 /*
  * Owner uid, owner gid and mode for a POSIX IPC object, used for the
  * ar_arg_pipc_perm member of struct audit_record.
  */
 struct posix_ipc_perm {
 	uid_t	pipc_uid;
 	gid_t	pipc_gid;
 	mode_t	pipc_mode;
 };
 
 struct audit_record {
 	/* Audit record header. */
 	u_int32_t		ar_magic;
 	int			ar_event;
 	int			ar_retval; /* value returned to the process */
 	int			ar_errno;  /* return status of system call */
 	struct timespec		ar_starttime;
 	struct timespec		ar_endtime;
 	u_int64_t		ar_valid_arg;  /* Bitmask of valid arguments */
 
 	/* Audit subject information. */
 	struct xucred		ar_subj_cred;
 	uid_t			ar_subj_ruid;
 	gid_t			ar_subj_rgid;
 	gid_t			ar_subj_egid;
 	uid_t			ar_subj_auid; /* Audit user ID */
 	pid_t			ar_subj_asid; /* Audit session ID */
 	pid_t			ar_subj_pid;
 	struct au_tid		ar_subj_term;
 	struct au_tid_addr	ar_subj_term_addr;
 	struct au_mask		ar_subj_amask;
 
 	/* Operation arguments. */
 	uid_t			ar_arg_euid;
 	uid_t			ar_arg_ruid;
 	uid_t			ar_arg_suid;
 	gid_t			ar_arg_egid;
 	gid_t			ar_arg_rgid;
 	gid_t			ar_arg_sgid;
 	pid_t			ar_arg_pid;
 	pid_t			ar_arg_asid;
 	struct au_tid		ar_arg_termid;
 	struct au_tid_addr	ar_arg_termid_addr;
 	uid_t			ar_arg_uid;
 	uid_t			ar_arg_auid;
 	gid_t			ar_arg_gid;
 	struct groupset		ar_arg_groups;
 	int			ar_arg_fd;
 	int			ar_arg_atfd1;
 	int			ar_arg_atfd2;
 	int			ar_arg_fflags;
 	mode_t			ar_arg_mode;
-	int			ar_arg_dev;
+	int			ar_arg_dev;	/* XXX dev_t compatibility */
 	long			ar_arg_value;
 	void			*ar_arg_addr;
 	int			ar_arg_len;
 	int			ar_arg_mask;
 	u_int			ar_arg_signum;
 	char			ar_arg_login[MAXLOGNAME];
 	int			ar_arg_ctlname[CTL_MAXNAME];
 	struct socket_au_info	ar_arg_sockinfo;
 	char			*ar_arg_upath1;
 	char			*ar_arg_upath2;
 	char			*ar_arg_text;
 	struct au_mask		ar_arg_amask;
 	struct vnode_au_info	ar_arg_vnode1;
 	struct vnode_au_info	ar_arg_vnode2;
 	int			ar_arg_cmd;
 	int			ar_arg_svipc_which;
 	int			ar_arg_svipc_cmd;
 	struct ipc_perm		ar_arg_svipc_perm;
 	int			ar_arg_svipc_id;
 	void			*ar_arg_svipc_addr;
 	struct posix_ipc_perm	ar_arg_pipc_perm;
 	union auditon_udata	ar_arg_auditon;
 	char			*ar_arg_argv;
 	int			ar_arg_argc;
 	char			*ar_arg_envv;
 	int			ar_arg_envc;
 	int			ar_arg_exitstatus;
 	int			ar_arg_exitretval;
 	struct sockaddr_storage ar_arg_sockaddr;
 	cap_rights_t		ar_arg_rights;
 	uint32_t		ar_arg_fcntl_rights;
 	char			ar_jailname[MAXHOSTNAMELEN];
 };
 
 /*
  * Arguments in the audit record are initially not defined; flags are set to
  * indicate if they are present so they can be included in the audit log
  * stream only if defined.
  */
 #define	ARG_EUID		0x0000000000000001ULL
 #define	ARG_RUID		0x0000000000000002ULL
 #define	ARG_SUID		0x0000000000000004ULL
 #define	ARG_EGID		0x0000000000000008ULL
 #define	ARG_RGID		0x0000000000000010ULL
 #define	ARG_SGID		0x0000000000000020ULL
 #define	ARG_PID			0x0000000000000040ULL
 #define	ARG_UID			0x0000000000000080ULL
 #define	ARG_AUID		0x0000000000000100ULL
 #define	ARG_GID			0x0000000000000200ULL
 #define	ARG_FD			0x0000000000000400ULL
 #define	ARG_POSIX_IPC_PERM	0x0000000000000800ULL
 #define	ARG_FFLAGS		0x0000000000001000ULL
 #define	ARG_MODE		0x0000000000002000ULL
 #define	ARG_DEV			0x0000000000004000ULL
 #define	ARG_ADDR		0x0000000000008000ULL
 #define	ARG_LEN			0x0000000000010000ULL
 #define	ARG_MASK		0x0000000000020000ULL
 #define	ARG_SIGNUM		0x0000000000040000ULL
 #define	ARG_LOGIN		0x0000000000080000ULL
 #define	ARG_SADDRINET		0x0000000000100000ULL
 #define	ARG_SADDRINET6		0x0000000000200000ULL
 #define	ARG_SADDRUNIX		0x0000000000400000ULL
 #define	ARG_TERMID_ADDR		0x0000000000800000ULL
 #define	ARG_UNUSED2		0x0000000001000000ULL
 #define	ARG_UPATH1		0x0000000002000000ULL
 #define	ARG_UPATH2		0x0000000004000000ULL
 #define	ARG_TEXT		0x0000000008000000ULL
 #define	ARG_VNODE1		0x0000000010000000ULL
 #define	ARG_VNODE2		0x0000000020000000ULL
 #define	ARG_SVIPC_CMD		0x0000000040000000ULL
 #define	ARG_SVIPC_PERM		0x0000000080000000ULL
 #define	ARG_SVIPC_ID		0x0000000100000000ULL
 #define	ARG_SVIPC_ADDR		0x0000000200000000ULL
 #define	ARG_GROUPSET		0x0000000400000000ULL
 #define	ARG_CMD			0x0000000800000000ULL
 #define	ARG_SOCKINFO		0x0000001000000000ULL
 #define	ARG_ASID		0x0000002000000000ULL
 #define	ARG_TERMID		0x0000004000000000ULL
 #define	ARG_AUDITON		0x0000008000000000ULL
 #define	ARG_VALUE		0x0000010000000000ULL
 #define	ARG_AMASK		0x0000020000000000ULL
 #define	ARG_CTLNAME		0x0000040000000000ULL
 #define	ARG_PROCESS		0x0000080000000000ULL
 #define	ARG_MACHPORT1		0x0000100000000000ULL
 #define	ARG_MACHPORT2		0x0000200000000000ULL
 #define	ARG_EXIT		0x0000400000000000ULL
 #define	ARG_IOVECSTR		0x0000800000000000ULL
 #define	ARG_ARGV		0x0001000000000000ULL
 #define	ARG_ENVV		0x0002000000000000ULL
 #define	ARG_ATFD1		0x0004000000000000ULL
 #define	ARG_ATFD2		0x0008000000000000ULL
 #define	ARG_RIGHTS		0x0010000000000000ULL
 #define	ARG_FCNTL_RIGHTS	0x0020000000000000ULL
 #define	ARG_SVIPC_WHICH		0x0200000000000000ULL
 #define	ARG_NONE		0x0000000000000000ULL
 #define	ARG_ALL			0xFFFFFFFFFFFFFFFFULL
 
 #define	ARG_IS_VALID(kar, arg)	((kar)->k_ar.ar_valid_arg & (arg))
 #define	ARG_SET_VALID(kar, arg) do {					\
 	(kar)->k_ar.ar_valid_arg |= (arg);				\
 } while (0)
 #define	ARG_CLEAR_VALID(kar, arg) do {					\
 	(kar)->k_ar.ar_valid_arg &= ~(arg);				\
 } while (0)
 
 /*
  * In-kernel version of audit record; the basic record plus queue meta-data.
  * This record can also have a pointer set to some opaque data that will be
  * passed through to the audit writing mechanism.
  */
 struct kaudit_record {
 	struct audit_record		 k_ar;		/* Basic record. */
 	u_int32_t			 k_ar_commit;	/* AR_* flags. */
 	void				*k_udata;	/* User data. */
 	u_int				 k_ulen;	/* User data length. */
 	struct uthread			*k_uthread;	/* Audited thread. */
 	void				*k_dtaudit_state;
 	TAILQ_ENTRY(kaudit_record)	 k_q;		/* kaudit_queue link. */
 };
 TAILQ_HEAD(kaudit_queue, kaudit_record);
 
 /*
  * Functions to manage the allocation, release, and commit of kernel audit
  * records.
  */
 void			 audit_abort(struct kaudit_record *ar);
 void			 audit_commit(struct kaudit_record *ar, int error,
 			    int retval);
 struct kaudit_record	*audit_new(int event, struct thread *td);
 
 /*
  * Functions relating to the conversion of internal kernel audit records to
  * the BSM file format.
  */
 struct au_record;
 int	 kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau);
 int	 bsm_rec_verify(void *rec);
 
 /*
  * Kernel versions of the libbsm audit record functions.
  */
 void	 kau_free(struct au_record *rec);
 void	 kau_init(void);
 
 /*
  * Return values for pre-selection and post-selection decisions.
  */
 #define	AU_PRS_SUCCESS	1
 #define	AU_PRS_FAILURE	2
 #define	AU_PRS_BOTH	(AU_PRS_SUCCESS|AU_PRS_FAILURE)
 
 /*
  * Data structures relating to the kernel audit queue.  Ideally, these might
  * be abstracted so that only accessor methods are exposed.
  */
 extern struct mtx		audit_mtx;
 extern struct cv		audit_watermark_cv;
 extern struct cv		audit_worker_cv;
 extern struct kaudit_queue	audit_q;
 extern int			audit_q_len;
 extern int			audit_pre_q_len;
 extern int			audit_in_failure;
 
 /*
  * Flags to use on audit files when opening and closing.
  */
 #define	AUDIT_OPEN_FLAGS	(FWRITE | O_APPEND)
 #define	AUDIT_CLOSE_FLAGS	(FWRITE | O_APPEND)
 
 /*
  * Audit event-to-name mapping structure, maintained in audit_bsm_klib.c.  It
  * appears in this header so that the DTrace audit provider can dereference
  * instances passed back in the au_evname_foreach() callbacks.  Safe access to
  * its fields requires holding ene_lock (after it is visible in the global
  * table).
  *
  * Locking:
  * (c) - Constant after inserted in the global table
  * (l) - Protected by ene_lock
  * (m) - Protected by evnamemap_lock (audit_bsm_klib.c)
  * (M) - Writes protected by evnamemap_lock; reads unprotected.
  */
 struct evname_elem {
 	au_event_t		ene_event;			/* (c) */
 	char			ene_name[EVNAMEMAP_NAME_SIZE];	/* (l) */
 	LIST_ENTRY(evname_elem)	ene_entry;			/* (m) */
 	/* Taken and released through EVNAME_LOCK()/EVNAME_UNLOCK(). */
 	struct mtx		ene_lock;
 
 	/* DTrace probe IDs; 0 if not yet registered. */
 	uint32_t		ene_commit_probe_id;		/* (M) */
 	uint32_t		ene_bsm_probe_id;		/* (M) */
 
 	/* Flags indicating if the probes enabled or not. */
 	int			ene_commit_probe_enabled;	/* (M) */
 	int			ene_bsm_probe_enabled;		/* (M) */
 };
 
 #define	EVNAME_LOCK(ene)	mtx_lock(&(ene)->ene_lock)
 #define	EVNAME_UNLOCK(ene)	mtx_unlock(&(ene)->ene_lock)
 
 /*
  * Callback function typedef for the same.
  */
 typedef	void	(*au_evnamemap_callback_t)(struct evname_elem *ene);
 
 /*
  * DTrace audit provider (dtaudit) hooks -- to be set non-NULL when the audit
  * provider is loaded and ready to be called into.
  */
 extern void	*(*dtaudit_hook_preselect)(au_id_t auid, au_event_t event,
 		    au_class_t class);
 extern int	(*dtaudit_hook_commit)(struct kaudit_record *kar,
 		    au_id_t auid, au_event_t event, au_class_t class,
 		    int sorf);
 extern void	(*dtaudit_hook_bsm)(struct kaudit_record *kar, au_id_t auid,
 		    au_event_t event, au_class_t class, int sorf,
 		    void *bsm_data, size_t bsm_len);
 
 #include <sys/fcntl.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 
 /*
  * Some of the BSM tokenizer functions take different parameters in the
  * kernel implementations in order to save the copying of large kernel data
  * structures.  The prototypes of these functions are declared here.
  */
 token_t		*kau_to_socket(struct socket_au_info *soi);
 
 /*
  * audit_klib prototypes
  */
 int		 au_preselect(au_event_t event, au_class_t class,
 		    au_mask_t *mask_p, int sorf);
 void		 au_evclassmap_init(void);
 void		 au_evclassmap_insert(au_event_t event, au_class_t class);
 au_class_t	 au_event_class(au_event_t event);
 void		 au_evnamemap_init(void);
 void		 au_evnamemap_insert(au_event_t event, const char *name);
 void		 au_evnamemap_foreach(au_evnamemap_callback_t callback);
 struct evname_elem	*au_evnamemap_lookup(au_event_t event);
 int		 au_event_name(au_event_t event, char *name);
 au_event_t	 audit_ctlname_to_sysctlevent(int name[], uint64_t valid_arg);
 au_event_t	 audit_flags_and_error_to_openevent(int oflags, int error);
 au_event_t	 audit_flags_and_error_to_openatevent(int oflags, int error);
 au_event_t	 audit_msgctl_to_event(int cmd);
 au_event_t	 audit_msgsys_to_event(int which);
 au_event_t	 audit_semctl_to_event(int cmd);
 au_event_t	 audit_semsys_to_event(int which);
 au_event_t	 audit_shmsys_to_event(int which);
 void		 audit_canon_path(struct thread *td, int dirfd, char *path,
 		    char *cpath);
 au_event_t	 auditon_command_event(int cmd);
 
 /*
  * Audit trigger events notify user space of kernel audit conditions
  * asynchronously.
  */
 void		 audit_trigger_init(void);
 int		 audit_send_trigger(unsigned int trigger);
 
 /*
  * Accessor functions to manage global audit state.
  */
 void	 audit_set_kinfo(struct auditinfo_addr *);
 void	 audit_get_kinfo(struct auditinfo_addr *);
 
 /*
  * General audit related functions.
  */
 struct kaudit_record	*currecord(void);
 void			 audit_free(struct kaudit_record *ar);
 void			 audit_shutdown(void *arg, int howto);
 void			 audit_rotate_vnode(struct ucred *cred,
 			    struct vnode *vp);
 void			 audit_worker_init(void);
 
 /*
  * Audit pipe functions.
  */
 int	 audit_pipe_preselect(au_id_t auid, au_event_t event,
 	    au_class_t class, int sorf, int trail_select);
 void	 audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class,
 	    int sorf, int trail_select, void *record, u_int record_len);
 void	 audit_pipe_submit_user(void *record, u_int record_len);
 
 #endif /* ! _SECURITY_AUDIT_PRIVATE_H_ */
Index: head/sys/sys/_types.h
===================================================================
--- head/sys/sys/_types.h	(revision 318735)
+++ head/sys/sys/_types.h	(revision 318736)
@@ -1,123 +1,123 @@
 /*-
  * Copyright (c) 2002 Mike Barcroft <mike@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS__TYPES_H_
 #define _SYS__TYPES_H_
 
 #include <sys/cdefs.h>
 #include <machine/_types.h>
 
 /*
  * Standard type definitions.
  */
 typedef	__int32_t	__blksize_t;	/* file block size */
 typedef	__int64_t	__blkcnt_t;	/* file block count */
 typedef	__int32_t	__clockid_t;	/* clock_gettime()... */
 typedef	__uint32_t	__fflags_t;	/* file flags */
 typedef	__uint64_t	__fsblkcnt_t;
 typedef	__uint64_t	__fsfilcnt_t;
 typedef	__uint32_t	__gid_t;
 typedef	__int64_t	__id_t;		/* can hold a gid_t, pid_t, or uid_t */
-typedef	__uint32_t	__ino_t;	/* inode number */
+typedef	__uint64_t	__ino_t;	/* inode number */
 typedef	long		__key_t;	/* IPC key (for Sys V IPC) */
 typedef	__int32_t	__lwpid_t;	/* Thread ID (a.k.a. LWP) */
 typedef	__uint16_t	__mode_t;	/* permissions */
 typedef	int		__accmode_t;	/* access permissions */
 typedef	int		__nl_item;
-typedef	__uint16_t	__nlink_t;	/* link count */
+typedef	__uint64_t	__nlink_t;	/* link count */
 typedef	__int64_t	__off_t;	/* file offset */
 typedef	__int64_t	__off64_t;	/* file offset (alias) */
 typedef	__int32_t	__pid_t;	/* process [group] */
 typedef	__int64_t	__rlim_t;	/* resource limit - intentionally */
 					/* signed, because of legacy code */
 					/* that uses -1 for RLIM_INFINITY */
 typedef	__uint8_t	__sa_family_t;
 typedef	__uint32_t	__socklen_t;
 typedef	long		__suseconds_t;	/* microseconds (signed) */
 typedef	struct __timer	*__timer_t;	/* timer_gettime()... */
 typedef	struct __mq	*__mqd_t;	/* mq_open()... */
 typedef	__uint32_t	__uid_t;
 typedef	unsigned int	__useconds_t;	/* microseconds (unsigned) */
 typedef	int		__cpuwhich_t;	/* which parameter for cpuset. */
 typedef	int		__cpulevel_t;	/* level parameter for cpuset. */
 typedef int		__cpusetid_t;	/* cpuset identifier. */
 
 /*
  * Unusual type definitions.
  */
 /*
  * rune_t is declared to be an ``int'' instead of the more natural
  * ``unsigned long'' or ``long''.  Two things are happening here.  It is not
  * unsigned so that EOF (-1) can be naturally assigned to it and used.  Also,
  * it looks like 10646 will be a 31 bit standard.  This means that if your
  * ints cannot hold 32 bits, you will be in trouble.  The reason an int was
  * chosen over a long is that the is*() and to*() routines take ints (says
  * ANSI C), but they use __ct_rune_t instead of int.
  *
  * NOTE: rune_t is not covered by ANSI nor other standards, and should not
  * be instantiated outside of lib/libc/locale.  Use wchar_t.  wint_t and
  * rune_t must be the same type.  Also, wint_t should be able to hold all
  * members of the largest character set plus one extra value (WEOF), and
  * must be at least 16 bits.
  */
 typedef	int		__ct_rune_t;	/* arg type for ctype funcs */
 typedef	__ct_rune_t	__rune_t;	/* rune_t (see above) */
 typedef	__ct_rune_t	__wint_t;	/* wint_t (see above) */
 
 /* Clang already provides these types as built-ins, but only in C++ mode. */
 #if !defined(__clang__) || !defined(__cplusplus)
 typedef	__uint_least16_t __char16_t;
 typedef	__uint_least32_t __char32_t;
 #endif
 /* In C++11, char16_t and char32_t are built-in types. */
 #if defined(__cplusplus) && __cplusplus >= 201103L
 #define	_CHAR16_T_DECLARED
 #define	_CHAR32_T_DECLARED
 #endif
 
 typedef struct {
 	long long __max_align1 __aligned(_Alignof(long long));
 	long double __max_align2 __aligned(_Alignof(long double));
 } __max_align_t;
 
-typedef	__uint32_t	__dev_t;	/* device number */
+typedef	__uint64_t	__dev_t;	/* device number */
 
 typedef	__uint32_t	__fixpt_t;	/* fixed point number */
 
 /*
  * mbstate_t is an opaque object to keep conversion state during multibyte
  * stream conversions.
  */
 typedef union {
 	char		__mbstate8[128];
 	__int64_t	_mbstateL;	/* for alignment */
 } __mbstate_t;
 
 typedef __uintmax_t     __rman_res_t;
 
 #endif /* !_SYS__TYPES_H_ */
Index: head/sys/sys/acct.h
===================================================================
--- head/sys/sys/acct.h	(revision 318735)
+++ head/sys/sys/acct.h	(revision 318736)
@@ -1,125 +1,150 @@
 /*-
  * Copyright (c) 1990, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)acct.h	8.4 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_ACCT_H_
 #define _SYS_ACCT_H_
 
 #ifdef _KERNEL
 #define float uint32_t
 #endif
 
 #define AC_COMM_LEN 16
 
 /*
- * Accounting structure version 2 (current).
+ * Accounting structure version 3 (current).
  * The first byte is always zero.
  * Time units are microseconds.
  */
 
-struct acctv2 {
+struct acctv3 {
 	uint8_t   ac_zero;		/* zero identifies new version */
 	uint8_t   ac_version;		/* record version number */
 	uint16_t  ac_len;		/* record length */
 
 	char	  ac_comm[AC_COMM_LEN];	/* command name */
 	float	  ac_utime;		/* user time */
 	float	  ac_stime;		/* system time */
 	float	  ac_etime;		/* elapsed time */
 	time_t	  ac_btime;		/* starting time */
 	uid_t	  ac_uid;		/* user id */
 	gid_t	  ac_gid;		/* group id */
 	float	  ac_mem;		/* average memory usage */
 	float	  ac_io;		/* count of IO blocks */
 	__dev_t   ac_tty;		/* controlling tty */
-
+	uint32_t  ac_pad0;
+#if defined(__powerpc__) && !defined(_LP64)
+	uint32_t  ac_pad1;
+#endif
 	uint16_t  ac_len2;		/* record length */
 	union {
-		__dev_t	  ac_align;	/* force v1 compatible alignment */
+		uint32_t  ac_align;	/* force v1 compatible alignment */
 
 #define	AFORK	0x01			/* forked but not exec'ed */
 /* ASU is no longer supported */
 #define	ASU	0x02			/* used super-user permissions */
 #define	ACOMPAT	0x04			/* used compatibility mode */
 #define	ACORE	0x08			/* dumped core */
 #define	AXSIG	0x10			/* killed by a signal */
 #define ANVER	0x20			/* new record version */
 
 		uint8_t   ac_flag;	/* accounting flags */
 	} ac_trailer;
 
 #define ac_flagx ac_trailer.ac_flag
 };
 
+struct acctv2 {
+	uint8_t   ac_zero;		/* zero identifies new version */
+	uint8_t   ac_version;		/* record version number */
+	uint16_t  ac_len;		/* record length */
 
+	char	  ac_comm[AC_COMM_LEN];	/* command name */
+	float	  ac_utime;		/* user time */
+	float	  ac_stime;		/* system time */
+	float	  ac_etime;		/* elapsed time */
+	time_t	  ac_btime;		/* starting time */
+	uid_t	  ac_uid;		/* user id */
+	gid_t	  ac_gid;		/* group id */
+	float	  ac_mem;		/* average memory usage */
+	float	  ac_io;		/* count of IO blocks */
+	uint32_t  ac_tty;		/* controlling tty */
+
+	uint16_t  ac_len2;		/* record length */
+	union {
+		uint32_t   ac_align;	/* force v1 compatible alignment */
+		uint8_t   ac_flag;	/* accounting flags */
+	} ac_trailer;
+};
+
 /*
  * Legacy accounting structure (rev. 1.5-1.18).
  * The first byte is always non-zero.
  * Some fields use a comp_t type which is a 3 bits base 8
  * exponent, 13 bit fraction ``floating point'' number.
  * Units are 1/AHZV1 seconds.
  */
 
 typedef uint16_t comp_t;
 
 struct acctv1 {
 	char	  ac_comm[AC_COMM_LEN];	/* command name */
 	comp_t	  ac_utime;		/* user time */
 	comp_t	  ac_stime;		/* system time */
 	comp_t	  ac_etime;		/* elapsed time */
 	time_t	  ac_btime;		/* starting time */
 	uid_t	  ac_uid;		/* user id */
 	gid_t	  ac_gid;		/* group id */
 	uint16_t  ac_mem;		/* average memory usage */
 	comp_t	  ac_io;		/* count of IO blocks */
-	__dev_t   ac_tty;		/* controlling tty */
+	uint32_t  ac_tty;		/* controlling tty */
 	uint8_t   ac_flag;		/* accounting flags */
 };
 
 /*
  * 1/AHZV1 is the granularity of the data encoded in the comp_t fields.
  * This is not necessarily equal to hz.
  */
 #define	AHZV1	64
 
 #ifdef _KERNEL
 struct thread;
 
 int	acct_process(struct thread *td);
 #undef float
 #endif
 
 #endif /* !_SYS_ACCT_H_ */
Index: head/sys/sys/dirent.h
===================================================================
--- head/sys/sys/dirent.h	(revision 318735)
+++ head/sys/sys/dirent.h	(revision 318736)
@@ -1,100 +1,129 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)dirent.h	8.3 (Berkeley) 8/10/94
  * $FreeBSD$
  */
 
 #ifndef	_SYS_DIRENT_H_
 #define	_SYS_DIRENT_H_
 
 #include <sys/cdefs.h>
 #include <sys/_types.h>
 
+#ifndef _INO_T_DECLARED
+typedef	__ino_t		ino_t;
+#define	_INO_T_DECLARED
+#endif
+
+#ifndef _OFF_T_DECLARED
+typedef	__off_t		off_t;
+#define	_OFF_T_DECLARED
+#endif
+
 /*
  * The dirent structure defines the format of directory entries returned by
  * the getdirentries(2) system call.
  *
  * A directory entry has a struct dirent at the front of it, containing its
  * inode number, the length of the entry, and the length of the name
- * contained in the entry.  These are followed by the name padded to a 4
+ * contained in the entry.  These are followed by the name padded to an 8
  * byte boundary with null bytes.  All names are guaranteed null terminated.
  * The maximum length of a name in a directory is MAXNAMLEN.
+ *
+ * Explicit padding between the last member of the header (d_namlen) and
+ * d_name avoids ABI padding at the end of dirent on LP64 architectures.
+ * There is code depending on d_name being last.
  */
 
 struct dirent {
-	__uint32_t d_fileno;		/* file number of entry */
+	ino_t      d_fileno;		/* file number of entry */
+	off_t      d_off;		/* directory offset of entry */
 	__uint16_t d_reclen;		/* length of this record */
-	__uint8_t  d_type; 		/* file type, see below */
-	__uint8_t  d_namlen;		/* length of string in d_name */
+	__uint8_t  d_type;		/* file type, see below */
+	__uint8_t  d_pad0;		/* explicit padding */
+	__uint16_t d_namlen;		/* length of string in d_name */
+	__uint16_t d_pad1;		/* explicit padding before d_name */
 #if __BSD_VISIBLE
 #define	MAXNAMLEN	255
 	char	d_name[MAXNAMLEN + 1];	/* name must be no longer than this */
 #else
 	char	d_name[255 + 1];	/* name must be no longer than this */
 #endif
 };
 
+#if defined(_WANT_FREEBSD11_DIRENT) || defined(_KERNEL)
+struct freebsd11_dirent {
+	__uint32_t d_fileno;		/* file number of entry */
+	__uint16_t d_reclen;		/* length of this record */
+	__uint8_t  d_type;		/* file type, see below */
+	__uint8_t  d_namlen;		/* length of string in d_name */
+	char	d_name[255 + 1];	/* name must be no longer than this */
+};
+#endif /* _WANT_FREEBSD11_DIRENT || _KERNEL */
+
 #if __BSD_VISIBLE
+
 /*
  * File types
  */
 #define	DT_UNKNOWN	 0
 #define	DT_FIFO		 1
 #define	DT_CHR		 2
 #define	DT_DIR		 4
 #define	DT_BLK		 6
 #define	DT_REG		 8
 #define	DT_LNK		10
 #define	DT_SOCK		12
 #define	DT_WHT		14
 
 /*
  * Convert between stat structure types and directory types.
  */
 #define	IFTODT(mode)	(((mode) & 0170000) >> 12)
 #define	DTTOIF(dirtype)	((dirtype) << 12)
 
 /*
  * The _GENERIC_DIRSIZ macro gives the minimum record length which will hold
  * the directory entry.  This returns the amount of space in struct direct
  * without the d_name field, plus enough space for the name with a terminating
- * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary.
+ * null byte (dp->d_namlen+1), rounded up to an 8 byte boundary.
  *
  * XXX although this macro is in the implementation namespace, it requires
  * a manifest constant that is not.
  */
-#define	_GENERIC_DIRSIZ(dp) \
-    ((sizeof (struct dirent) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3))
+#define	_GENERIC_DIRLEN(namlen)					\
+	((__offsetof(struct dirent, d_name) + (namlen) + 1 + 7) & ~7)
+#define	_GENERIC_DIRSIZ(dp)	_GENERIC_DIRLEN((dp)->d_namlen)
 #endif /* __BSD_VISIBLE */
 
 #ifdef _KERNEL
 #define	GENERIC_DIRSIZ(dp)	_GENERIC_DIRSIZ(dp)
 #endif
 
 #endif /* !_SYS_DIRENT_H_ */
Index: head/sys/sys/mount.h
===================================================================
--- head/sys/sys/mount.h	(revision 318735)
+++ head/sys/sys/mount.h	(revision 318736)
@@ -1,959 +1,987 @@
 /*-
  * Copyright (c) 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)mount.h	8.21 (Berkeley) 5/20/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_MOUNT_H_
 #define _SYS_MOUNT_H_
 
 #include <sys/ucred.h>
 #include <sys/queue.h>
 #ifdef _KERNEL
 #include <sys/lock.h>
 #include <sys/lockmgr.h>
 #include <sys/_mutex.h>
 #include <sys/_sx.h>
 #endif
 
 /*
  * NOTE: When changing statfs structure, mount structure, MNT_* flags or
  * MNTK_* flags also update DDB show mount command in vfs_subr.c.
  */
 
 typedef struct fsid { int32_t val[2]; } fsid_t;	/* filesystem id type */
 
 /*
  * File identifier.
  * These are unique per filesystem on a single machine.
  */
 #define	MAXFIDSZ	16
 
 struct fid {
 	u_short		fid_len;		/* length of data in bytes */
 	u_short		fid_data0;		/* force longword alignment */
 	char		fid_data[MAXFIDSZ];	/* data (variable length) */
 };
 
 /*
  * filesystem statistics
  */
 #define	MFSNAMELEN	16		/* length of type name including null */
-#define	MNAMELEN	88		/* size of on/from name bufs */
-#define	STATFS_VERSION	0x20030518	/* current version number */
+#define	MNAMELEN	1024		/* size of on/from name bufs */
+#define	STATFS_VERSION	0x20140518	/* current version number */
 struct statfs {
 	uint32_t f_version;		/* structure version number */
 	uint32_t f_type;		/* type of filesystem */
 	uint64_t f_flags;		/* copy of mount exported flags */
 	uint64_t f_bsize;		/* filesystem fragment size */
 	uint64_t f_iosize;		/* optimal transfer block size */
 	uint64_t f_blocks;		/* total data blocks in filesystem */
 	uint64_t f_bfree;		/* free blocks in filesystem */
 	int64_t	 f_bavail;		/* free blocks avail to non-superuser */
 	uint64_t f_files;		/* total file nodes in filesystem */
 	int64_t	 f_ffree;		/* free nodes avail to non-superuser */
 	uint64_t f_syncwrites;		/* count of sync writes since mount */
 	uint64_t f_asyncwrites;		/* count of async writes since mount */
 	uint64_t f_syncreads;		/* count of sync reads since mount */
 	uint64_t f_asyncreads;		/* count of async reads since mount */
 	uint64_t f_spare[10];		/* unused spare */
 	uint32_t f_namemax;		/* maximum filename length */
 	uid_t	  f_owner;		/* user that mounted the filesystem */
 	fsid_t	  f_fsid;		/* filesystem id */
 	char	  f_charspare[80];	    /* spare string space */
 	char	  f_fstypename[MFSNAMELEN]; /* filesystem type name */
 	char	  f_mntfromname[MNAMELEN];  /* mounted filesystem */
 	char	  f_mntonname[MNAMELEN];    /* directory on which mounted */
 };
+
+#if defined(_WANT_FREEBSD11_STATFS) || defined(_KERNEL)
+#define	FREEBSD11_STATFS_VERSION	0x20030518 /* FreeBSD 11 version */
+struct freebsd11_statfs {
+	uint32_t f_version;		/* structure version number */
+	uint32_t f_type;		/* type of filesystem */
+	uint64_t f_flags;		/* copy of mount exported flags */
+	uint64_t f_bsize;		/* filesystem fragment size */
+	uint64_t f_iosize;		/* optimal transfer block size */
+	uint64_t f_blocks;		/* total data blocks in filesystem */
+	uint64_t f_bfree;		/* free blocks in filesystem */
+	int64_t	 f_bavail;		/* free blocks avail to non-superuser */
+	uint64_t f_files;		/* total file nodes in filesystem */
+	int64_t	 f_ffree;		/* free nodes avail to non-superuser */
+	uint64_t f_syncwrites;		/* count of sync writes since mount */
+	uint64_t f_asyncwrites;		/* count of async writes since mount */
+	uint64_t f_syncreads;		/* count of sync reads since mount */
+	uint64_t f_asyncreads;		/* count of async reads since mount */
+	uint64_t f_spare[10];		/* unused spare */
+	uint32_t f_namemax;		/* maximum filename length */
+	uid_t	  f_owner;		/* user that mounted the filesystem */
+	fsid_t	  f_fsid;		/* filesystem id */
+	char	  f_charspare[80];	/* spare string space */
+	char	  f_fstypename[16];	/* filesystem type name */
+	char	  f_mntfromname[88];	/* mounted filesystem */
+	char	  f_mntonname[88];	/* directory on which mounted */
+};
+#endif /* _WANT_FREEBSD11_STATFS || _KERNEL */
 
 #ifdef _KERNEL
 #define	OMFSNAMELEN	16	/* length of fs type name, including null */
 #define	OMNAMELEN	(88 - 2 * sizeof(long))	/* size of on/from name bufs */
 
 /* XXX getfsstat.2 is out of date with write and read counter changes here. */
 /* XXX statfs.2 is out of date with read counter changes here. */
 struct ostatfs {
 	long	f_spare2;		/* placeholder */
 	long	f_bsize;		/* fundamental filesystem block size */
 	long	f_iosize;		/* optimal transfer block size */
 	long	f_blocks;		/* total data blocks in filesystem */
 	long	f_bfree;		/* free blocks in fs */
 	long	f_bavail;		/* free blocks avail to non-superuser */
 	long	f_files;		/* total file nodes in filesystem */
 	long	f_ffree;		/* free file nodes in fs */
 	fsid_t	f_fsid;			/* filesystem id */
 	uid_t	f_owner;		/* user that mounted the filesystem */
 	int	f_type;			/* type of filesystem */
 	int	f_flags;		/* copy of mount exported flags */
 	long	f_syncwrites;		/* count of sync writes since mount */
 	long	f_asyncwrites;		/* count of async writes since mount */
 	char	f_fstypename[OMFSNAMELEN]; /* fs type name */
 	char	f_mntonname[OMNAMELEN];	/* directory on which mounted */
 	long	f_syncreads;		/* count of sync reads since mount */
 	long	f_asyncreads;		/* count of async reads since mount */
 	short	f_spares1;		/* unused spare */
 	char	f_mntfromname[OMNAMELEN];/* mounted filesystem */
 	short	f_spares2;		/* unused spare */
 	/*
 	 * XXX on machines where longs are aligned to 8-byte boundaries, there
 	 * is an unnamed int32_t here.  This spare was after the apparent end
 	 * of the struct until we bit off the read counters from f_mntonname.
 	 */
 	long	f_spare[2];		/* unused spare */
 };
 
 TAILQ_HEAD(vnodelst, vnode);
 
 /* Mount options list */
 TAILQ_HEAD(vfsoptlist, vfsopt);
 struct vfsopt {
 	TAILQ_ENTRY(vfsopt) link;
 	char	*name;
 	void	*value;
 	int	len;
 	int	pos;
 	int	seen;
 };
 
 /*
  * Structure per mounted filesystem.  Each mounted filesystem has an
  * array of operations and an instance record.  The filesystems are
  * put on a doubly linked list.
  *
  * Lock reference:
  * 	l - mnt_listmtx
  *	m - mountlist_mtx
  *	i - interlock
  *	v - vnode freelist mutex
  *
  * Unmarked fields are considered stable as long as a ref is held.
  *
  */
 struct mount {
 	struct mtx	mnt_mtx;		/* mount structure interlock */
 	int		mnt_gen;		/* struct mount generation */
 #define	mnt_startzero	mnt_list
 	TAILQ_ENTRY(mount) mnt_list;		/* (m) mount list */
 	struct vfsops	*mnt_op;		/* operations on fs */
 	struct vfsconf	*mnt_vfc;		/* configuration info */
 	struct vnode	*mnt_vnodecovered;	/* vnode we mounted on */
 	struct vnode	*mnt_syncer;		/* syncer vnode */
 	int		mnt_ref;		/* (i) Reference count */
 	struct vnodelst	mnt_nvnodelist;		/* (i) list of vnodes */
 	int		mnt_nvnodelistsize;	/* (i) # of vnodes */
 	int		mnt_writeopcount;	/* (i) write syscalls pending */
 	int		mnt_kern_flag;		/* (i) kernel only flags */
 	uint64_t	mnt_flag;		/* (i) flags shared with user */
 	struct vfsoptlist *mnt_opt;		/* current mount options */
 	struct vfsoptlist *mnt_optnew;		/* new options passed to fs */
 	int		mnt_maxsymlinklen;	/* max size of short symlink */
 	struct statfs	mnt_stat;		/* cache of filesystem stats */
 	struct ucred	*mnt_cred;		/* credentials of mounter */
 	void *		mnt_data;		/* private data */
 	time_t		mnt_time;		/* last time written*/
 	int		mnt_iosize_max;		/* max size for clusters, etc */
 	struct netexport *mnt_export;		/* export list */
 	struct label	*mnt_label;		/* MAC label for the fs */
 	u_int		mnt_hashseed;		/* Random seed for vfs_hash */
 	int		mnt_lockref;		/* (i) Lock reference count */
 	int		mnt_secondary_writes;   /* (i) # of secondary writes */
 	int		mnt_secondary_accwrites;/* (i) secondary wr. starts */
 	struct thread	*mnt_susp_owner;	/* (i) thread owning suspension */
 #define	mnt_endzero	mnt_gjprovider
 	char		*mnt_gjprovider;	/* gjournal provider name */
 	struct mtx	mnt_listmtx;
 	struct vnodelst	mnt_activevnodelist;	/* (l) list of active vnodes */
 	int		mnt_activevnodelistsize;/* (l) # of active vnodes */
 	struct vnodelst	mnt_tmpfreevnodelist;	/* (l) list of free vnodes */
 	int		mnt_tmpfreevnodelistsize;/* (l) # of free vnodes */
 	struct lock	mnt_explock;		/* vfs_export walkers lock */
 	TAILQ_ENTRY(mount) mnt_upper_link;	/* (m) we in the all uppers */
 	TAILQ_HEAD(, mount) mnt_uppers;		/* (m) upper mounts over us*/
 };
 
 /*
  * Definitions for MNT_VNODE_FOREACH_ALL.
  */
 struct vnode *__mnt_vnode_next_all(struct vnode **mvp, struct mount *mp);
 struct vnode *__mnt_vnode_first_all(struct vnode **mvp, struct mount *mp);
 void          __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp);
 
 #define MNT_VNODE_FOREACH_ALL(vp, mp, mvp)				\
 	for (vp = __mnt_vnode_first_all(&(mvp), (mp));			\
 		(vp) != NULL; vp = __mnt_vnode_next_all(&(mvp), (mp)))
 
 #define MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp)				\
 	do {								\
 		MNT_ILOCK(mp);						\
 		__mnt_vnode_markerfree_all(&(mvp), (mp));		\
 		/* MNT_IUNLOCK(mp); -- done in above function */	\
 		mtx_assert(MNT_MTX(mp), MA_NOTOWNED);			\
 	} while (0)
 
 /*
  * Definitions for MNT_VNODE_FOREACH_ACTIVE.
  */
 struct vnode *__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp);
 struct vnode *__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp);
 void          __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
 
 #define MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) 				\
 	for (vp = __mnt_vnode_first_active(&(mvp), (mp)); 		\
 		(vp) != NULL; vp = __mnt_vnode_next_active(&(mvp), (mp)))
 
 #define MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp)				\
 	__mnt_vnode_markerfree_active(&(mvp), (mp))
 
 #define	MNT_ILOCK(mp)	mtx_lock(&(mp)->mnt_mtx)
 #define	MNT_ITRYLOCK(mp) mtx_trylock(&(mp)->mnt_mtx)
 #define	MNT_IUNLOCK(mp)	mtx_unlock(&(mp)->mnt_mtx)
 #define	MNT_MTX(mp)	(&(mp)->mnt_mtx)
 #define	MNT_REF(mp)	(mp)->mnt_ref++
 #define	MNT_REL(mp)	do {						\
 	KASSERT((mp)->mnt_ref > 0, ("negative mnt_ref"));		\
 	(mp)->mnt_ref--;						\
 	if ((mp)->mnt_ref == 0)						\
 		wakeup((mp));						\
 } while (0)
 
 #endif /* _KERNEL */
 
 /*
  * User specifiable flags, stored in mnt_flag.
  */
 #define	MNT_RDONLY	0x0000000000000001ULL /* read only filesystem */
 #define	MNT_SYNCHRONOUS	0x0000000000000002ULL /* fs written synchronously */
 #define	MNT_NOEXEC	0x0000000000000004ULL /* can't exec from filesystem */
 #define	MNT_NOSUID	0x0000000000000008ULL /* don't honor setuid fs bits */
 #define	MNT_NFS4ACLS	0x0000000000000010ULL /* enable NFS version 4 ACLs */
 #define	MNT_UNION	0x0000000000000020ULL /* union with underlying fs */
 #define	MNT_ASYNC	0x0000000000000040ULL /* fs written asynchronously */
 #define	MNT_SUIDDIR	0x0000000000100000ULL /* special SUID dir handling */
 #define	MNT_SOFTDEP	0x0000000000200000ULL /* using soft updates */
 #define	MNT_NOSYMFOLLOW	0x0000000000400000ULL /* do not follow symlinks */
 #define	MNT_GJOURNAL	0x0000000002000000ULL /* GEOM journal support enabled */
 #define	MNT_MULTILABEL	0x0000000004000000ULL /* MAC support for objects */
 #define	MNT_ACLS	0x0000000008000000ULL /* ACL support enabled */
 #define	MNT_NOATIME	0x0000000010000000ULL /* dont update file access time */
 #define	MNT_NOCLUSTERR	0x0000000040000000ULL /* disable cluster read */
 #define	MNT_NOCLUSTERW	0x0000000080000000ULL /* disable cluster write */
 #define	MNT_SUJ		0x0000000100000000ULL /* using journaled soft updates */
 #define	MNT_AUTOMOUNTED	0x0000000200000000ULL /* mounted by automountd(8) */
 
 /*
  * NFS export related mount flags.
  */
 #define	MNT_EXRDONLY	0x0000000000000080ULL	/* exported read only */
 #define	MNT_EXPORTED	0x0000000000000100ULL	/* filesystem is exported */
 #define	MNT_DEFEXPORTED	0x0000000000000200ULL	/* exported to the world */
 #define	MNT_EXPORTANON	0x0000000000000400ULL	/* anon uid mapping for all */
 #define	MNT_EXKERB	0x0000000000000800ULL	/* exported with Kerberos */
 #define	MNT_EXPUBLIC	0x0000000020000000ULL	/* public export (WebNFS) */
 
 /*
  * Flags set by internal operations,
  * but visible to the user.
  * XXX some of these are not quite right.. (I've never seen the root flag set)
  */
 #define	MNT_LOCAL	0x0000000000001000ULL /* filesystem is stored locally */
 #define	MNT_QUOTA	0x0000000000002000ULL /* quotas are enabled on fs */
 #define	MNT_ROOTFS	0x0000000000004000ULL /* identifies the root fs */
 #define	MNT_USER	0x0000000000008000ULL /* mounted by a user */
 #define	MNT_IGNORE	0x0000000000800000ULL /* do not show entry in df */
 
 /*
  * Mask of flags that are visible to statfs().
  * XXX I think that this could now become (~(MNT_CMDFLAGS))
  * but the 'mount' program may need changing to handle this.
  */
 #define	MNT_VISFLAGMASK	(MNT_RDONLY	| MNT_SYNCHRONOUS | MNT_NOEXEC	| \
 			MNT_NOSUID	| MNT_UNION	| MNT_SUJ	| \
 			MNT_ASYNC	| MNT_EXRDONLY	| MNT_EXPORTED	| \
 			MNT_DEFEXPORTED	| MNT_EXPORTANON| MNT_EXKERB	| \
 			MNT_LOCAL	| MNT_USER	| MNT_QUOTA	| \
 			MNT_ROOTFS	| MNT_NOATIME	| MNT_NOCLUSTERR| \
 			MNT_NOCLUSTERW	| MNT_SUIDDIR	| MNT_SOFTDEP	| \
 			MNT_IGNORE	| MNT_EXPUBLIC	| MNT_NOSYMFOLLOW | \
 			MNT_GJOURNAL	| MNT_MULTILABEL | MNT_ACLS	| \
 			MNT_NFS4ACLS	| MNT_AUTOMOUNTED)
 
 /* Mask of flags that can be updated. */
 #define	MNT_UPDATEMASK (MNT_NOSUID	| MNT_NOEXEC	| \
 			MNT_SYNCHRONOUS	| MNT_UNION	| MNT_ASYNC	| \
 			MNT_NOATIME | \
 			MNT_NOSYMFOLLOW	| MNT_IGNORE	| \
 			MNT_NOCLUSTERR	| MNT_NOCLUSTERW | MNT_SUIDDIR	| \
 			MNT_ACLS	| MNT_USER	| MNT_NFS4ACLS	| \
 			MNT_AUTOMOUNTED)
 
 /*
  * External filesystem command modifier flags.
  * Unmount can use the MNT_FORCE flag.
  * XXX: These are not STATES and really should be somewhere else.
  * XXX: MNT_BYFSID and MNT_NONBUSY collide with MNT_ACLS and MNT_MULTILABEL,
  *      but because MNT_ACLS and MNT_MULTILABEL are only used for mount(2),
  *      and MNT_BYFSID and MNT_NONBUSY are only used for unmount(2),
  *      it's harmless.
  */
 #define	MNT_UPDATE	0x0000000000010000ULL /* not real mount, just update */
 #define	MNT_DELEXPORT	0x0000000000020000ULL /* delete export host lists */
 #define	MNT_RELOAD	0x0000000000040000ULL /* reload filesystem data */
 #define	MNT_FORCE	0x0000000000080000ULL /* force unmount or readonly */
 #define	MNT_SNAPSHOT	0x0000000001000000ULL /* snapshot the filesystem */
 #define	MNT_NONBUSY	0x0000000004000000ULL /* check vnode use counts. */
 #define	MNT_BYFSID	0x0000000008000000ULL /* specify filesystem by ID. */
 #define MNT_CMDFLAGS   (MNT_UPDATE	| MNT_DELEXPORT	| MNT_RELOAD	| \
 			MNT_FORCE	| MNT_SNAPSHOT	| MNT_NONBUSY	| \
 			MNT_BYFSID)
 /*
  * Internal filesystem control flags stored in mnt_kern_flag.
  *
  * MNTK_UNMOUNT locks the mount entry so that name lookup cannot proceed
  * past the mount point.  This keeps the subtree stable during mounts
  * and unmounts.
  *
  * MNTK_UNMOUNTF permits filesystems to detect a forced unmount while
  * dounmount() is still waiting to lock the mountpoint. This allows
  * the filesystem to cancel operations that might otherwise deadlock
  * with the unmount attempt (used by NFS).
  *
  * MNTK_NOINSMNTQ is a strict subset of MNTK_UNMOUNT. They are separated
  * to allow a failed unmount attempt to restore the syncer vnode for
  * the mount.
  */
 #define MNTK_UNMOUNTF	0x00000001	/* forced unmount in progress */
 #define MNTK_ASYNC	0x00000002	/* filtered async flag */
 #define MNTK_SOFTDEP	0x00000004	/* async disabled by softdep */
 #define MNTK_NOINSMNTQ	0x00000008	/* insmntque is not allowed */
 #define	MNTK_DRAINING	0x00000010	/* lock draining is happening */
 #define	MNTK_REFEXPIRE	0x00000020	/* refcount expiring is happening */
 #define MNTK_EXTENDED_SHARED	0x00000040 /* Allow shared locking for more ops */
 #define	MNTK_SHARED_WRITES	0x00000080 /* Allow shared locking for writes */
 #define	MNTK_NO_IOPF	0x00000100	/* Disallow page faults during reads
 					   and writes. Filesystem shall properly
 					   handle i/o state on EFAULT. */
 #define	MNTK_VGONE_UPPER	0x00000200
 #define	MNTK_VGONE_WAITER	0x00000400
 #define	MNTK_LOOKUP_EXCL_DOTDOT	0x00000800
 #define	MNTK_MARKER		0x00001000
 #define	MNTK_UNMAPPED_BUFS	0x00002000
 #define	MNTK_USES_BCACHE	0x00004000 /* FS uses the buffer cache. */
 #define MNTK_NOASYNC	0x00800000	/* disable async */
 #define MNTK_UNMOUNT	0x01000000	/* unmount in progress */
 #define	MNTK_MWAIT	0x02000000	/* waiting for unmount to finish */
 #define	MNTK_SUSPEND	0x08000000	/* request write suspension */
 #define	MNTK_SUSPEND2	0x04000000	/* block secondary writes */
 #define	MNTK_SUSPENDED	0x10000000	/* write operations are suspended */
 #define	MNTK_NULL_NOCACHE	0x20000000 /* auto disable cache for nullfs
 					      mounts over this fs */
 #define MNTK_LOOKUP_SHARED	0x40000000 /* FS supports shared lock lookups */
 #define	MNTK_NOKNOTE	0x80000000	/* Don't send KNOTEs from VOP hooks */
 
 #ifdef _KERNEL
 /*
  * Report whether the mount has MNTK_SHARED_WRITES set in mnt_kern_flag.
  * A NULL mount yields 0.
  */
 static inline int
 MNT_SHARED_WRITES(struct mount *mp)
 {
 
 	if (mp == NULL)
 		return (0);
 	return ((mp->mnt_kern_flag & MNTK_SHARED_WRITES) != 0);
 }
 
 /*
  * Report whether the mount has MNTK_EXTENDED_SHARED set in mnt_kern_flag.
  * A NULL mount yields 0.
  */
 static inline int
 MNT_EXTENDED_SHARED(struct mount *mp)
 {
 
 	if (mp == NULL)
 		return (0);
 	return ((mp->mnt_kern_flag & MNTK_EXTENDED_SHARED) != 0);
 }
 #endif
 
 /*
  * Sysctl CTL_VFS definitions.
  *
  * Second level identifier specifies which filesystem. Second level
  * identifier VFS_VFSCONF returns information about all filesystems.
  * Second level identifier VFS_GENERIC is non-terminal.
  */
 #define	VFS_VFSCONF		0	/* get configured filesystems */
 #define	VFS_GENERIC		0	/* generic filesystem information */
 /*
  * Third level identifiers for VFS_GENERIC are given below; third
  * level identifiers for specific filesystems are given in their
  * mount specific header files.
  */
 #define VFS_MAXTYPENUM	1	/* int: highest defined filesystem type */
 #define VFS_CONF	2	/* struct: vfsconf for filesystem given
 				   as next argument */
 
 /*
  * Flags for various system call interfaces.
  *
  * waitfor flags to vfs_sync() and getfsstat()
  */
 #define MNT_WAIT	1	/* synchronously wait for I/O to complete */
 #define MNT_NOWAIT	2	/* start all I/O, but do not wait for it */
 #define MNT_LAZY	3	/* push data not written by filesystem syncer */
 #define MNT_SUSPEND	4	/* Suspend file system after sync */
 
 /*
  * Generic file handle
  */
 struct fhandle {
 	fsid_t	fh_fsid;	/* Filesystem id of mount point */
 	struct	fid fh_fid;	/* Filesys specific id */
 };
 typedef struct fhandle	fhandle_t;
 
 /*
  * Old export arguments without security flavor list
  */
 struct oexport_args {
 	int	ex_flags;		/* export related flags */
 	uid_t	ex_root;		/* mapping for root uid */
 	struct	xucred ex_anon;		/* mapping for anonymous user */
 	struct	sockaddr *ex_addr;	/* net address to which exported */
 	u_char	ex_addrlen;		/* and the net address length */
 	struct	sockaddr *ex_mask;	/* mask of valid bits in saddr */
 	u_char	ex_masklen;		/* and the smask length */
 	char	*ex_indexfile;		/* index file for WebNFS URLs */
 };
 
 /*
  * Export arguments for local filesystem mount calls.
  */
 #define	MAXSECFLAVORS	5
 struct export_args {
 	int	ex_flags;		/* export related flags */
 	uid_t	ex_root;		/* mapping for root uid */
 	struct	xucred ex_anon;		/* mapping for anonymous user */
 	struct	sockaddr *ex_addr;	/* net address to which exported */
 	u_char	ex_addrlen;		/* and the net address length */
 	struct	sockaddr *ex_mask;	/* mask of valid bits in saddr */
 	u_char	ex_masklen;		/* and the smask length */
 	char	*ex_indexfile;		/* index file for WebNFS URLs */
 	int	ex_numsecflavors;	/* security flavor count */
 	int	ex_secflavors[MAXSECFLAVORS]; /* list of security flavors */
 };
 
 /*
  * Structure holding information for a publicly exported filesystem
  * (WebNFS). Currently the specs allow just for one such filesystem.
  */
 struct nfs_public {
 	int		np_valid;	/* Do we hold valid information */
 	fhandle_t	np_handle;	/* Filehandle for pub fs (internal) */
 	struct mount	*np_mount;	/* Mountpoint of exported fs */
 	char		*np_index;	/* Index file */
 };
 
 /*
  * Filesystem configuration information. One of these exists for each
  * type of filesystem supported by the kernel. These are searched at
  * mount time to identify the requested filesystem.
  *
  * XXX: Never change the first two fields!
  */
 struct vfsconf {
 	u_int	vfc_version;		/* ABI version number */
 	char	vfc_name[MFSNAMELEN];	/* filesystem type name */
 	struct	vfsops *vfc_vfsops;	/* filesystem operations vector */
 	int	vfc_typenum;		/* historic filesystem type number */
 	int	vfc_refcount;		/* number mounted of this type */
 	int	vfc_flags;		/* permanent flags */
 	struct	vfsoptdecl *vfc_opts;	/* mount options */
 	TAILQ_ENTRY(vfsconf) vfc_list;	/* linkage in the list of vfsconfs */
 };
 
 /* Userland version of the struct vfsconf. */
 struct xvfsconf {
 	struct	vfsops *vfc_vfsops;	/* filesystem operations vector */
 	char	vfc_name[MFSNAMELEN];	/* filesystem type name */
 	int	vfc_typenum;		/* historic filesystem type number */
 	int	vfc_refcount;		/* number mounted of this type */
 	int	vfc_flags;		/* permanent flags */
 	struct	vfsconf *vfc_next;	/* next in list */
 };
 
 #ifndef BURN_BRIDGES
 struct ovfsconf {
 	void	*vfc_vfsops;
 	char	vfc_name[32];
 	int	vfc_index;
 	int	vfc_refcount;
 	int	vfc_flags;
 };
 #endif
 
 /*
  * NB: these flags refer to IMPLEMENTATION properties, not properties of
  * any actual mounts; i.e., it does not make sense to change the flags.
  */
 #define	VFCF_STATIC	0x00010000	/* statically compiled into kernel */
 #define	VFCF_NETWORK	0x00020000	/* may get data over the network */
 #define	VFCF_READONLY	0x00040000	/* writes are not implemented */
 #define	VFCF_SYNTHETIC	0x00080000	/* data does not represent real files */
 #define	VFCF_LOOPBACK	0x00100000	/* aliases some other mounted FS */
 #define	VFCF_UNICODE	0x00200000	/* stores file names as Unicode */
 #define	VFCF_JAIL	0x00400000	/* can be mounted from within a jail */
 #define	VFCF_DELEGADMIN	0x00800000	/* supports delegated administration */
 #define	VFCF_SBDRY	0x01000000	/* defer stop requests */
 
 typedef uint32_t fsctlop_t;
 
 struct vfsidctl {
 	int		vc_vers;	/* should be VFSIDCTL_VERS1 (below) */
 	fsid_t		vc_fsid;	/* fsid to operate on */
 	char		vc_fstypename[MFSNAMELEN];
 					/* type of fs 'nfs' or '*' */
 	fsctlop_t	vc_op;		/* operation VFS_CTL_* (below) */
 	void		*vc_ptr;	/* pointer to data structure */
 	size_t		vc_len;		/* sizeof said structure */
 	u_int32_t	vc_spare[12];	/* spare (must be zero) */
 };
 
 /* vfsidctl API version. */
 #define VFS_CTL_VERS1	0x01
 
 /*
  * New style VFS sysctls, do not reuse/conflict with the namespace for
  * private sysctls.
  * All "global" sysctl ops have the 17th bit (0x00010000) set:
  * 0x...1....
  * Private sysctl ops should have the 17th bit unset.
  */
 #define VFS_CTL_QUERY	0x00010001	/* anything wrong? (vfsquery) */
 #define VFS_CTL_TIMEO	0x00010002	/* set timeout for vfs notification */
 #define VFS_CTL_NOLOCKS	0x00010003	/* disable file locking */
 
 struct vfsquery {
 	u_int32_t	vq_flags;
 	u_int32_t	vq_spare[31];
 };
 
 /* vfsquery flags */
 #define VQ_NOTRESP	0x0001	/* server down */
 #define VQ_NEEDAUTH	0x0002	/* server bad auth */
 #define VQ_LOWDISK	0x0004	/* we're low on space */
 #define VQ_MOUNT	0x0008	/* new filesystem arrived */
 #define VQ_UNMOUNT	0x0010	/* filesystem has left */
 #define VQ_DEAD		0x0020	/* filesystem is dead, needs force unmount */
 #define VQ_ASSIST	0x0040	/* filesystem needs assistance from external
 				   program */
 #define VQ_NOTRESPLOCK	0x0080	/* server lockd down */
 #define VQ_FLAG0100	0x0100	/* placeholder */
 #define VQ_FLAG0200	0x0200	/* placeholder */
 #define VQ_FLAG0400	0x0400	/* placeholder */
 #define VQ_FLAG0800	0x0800	/* placeholder */
 #define VQ_FLAG1000	0x1000	/* placeholder */
 #define VQ_FLAG2000	0x2000	/* placeholder */
 #define VQ_FLAG4000	0x4000	/* placeholder */
 #define VQ_FLAG8000	0x8000	/* placeholder */
 
 #ifdef _KERNEL
 /*
  * Point a sysctl request at a vfsidctl's data: req's newptr and newlen
  * are loaded from vc's vc_ptr and vc_len, and newidx is reset to 0.
  * Both arguments are expanded more than once, so pass expressions
  * without side effects.
  */
 #define VCTLTOREQ(vc, req)						\
 	do {								\
 		(req)->newptr = (vc)->vc_ptr;				\
 		(req)->newlen = (vc)->vc_len;				\
 		(req)->newidx = 0;					\
 	} while (0)
 #endif
 
 struct iovec;
 struct uio;
 
 #ifdef _KERNEL
 
 /*
  * vfs_busy specific flags and mask.
  */
 #define	MBF_NOWAIT	0x01
 #define	MBF_MNTLSTLOCK	0x02
 #define	MBF_MASK	(MBF_NOWAIT | MBF_MNTLSTLOCK)
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_MOUNT);
 MALLOC_DECLARE(M_STATFS);
 #endif
 extern int maxvfsconf;		/* highest defined filesystem type */
 
 TAILQ_HEAD(vfsconfhead, vfsconf);
 extern struct vfsconfhead vfsconf;
 
 /*
  * Operations supported on mounted filesystem.
  */
 struct mount_args;
 struct nameidata;
 struct sysctl_req;
 struct mntarg;
 
 typedef int vfs_cmount_t(struct mntarg *ma, void *data, uint64_t flags);
 typedef int vfs_unmount_t(struct mount *mp, int mntflags);
 typedef int vfs_root_t(struct mount *mp, int flags, struct vnode **vpp);
 typedef	int vfs_quotactl_t(struct mount *mp, int cmds, uid_t uid, void *arg);
 typedef	int vfs_statfs_t(struct mount *mp, struct statfs *sbp);
 typedef	int vfs_sync_t(struct mount *mp, int waitfor);
 typedef	int vfs_vget_t(struct mount *mp, ino_t ino, int flags,
 		    struct vnode **vpp);
 typedef	int vfs_fhtovp_t(struct mount *mp, struct fid *fhp,
 		    int flags, struct vnode **vpp);
 typedef	int vfs_checkexp_t(struct mount *mp, struct sockaddr *nam,
 		    int *extflagsp, struct ucred **credanonp,
 		    int *numsecflavors, int **secflavors);
 typedef	int vfs_init_t(struct vfsconf *);
 typedef	int vfs_uninit_t(struct vfsconf *);
 typedef	int vfs_extattrctl_t(struct mount *mp, int cmd,
 		    struct vnode *filename_vp, int attrnamespace,
 		    const char *attrname);
 typedef	int vfs_mount_t(struct mount *mp);
 typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op,
 		    struct sysctl_req *req);
 typedef void vfs_susp_clean_t(struct mount *mp);
 typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp);
 typedef void vfs_purge_t(struct mount *mp);
 
 struct vfsops {
 	vfs_mount_t		*vfs_mount;
 	vfs_cmount_t		*vfs_cmount;
 	vfs_unmount_t		*vfs_unmount;
 	vfs_root_t		*vfs_root;
 	vfs_quotactl_t		*vfs_quotactl;
 	vfs_statfs_t		*vfs_statfs;
 	vfs_sync_t		*vfs_sync;
 	vfs_vget_t		*vfs_vget;
 	vfs_fhtovp_t		*vfs_fhtovp;
 	vfs_checkexp_t		*vfs_checkexp;
 	vfs_init_t		*vfs_init;
 	vfs_uninit_t		*vfs_uninit;
 	vfs_extattrctl_t	*vfs_extattrctl;
 	vfs_sysctl_t		*vfs_sysctl;
 	vfs_susp_clean_t	*vfs_susp_clean;
 	vfs_notify_lowervp_t	*vfs_reclaim_lowervp;
 	vfs_notify_lowervp_t	*vfs_unlink_lowervp;
 	vfs_purge_t		*vfs_purge;
 	vfs_mount_t		*vfs_spare[6];	/* spares for ABI compat */
 };
 
 vfs_statfs_t	__vfs_statfs;
 
 /*
  * VFS_PROLOGUE() and VFS_EPILOGUE() must always be used as a pair within
  * the same block: the prologue opens a "do {" scope and declares
  * _prev_stops in it, and the epilogue closes that scope with
  * "} while (0)".
  *
  * The prologue calls sigdeferstop() with SIGDEFERSTOP_SILENT when MP is
  * non-NULL and its filesystem type has VFCF_SBDRY set, and with
  * SIGDEFERSTOP_NOP otherwise.  The epilogue passes the value saved from
  * that call to sigallowstop(); its MP argument is not used.
  */
 #define	VFS_PROLOGUE(MP)	do {					\
 	struct mount *mp__;						\
 	int _prev_stops;						\
 									\
 	mp__ = (MP);							\
 	_prev_stops = sigdeferstop((mp__ != NULL &&			\
 	    (mp__->mnt_vfc->vfc_flags & VFCF_SBDRY) != 0) ?		\
 	    SIGDEFERSTOP_SILENT : SIGDEFERSTOP_NOP);
 
 #define	VFS_EPILOGUE(MP)						\
 	sigallowstop(_prev_stops);					\
 } while (0)
 
 /*
  * Call the filesystem's vfs_mount method through MP->mnt_op, bracketed by
  * VFS_PROLOGUE/VFS_EPILOGUE, and evaluate to its int return value.
  *
  * This is a GNU statement expression, so it can be used where a value is
  * expected.  MP is expanded more than once; pass an expression without
  * side effects.
  */
 #define	VFS_MOUNT(MP) ({						\
 	int _rc;							\
 									\
 	VFS_PROLOGUE(MP);						\
 	_rc = (*(MP)->mnt_op->vfs_mount)(MP);				\
 	VFS_EPILOGUE(MP);						\
 	_rc; })
 
 #define	VFS_UNMOUNT(MP, FORCE) ({					\
 	int _rc;							\
 									\
 	VFS_PROLOGUE(MP);						\
 	_rc = (*(MP)->mnt_op->vfs_unmount)(MP, FORCE);			\
 	VFS_EPILOGUE(MP);						\
 	_rc; })
 
 #define	VFS_ROOT(MP, FLAGS, VPP) ({					\
 	int _rc;							\
 									\
 	VFS_PROLOGUE(MP);						\
 	_rc = (*(MP)->mnt_op->vfs_root)(MP, FLAGS, VPP);		\
 	VFS_EPILOGUE(MP);						\
 	_rc; })
 
 #define	VFS_QUOTACTL(MP, C, U, A) ({					\
 	int _rc;							\
 									\
 	VFS_PROLOGUE(MP);						\
 	_rc = (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A);		\
 	VFS_EPILOGUE(MP);						\
 	_rc; })
 
 #define	VFS_STATFS(MP, SBP) ({						\
 	int _rc;							\
 									\
 	VFS_PROLOGUE(MP);						\
 	_rc = __vfs_statfs((MP), (SBP));				\
 	VFS_EPILOGUE(MP);						\
 	_rc; })
 
 #define	VFS_SYNC(MP, WAIT) ({						\
 	int _rc;							\
 									\
 	VFS_PROLOGUE(MP);						\
 	_rc = (*(MP)->mnt_op->vfs_sync)(MP, WAIT);			\
 	VFS_EPILOGUE(MP);						\
 	_rc; })
 
 #define	VFS_VGET(MP, INO, FLAGS, VPP) ({				\
 	int _rc;							\
 									\
 	VFS_PROLOGUE(MP);						\
 	_rc = (*(MP)->mnt_op->vfs_vget)(MP, INO, FLAGS, VPP);		\
 	VFS_EPILOGUE(MP);						\
 	_rc; })
 
 #define	VFS_FHTOVP(MP, FIDP, FLAGS, VPP) ({				\
 	int _rc;							\
 									\
 	VFS_PROLOGUE(MP);						\
 	_rc = (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, FLAGS, VPP);	\
 	VFS_EPILOGUE(MP);						\
 	_rc; })
 
 #define	VFS_CHECKEXP(MP, NAM, EXFLG, CRED, NUMSEC, SEC) ({		\
 	int _rc;							\
 									\
 	VFS_PROLOGUE(MP);						\
 	_rc = (*(MP)->mnt_op->vfs_checkexp)(MP, NAM, EXFLG, CRED, NUMSEC,\
 	    SEC);							\
 	VFS_EPILOGUE(MP);						\
 	_rc; })
 
 #define	VFS_EXTATTRCTL(MP, C, FN, NS, N) ({				\
 	int _rc;							\
 									\
 	VFS_PROLOGUE(MP);						\
 	_rc = (*(MP)->mnt_op->vfs_extattrctl)(MP, C, FN, NS, N);	\
 	VFS_EPILOGUE(MP);						\
 	_rc; })
 
 #define	VFS_SYSCTL(MP, OP, REQ) ({					\
 	int _rc;							\
 									\
 	VFS_PROLOGUE(MP);						\
 	_rc = (*(MP)->mnt_op->vfs_sysctl)(MP, OP, REQ);			\
 	VFS_EPILOGUE(MP);						\
 	_rc; })
 
 /*
  * Invoke the filesystem's optional vfs_susp_clean hook, bracketed by
  * VFS_PROLOGUE/VFS_EPILOGUE; does nothing when the hook is NULL.
  * The hook pointer itself is compared against NULL (no unary '*'),
  * because indirection through a null function pointer is undefined.
  * MP is expanded more than once.
  */
 #define	VFS_SUSP_CLEAN(MP) do {						\
 	if ((MP)->mnt_op->vfs_susp_clean != NULL) {			\
 		VFS_PROLOGUE(MP);					\
 		(*(MP)->mnt_op->vfs_susp_clean)(MP);			\
 		VFS_EPILOGUE(MP);					\
 	}								\
 } while (0)
 
 /*
  * Invoke the filesystem's optional vfs_reclaim_lowervp hook with (MP, VP),
  * bracketed by VFS_PROLOGUE/VFS_EPILOGUE; does nothing when the hook is
  * NULL.  The hook pointer itself is compared against NULL (no unary '*'),
  * because indirection through a null function pointer is undefined.
  * MP is expanded more than once.
  */
 #define	VFS_RECLAIM_LOWERVP(MP, VP) do {				\
 	if ((MP)->mnt_op->vfs_reclaim_lowervp != NULL) {		\
 		VFS_PROLOGUE(MP);					\
 		(*(MP)->mnt_op->vfs_reclaim_lowervp)((MP), (VP));	\
 		VFS_EPILOGUE(MP);					\
 	}								\
 } while (0)
 
 /*
  * Invoke the filesystem's optional vfs_unlink_lowervp hook with (MP, VP),
  * bracketed by VFS_PROLOGUE/VFS_EPILOGUE; does nothing when the hook is
  * NULL.  The hook pointer itself is compared against NULL (no unary '*'),
  * because indirection through a null function pointer is undefined.
  * MP is expanded more than once.
  */
 #define	VFS_UNLINK_LOWERVP(MP, VP) do {					\
 	if ((MP)->mnt_op->vfs_unlink_lowervp != NULL) {			\
 		VFS_PROLOGUE(MP);					\
 		(*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP));	\
 		VFS_EPILOGUE(MP);					\
 	}								\
 } while (0)
 
 /*
  * Invoke the filesystem's optional vfs_purge hook, bracketed by
  * VFS_PROLOGUE/VFS_EPILOGUE; does nothing when the hook is NULL.
  * The hook pointer itself is compared against NULL (no unary '*'),
  * because indirection through a null function pointer is undefined.
  * MP is expanded more than once.
  */
 #define	VFS_PURGE(MP) do {						\
 	if ((MP)->mnt_op->vfs_purge != NULL) {				\
 		VFS_PROLOGUE(MP);					\
 		(*(MP)->mnt_op->vfs_purge)(MP);				\
 		VFS_EPILOGUE(MP);					\
 	}								\
 } while (0)
 
 #define VFS_KNOTE_LOCKED(vp, hint) do					\
 {									\
 	if (((vp)->v_vflag & VV_NOKNOTE) == 0)				\
 		VN_KNOTE((vp), (hint), KNF_LISTLOCKED);			\
 } while (0)
 
 #define VFS_KNOTE_UNLOCKED(vp, hint) do					\
 {									\
 	if (((vp)->v_vflag & VV_NOKNOTE) == 0)				\
 		VN_KNOTE((vp), (hint), 0);				\
 } while (0)
 
 #define	VFS_NOTIFY_UPPER_RECLAIM	1
 #define	VFS_NOTIFY_UPPER_UNLINK		2
 
 #include <sys/module.h>
 
 /*
  * Version numbers.
  */
 #define VFS_VERSION_00	0x19660120
 #define VFS_VERSION_01	0x20121030
 #define VFS_VERSION	VFS_VERSION_01
 
 /*
  * Declare a filesystem type as a kernel module.
  *
  * Expands to a static struct vfsconf named <fsname>_vfsconf (version
  * VFS_VERSION, name taken from the stringified fsname, operations vector
  * &vfsops, type number -1, and the given flags), a static moduledata_t
  * named <fsname>_mod whose event handler is vfs_modevent and whose data
  * pointer is that vfsconf, and a DECLARE_MODULE() for it at SI_SUB_VFS,
  * SI_ORDER_MIDDLE.
  *
  * The expansion does not end in a semicolon; the caller supplies it.
  */
 #define VFS_SET(vfsops, fsname, flags) \
 	static struct vfsconf fsname ## _vfsconf = {		\
 		.vfc_version = VFS_VERSION,			\
 		.vfc_name = #fsname,				\
 		.vfc_vfsops = &vfsops,				\
 		.vfc_typenum = -1,				\
 		.vfc_flags = flags,				\
 	};							\
 	static moduledata_t fsname ## _mod = {			\
 		#fsname,					\
 		vfs_modevent,					\
 		& fsname ## _vfsconf				\
 	};							\
 	DECLARE_MODULE(fsname, fsname ## _mod, SI_SUB_VFS, SI_ORDER_MIDDLE)
 
 /*
  * exported vnode operations
  */
 
 int	dounmount(struct mount *, int, struct thread *);
 
 int	kernel_mount(struct mntarg *ma, uint64_t flags);
 int	kernel_vmount(int flags, ...);
 struct mntarg *mount_arg(struct mntarg *ma, const char *name, const void *val, int len);
 struct mntarg *mount_argb(struct mntarg *ma, int flag, const char *name);
 struct mntarg *mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...);
 struct mntarg *mount_argsu(struct mntarg *ma, const char *name, const void *val, int len);
 void	statfs_scale_blocks(struct statfs *sf, long max_size);
 struct vfsconf *vfs_byname(const char *);
 struct vfsconf *vfs_byname_kld(const char *, struct thread *td, int *);
 void	vfs_mount_destroy(struct mount *);
 void	vfs_event_signal(fsid_t *, u_int32_t, intptr_t);
 void	vfs_freeopts(struct vfsoptlist *opts);
 void	vfs_deleteopt(struct vfsoptlist *opts, const char *name);
 int	vfs_buildopts(struct uio *auio, struct vfsoptlist **options);
 int	vfs_flagopt(struct vfsoptlist *opts, const char *name, uint64_t *w,
 	    uint64_t val);
 int	vfs_getopt(struct vfsoptlist *, const char *, void **, int *);
 int	vfs_getopt_pos(struct vfsoptlist *opts, const char *name);
 int	vfs_getopt_size(struct vfsoptlist *opts, const char *name,
 	    off_t *value);
 char	*vfs_getopts(struct vfsoptlist *, const char *, int *error);
 int	vfs_copyopt(struct vfsoptlist *, const char *, void *, int);
 int	vfs_filteropt(struct vfsoptlist *, const char **legal);
 void	vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...);
 int	vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...);
 int	vfs_setopt(struct vfsoptlist *opts, const char *name, void *value,
 	    int len);
 int	vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value,
 	    int len);
 int	vfs_setopts(struct vfsoptlist *opts, const char *name,
 	    const char *value);
 int	vfs_setpublicfs			    /* set publicly exported fs */
 	    (struct mount *, struct netexport *, struct export_args *);
 void	vfs_msync(struct mount *, int);
 int	vfs_busy(struct mount *, int);
 int	vfs_export			 /* process mount export info */
 	    (struct mount *, struct export_args *);
 void	vfs_allocate_syncvnode(struct mount *);
 void	vfs_deallocate_syncvnode(struct mount *);
 int	vfs_donmount(struct thread *td, uint64_t fsflags,
 	    struct uio *fsoptions);
 void	vfs_getnewfsid(struct mount *);
 struct cdev *vfs_getrootfsid(struct mount *);
 struct	mount *vfs_getvfs(fsid_t *);      /* return vfs given fsid */
 struct	mount *vfs_busyfs(fsid_t *);
 int	vfs_modevent(module_t, int, void *);
 void	vfs_mount_error(struct mount *, const char *, ...);
 void	vfs_mountroot(void);			/* mount our root filesystem */
 void	vfs_mountedfrom(struct mount *, const char *from);
 void	vfs_notify_upper(struct vnode *, int);
 void	vfs_oexport_conv(const struct oexport_args *oexp,
 	    struct export_args *exp);
 void	vfs_ref(struct mount *);
 void	vfs_rel(struct mount *);
 struct mount *vfs_mount_alloc(struct vnode *, struct vfsconf *, const char *,
 	    struct ucred *);
 int	vfs_suser(struct mount *, struct thread *);
 void	vfs_unbusy(struct mount *);
 void	vfs_unmountall(void);
 extern	TAILQ_HEAD(mntlist, mount) mountlist;	/* mounted filesystem list */
 extern	struct mtx mountlist_mtx;
 extern	struct nfs_public nfs_pub;
 extern	struct sx vfsconf_sx;
 #define	vfsconf_lock()		sx_xlock(&vfsconf_sx)
 #define	vfsconf_unlock()	sx_xunlock(&vfsconf_sx)
 #define	vfsconf_slock()		sx_slock(&vfsconf_sx)
 #define	vfsconf_sunlock()	sx_sunlock(&vfsconf_sx)
 
 /*
  * Declarations for these vfs default operations are located in
  * kern/vfs_default.c.  They will be automatically used to replace
  * null entries in VFS ops tables when registering a new filesystem
  * type in the global table.
  */
 vfs_root_t		vfs_stdroot;
 vfs_quotactl_t		vfs_stdquotactl;
 vfs_statfs_t		vfs_stdstatfs;
 vfs_sync_t		vfs_stdsync;
 vfs_sync_t		vfs_stdnosync;
 vfs_vget_t		vfs_stdvget;
 vfs_fhtovp_t		vfs_stdfhtovp;
 vfs_checkexp_t		vfs_stdcheckexp;
 vfs_init_t		vfs_stdinit;
 vfs_uninit_t		vfs_stduninit;
 vfs_extattrctl_t	vfs_stdextattrctl;
 vfs_sysctl_t		vfs_stdsysctl;
 
 void	syncer_suspend(void);
 void	syncer_resume(void);
 
 #else /* !_KERNEL */
 
 #include <sys/cdefs.h>
 
 struct stat;
 
 __BEGIN_DECLS
 int	fhopen(const struct fhandle *, int);
 int	fhstat(const struct fhandle *, struct stat *);
 int	fhstatfs(const struct fhandle *, struct statfs *);
 int	fstatfs(int, struct statfs *);
 int	getfh(const char *, fhandle_t *);
 int	getfsstat(struct statfs *, long, int);
 int	getmntinfo(struct statfs **, int);
 int	lgetfh(const char *, fhandle_t *);
 int	mount(const char *, const char *, int, void *);
 int	nmount(struct iovec *, unsigned int, int);
 int	statfs(const char *, struct statfs *);
 int	unmount(const char *, int);
 
 /* C library stuff */
 int	getvfsbyname(const char *, struct xvfsconf *);
 __END_DECLS
 
 #endif /* _KERNEL */
 
 #endif /* !_SYS_MOUNT_H_ */
Index: head/sys/sys/param.h
===================================================================
--- head/sys/sys/param.h	(revision 318735)
+++ head/sys/sys/param.h	(revision 318736)
@@ -1,363 +1,363 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)param.h	8.3 (Berkeley) 4/4/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_PARAM_H_
 #define _SYS_PARAM_H_
 
 #include <sys/_null.h>
 
 #define	BSD	199506		/* System version (year & month). */
 #define BSD4_3	1
 #define BSD4_4	1
 
 /*
  * __FreeBSD_version numbers are documented in the Porter's Handbook.
  * If you bump the version for any reason, you should update the documentation
  * there.
  * Currently this lives here in the doc/ repository:
  *
  *	head/en_US.ISO8859-1/books/porters-handbook/versions/chapter.xml
  *
  * scheme is:  <major><two digit minor>Rxx
  *		'R' is in the range 0 to 4 if this is a release branch or
  *		x.0-CURRENT before RELENG_*_0 is created, otherwise 'R' is
  *		in the range 5 to 9.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1200030	/* Master, propagated to newvers */
+#define __FreeBSD_version 1200031	/* Master, propagated to newvers */
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
  * which by definition is always true on FreeBSD. This macro is also defined
  * on other systems that use the kernel of FreeBSD, such as GNU/kFreeBSD.
  *
  * It is tempting to use this macro in userland code when we want to enable
  * kernel-specific routines, and in fact it's fine to do this in code that
  * is part of FreeBSD itself.  However, be aware that as presence of this
  * macro is still not widespread (e.g. older FreeBSD versions, 3rd party
  * compilers, etc), it is STRONGLY DISCOURAGED to check for this macro in
  * external applications without also checking for __FreeBSD__ as an
  * alternative.
  */
 #undef __FreeBSD_kernel__
 #define __FreeBSD_kernel__
 
 #ifdef _KERNEL
 #define	P_OSREL_SIGWAIT			700000
 #define	P_OSREL_SIGSEGV			700004
 #define	P_OSREL_MAP_ANON		800104
 #define	P_OSREL_MAP_FSTRICT		1100036
 #define	P_OSREL_SHUTDOWN_ENOTCONN	1100077
 
 #define	P_OSREL_MAJOR(x)		((x) / 100000)
 #endif
 
 #ifndef LOCORE
 #include <sys/types.h>
 #endif
 
 /*
  * Machine-independent constants (some used in following include files).
  * Redefined constants are from POSIX 1003.1 limits file.
  *
  * MAXCOMLEN should be >= sizeof(ac_comm) (see <acct.h>)
  */
 #include <sys/syslimits.h>
 
 #define	MAXCOMLEN	19		/* max command name remembered */
 #define	MAXINTERP	PATH_MAX	/* max interpreter file name length */
 #define	MAXLOGNAME	33		/* max login name length (incl. NUL) */
 #define	MAXUPRC		CHILD_MAX	/* max simultaneous processes */
 #define	NCARGS		ARG_MAX		/* max bytes for an exec function */
 #define	NGROUPS		(NGROUPS_MAX+1)	/* max number groups */
 #define	NOFILE		OPEN_MAX	/* max open files per process */
 #define	NOGROUP		65535		/* marker for empty group set member */
 #define MAXHOSTNAMELEN	256		/* max hostname size */
 #define SPECNAMELEN	63		/* max length of devicename */
 
 /* More types and definitions used throughout the kernel. */
 #ifdef _KERNEL
 #include <sys/cdefs.h>
 #include <sys/errno.h>
 #ifndef LOCORE
 #include <sys/time.h>
 #include <sys/priority.h>
 #endif
 
 #ifndef FALSE
 #define	FALSE	0
 #endif
 #ifndef TRUE
 #define	TRUE	1
 #endif
 #endif
 
 #ifndef _KERNEL
 /* Signals. */
 #include <sys/signal.h>
 #endif
 
 /* Machine type dependent parameters. */
 #include <machine/param.h>
 #ifndef _KERNEL
 #include <sys/limits.h>
 #endif
 
 #ifndef DEV_BSHIFT
 #define	DEV_BSHIFT	9		/* log2(DEV_BSIZE) */
 #endif
 #define	DEV_BSIZE	(1<<DEV_BSHIFT)
 
 #ifndef BLKDEV_IOSIZE
 #define BLKDEV_IOSIZE  PAGE_SIZE	/* default block device I/O size */
 #endif
 #ifndef DFLTPHYS
 #define DFLTPHYS	(64 * 1024)	/* default max raw I/O transfer size */
 #endif
 #ifndef MAXPHYS
 #define MAXPHYS		(128 * 1024)	/* max raw I/O transfer size */
 #endif
 #ifndef MAXDUMPPGS
 #define MAXDUMPPGS	(DFLTPHYS/PAGE_SIZE)
 #endif
 
 /*
  * Constants related to network buffer management.
  * MCLBYTES must be no larger than PAGE_SIZE.
  */
 #ifndef	MSIZE
 #define	MSIZE		256		/* size of an mbuf */
 #endif
 
 #ifndef	MCLSHIFT
 #define MCLSHIFT	11		/* convert bytes to mbuf clusters */
 #endif	/* MCLSHIFT */
 
 #define MCLBYTES	(1 << MCLSHIFT)	/* size of an mbuf cluster */
 
 #if PAGE_SIZE < 2048
 #define	MJUMPAGESIZE	MCLBYTES
 #elif PAGE_SIZE <= 8192
 #define	MJUMPAGESIZE	PAGE_SIZE
 #else
 #define	MJUMPAGESIZE	(8 * 1024)
 #endif
 
 #define	MJUM9BYTES	(9 * 1024)	/* jumbo cluster 9k */
 #define	MJUM16BYTES	(16 * 1024)	/* jumbo cluster 16k */
 
 /*
  * Some macros for units conversion
  */
 
 /* clicks to bytes */
 #ifndef ctob
 #define ctob(x)	((x)<<PAGE_SHIFT)
 #endif
 
 /* bytes to clicks */
 #ifndef btoc
 #define btoc(x)	(((vm_offset_t)(x)+PAGE_MASK)>>PAGE_SHIFT)
 #endif
 
 /*
  * btodb() is messy and perhaps slow because `bytes' may be an off_t.  We
  * want to shift an unsigned type to avoid sign extension and we don't
  * want to widen `bytes' unnecessarily.  Assume that the result fits in
  * a daddr_t.
  */
 #ifndef btodb
 #define btodb(bytes)	 		/* calculates (bytes / DEV_BSIZE) */ \
 	(sizeof (bytes) > sizeof(long) \
 	 ? (daddr_t)((unsigned long long)(bytes) >> DEV_BSHIFT) \
 	 : (daddr_t)((unsigned long)(bytes) >> DEV_BSHIFT))
 #endif
 
 #ifndef dbtob
 #define dbtob(db)			/* calculates (db * DEV_BSIZE) */ \
 	((off_t)(db) << DEV_BSHIFT)
 #endif
 
 #define	PRIMASK	0x0ff
 #define	PCATCH	0x100		/* OR'd with pri for tsleep to check signals */
 #define	PDROP	0x200	/* OR'd with pri to stop re-entry of interlock mutex */
 
 #define	NZERO	0		/* default "nice" */
 
 #define	NBBY	8		/* number of bits in a byte */
 #define	NBPW	sizeof(int)	/* number of bytes per word (integer) */
 
 #define	CMASK	022		/* default file mask: S_IWGRP|S_IWOTH */
 
 #define	NODEV	((dev_t)(-1))	/* non-existent device */
 
 /*
  * File system parameters and macros.
  *
  * MAXBSIZE -	Filesystems are made out of blocks of at most MAXBSIZE bytes
  *		per block.  MAXBSIZE may be made larger without affecting
  *		any existing filesystems as long as it does not exceed MAXPHYS,
  *		and may be made smaller at the risk of not being able to use
  *		filesystems which require a block size exceeding MAXBSIZE.
  *
  * MAXBCACHEBUF - Maximum size of a buffer in the buffer cache.  This must
  *		be >= MAXBSIZE and can be set differently for different
  *		architectures by defining it in <machine/param.h>.
  *		Making this larger allows NFS to do larger reads/writes.
  *
  * BKVASIZE -	Nominal buffer space per buffer, in bytes.  BKVASIZE is the
  *		minimum KVM memory reservation the kernel is willing to make.
  *		Filesystems can of course request smaller chunks.  Actual
  *		backing memory uses a chunk size of a page (PAGE_SIZE).
  *		The default value here can be overridden on a per-architecture
  *		basis by defining it in <machine/param.h>.  This should
  *		probably be done to increase its value, when MAXBCACHEBUF is
  *		defined as a larger value in <machine/param.h>.
  *
  *		If you make BKVASIZE too small you risk seriously fragmenting
  *		the buffer KVM map which may slow things down a bit.  If you
  *		make it too big the kernel will not be able to optimally use
  *		the KVM memory reserved for the buffer cache and will wind
  *		up with too-few buffers.
  *
  *		The default is 16384, roughly 2x the block size used by a
  *		normal UFS filesystem.
  */
 #define MAXBSIZE	65536	/* must be power of 2 */
 #ifndef	MAXBCACHEBUF
 #define	MAXBCACHEBUF	MAXBSIZE /* must be a power of 2 >= MAXBSIZE */
 #endif
 #ifndef	BKVASIZE
 #define BKVASIZE	16384	/* must be power of 2 */
 #endif
 #define BKVAMASK	(BKVASIZE-1)
 
 /*
  * MAXPATHLEN defines the longest permissible path length after expanding
  * symbolic links. It is used to allocate a temporary buffer from the buffer
  * pool in which to do the name expansion, hence should be a power of two,
  * and must be less than or equal to MAXBSIZE.  MAXSYMLINKS defines the
  * maximum number of symbolic links that may be expanded in a path name.
  * It should be set high enough to allow all legitimate uses, but halt
  * infinite loops reasonably quickly.
  */
 #define	MAXPATHLEN	PATH_MAX
 #define MAXSYMLINKS	32
 
 /*
  * Bit map related macros.  `a' is accessed as an array of unsigned char;
  * bit `i' is bit (i % NBBY) of byte (i / NBBY).  setbit and clrbit modify
  * the map in place, isset yields a non-zero value (not necessarily 1)
  * when the bit is set, and isclr yields 1 when it is clear.
  */
 #define	setbit(a,i)	(((unsigned char *)(a))[(i)/NBBY] |= 1<<((i)%NBBY))
 #define	clrbit(a,i)	(((unsigned char *)(a))[(i)/NBBY] &= ~(1<<((i)%NBBY)))
 #define	isset(a,i)							\
 	(((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY)))
 #define	isclr(a,i)							\
 	((((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) == 0)
 
 /*
  * Macros for counting and rounding.  Most of these expand an argument
  * more than once, so avoid arguments with side effects.
  */
 #ifndef howmany
 #define	howmany(x, y)	(((x)+((y)-1))/(y))
 #endif
 #define	nitems(x)	(sizeof((x)) / sizeof((x)[0]))
 #define	rounddown(x, y)	(((x)/(y))*(y))
 #define	rounddown2(x, y) ((x)&(~((y)-1)))          /* if y is a power of two */
 #define	roundup(x, y)	((((x)+((y)-1))/(y))*(y))  /* to any y */
 #define	roundup2(x, y)	(((x)+((y)-1))&(~((y)-1))) /* if y is a power of two */
 #define powerof2(x)	((((x)-1)&(x))==0)	/* also true for x == 0 */
 
 /*
  * Macros for min/max.  The selected argument is evaluated twice, so do
  * not pass expressions with side effects.
  */
 #define	MIN(a,b) (((a)<(b))?(a):(b))
 #define	MAX(a,b) (((a)>(b))?(a):(b))
 
 #ifdef _KERNEL
 /*
  * Basic byte order function prototypes for non-inline functions.
  */
 #ifndef LOCORE
 #ifndef _BYTEORDER_PROTOTYPED
 #define	_BYTEORDER_PROTOTYPED
 __BEGIN_DECLS
 __uint32_t	 htonl(__uint32_t);
 __uint16_t	 htons(__uint16_t);
 __uint32_t	 ntohl(__uint32_t);
 __uint16_t	 ntohs(__uint16_t);
 __END_DECLS
 #endif
 #endif
 
 #ifndef lint
 #ifndef _BYTEORDER_FUNC_DEFINED
 #define	_BYTEORDER_FUNC_DEFINED
 #define	htonl(x)	__htonl(x)
 #define	htons(x)	__htons(x)
 #define	ntohl(x)	__ntohl(x)
 #define	ntohs(x)	__ntohs(x)
 #endif /* !_BYTEORDER_FUNC_DEFINED */
 #endif /* lint */
 #endif /* _KERNEL */
 
 /*
  * Scale factor for scaled integers used to count %cpu time and load avgs.
  *
  * The number of CPU `tick's that map to a unique `%age' can be expressed
  * by the formula (1 / (2 ^ (FSHIFT - 11))).  The maximum load average that
  * can be calculated (assuming 32 bits) can be closely approximated using
  * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15).
  *
  * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age',
  * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024.
  */
 #define	FSHIFT	11		/* bits to right of fixed binary point */
 #define FSCALE	(1<<FSHIFT)
 
 #define dbtoc(db)		/* devblks to pages, rounding up */ \
 	(((db) + (ctodb(1) - 1)) >> (PAGE_SHIFT - DEV_BSHIFT))
 
 #define ctodb(db)			/* calculates pages to devblks */ \
 	((db) << (PAGE_SHIFT - DEV_BSHIFT))
 
 /*
  * Old spelling of __containerof().
  */
 #define	member2struct(s, m, x)						\
 	((struct s *)(void *)((char *)(x) - offsetof(struct s, m)))
 
 /*
  * Access a variable length array that has been declared as a fixed
  * length array.
  */
 #define __PAST_END(array, offset) (((__typeof__(*(array)) *)(array))[offset])
 
 #endif	/* _SYS_PARAM_H_ */
Index: head/sys/sys/stat.h
===================================================================
--- head/sys/sys/stat.h	(revision 318735)
+++ head/sys/sys/stat.h	(revision 318736)
@@ -1,361 +1,402 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)stat.h	8.12 (Berkeley) 6/16/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_STAT_H_
 #define	_SYS_STAT_H_
 
 #include <sys/cdefs.h>
 #include <sys/_timespec.h>
 #include <sys/_types.h>
 
 #ifndef _BLKSIZE_T_DECLARED
 typedef	__blksize_t	blksize_t;
 #define	_BLKSIZE_T_DECLARED
 #endif
 
 #ifndef _BLKCNT_T_DECLARED
 typedef	__blkcnt_t	blkcnt_t;
 #define	_BLKCNT_T_DECLARED
 #endif
 
 #ifndef _DEV_T_DECLARED
 typedef	__dev_t		dev_t;
 #define	_DEV_T_DECLARED
 #endif
 
 #ifndef _FFLAGS_T_DECLARED
 typedef	__fflags_t	fflags_t;
 #define	_FFLAGS_T_DECLARED
 #endif
 
 #ifndef _GID_T_DECLARED
 typedef	__gid_t		gid_t;
 #define	_GID_T_DECLARED
 #endif
 
 #ifndef _INO_T_DECLARED
 typedef	__ino_t		ino_t;
 #define	_INO_T_DECLARED
 #endif
 
 #ifndef _MODE_T_DECLARED
 typedef	__mode_t	mode_t;
 #define	_MODE_T_DECLARED
 #endif
 
 #ifndef _NLINK_T_DECLARED
 typedef	__nlink_t	nlink_t;
 #define	_NLINK_T_DECLARED
 #endif
 
 #ifndef _OFF_T_DECLARED
 typedef	__off_t		off_t;
 #define	_OFF_T_DECLARED
 #endif
 
 #ifndef _UID_T_DECLARED
 typedef	__uid_t		uid_t;
 #define	_UID_T_DECLARED
 #endif
 
 #if !defined(_KERNEL) && __BSD_VISIBLE
 /*
  * XXX We get miscellaneous namespace pollution with this.
  */
 #include <sys/time.h>
 #endif
 
 #ifdef _KERNEL
 struct ostat {
 	__uint16_t st_dev;		/* inode's device */
-	ino_t	  st_ino;		/* inode's number */
+	__uint32_t st_ino;		/* inode's number */
 	mode_t	  st_mode;		/* inode protection mode */
-	nlink_t	  st_nlink;		/* number of hard links */
+	__uint16_t st_nlink;		/* number of hard links */
 	__uint16_t st_uid;		/* user ID of the file's owner */
 	__uint16_t st_gid;		/* group ID of the file's group */
 	__uint16_t st_rdev;		/* device type */
 	__int32_t st_size;		/* file size, in bytes */
 	struct	timespec st_atim;	/* time of last access */
 	struct	timespec st_mtim;	/* time of last data modification */
 	struct	timespec st_ctim;	/* time of last file status change */
 	__int32_t st_blksize;		/* optimal blocksize for I/O */
 	__int32_t st_blocks;		/* blocks allocated for file */
 	fflags_t  st_flags;		/* user defined flags for file */
 	__uint32_t st_gen;		/* file generation number */
 };
 #endif
 
-struct stat {
-	__dev_t   st_dev;		/* inode's device */
-	ino_t	  st_ino;		/* inode's number */
+#if defined(_WANT_FREEBSD11_STAT) || defined(_KERNEL)
+struct freebsd11_stat {
+	__uint32_t st_dev;		/* inode's device */
+	__uint32_t st_ino;		/* inode's number */
 	mode_t	  st_mode;		/* inode protection mode */
-	nlink_t	  st_nlink;		/* number of hard links */
+	__uint16_t st_nlink;		/* number of hard links */
 	uid_t	  st_uid;		/* user ID of the file's owner */
 	gid_t	  st_gid;		/* group ID of the file's group */
-	__dev_t   st_rdev;		/* device type */
+	__uint32_t st_rdev;		/* device type */
 	struct	timespec st_atim;	/* time of last access */
 	struct	timespec st_mtim;	/* time of last data modification */
 	struct	timespec st_ctim;	/* time of last file status change */
 	off_t	  st_size;		/* file size, in bytes */
 	blkcnt_t st_blocks;		/* blocks allocated for file */
 	blksize_t st_blksize;		/* optimal blocksize for I/O */
 	fflags_t  st_flags;		/* user defined flags for file */
 	__uint32_t st_gen;		/* file generation number */
 	__int32_t st_lspare;
 	struct timespec st_birthtim;	/* time of file creation */
 	/*
 	 * Explicitly pad st_birthtim to 16 bytes so that the size of
 	 * struct stat is backwards compatible.  We use bitfields instead
 	 * of an array of chars so that this doesn't require a C99 compiler
 	 * to compile if the size of the padding is 0.  We use 2 bitfields
 	 * to cover up to 64 bits on 32-bit machines.  We assume that
 	 * CHAR_BIT is 8...
 	 */
 	unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec));
 	unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec));
 };
+#endif /* _WANT_FREEBSD11_STAT || _KERNEL */
 
+#if defined(__i386__)
+#define	__STAT_TIME_T_EXT	1	/* adds st_*tim_ext to struct stat */
+#endif
+
+struct stat {
+	dev_t     st_dev;		/* inode's device */
+	ino_t	  st_ino;		/* inode's number */
+	nlink_t	  st_nlink;		/* number of hard links */
+	mode_t	  st_mode;		/* inode protection mode */
+	__int16_t st_padding0;		/* padding; presumably for alignment */
+	uid_t	  st_uid;		/* user ID of the file's owner */
+	gid_t	  st_gid;		/* group ID of the file's group */
+	__int32_t st_padding1;		/* padding; presumably for alignment */
+	dev_t     st_rdev;		/* device type */
+#ifdef	__STAT_TIME_T_EXT
+	__int32_t st_atim_ext;		/* presumably extends st_atim */
+#endif
+	struct	timespec st_atim;	/* time of last access */
+#ifdef	__STAT_TIME_T_EXT
+	__int32_t st_mtim_ext;		/* presumably extends st_mtim */
+#endif
+	struct	timespec st_mtim;	/* time of last data modification */
+#ifdef	__STAT_TIME_T_EXT
+	__int32_t st_ctim_ext;		/* presumably extends st_ctim */
+#endif
+	struct	timespec st_ctim;	/* time of last file status change */
+#ifdef	__STAT_TIME_T_EXT
+	__int32_t st_btim_ext;		/* presumably extends st_birthtim */
+#endif
+	struct	timespec st_birthtim;	/* time of file creation */
+	off_t	  st_size;		/* file size, in bytes */
+	blkcnt_t st_blocks;		/* blocks allocated for file */
+	blksize_t st_blksize;		/* optimal blocksize for I/O */
+	fflags_t  st_flags;		/* user defined flags for file */
+	__uint64_t st_gen;		/* file generation number */
+	__uint64_t st_spare[10];	/* spare; presumably for future use */
+};
+
 #ifdef _KERNEL
 struct nstat {
-	__dev_t   st_dev;		/* inode's device */
-	ino_t	  st_ino;		/* inode's number */
+	__uint32_t st_dev;		/* inode's device */
+	__uint32_t st_ino;		/* inode's number */
 	__uint32_t st_mode;		/* inode protection mode */
 	__uint32_t st_nlink;		/* number of hard links */
 	uid_t	  st_uid;		/* user ID of the file's owner */
 	gid_t	  st_gid;		/* group ID of the file's group */
-	__dev_t   st_rdev;		/* device type */
+	__uint32_t st_rdev;		/* device type */
 	struct	timespec st_atim;	/* time of last access */
 	struct	timespec st_mtim;	/* time of last data modification */
 	struct	timespec st_ctim;	/* time of last file status change */
 	off_t	  st_size;		/* file size, in bytes */
 	blkcnt_t st_blocks;		/* blocks allocated for file */
 	blksize_t st_blksize;		/* optimal blocksize for I/O */
 	fflags_t  st_flags;		/* user defined flags for file */
 	__uint32_t st_gen;		/* file generation number */
 	struct timespec st_birthtim;	/* time of file creation */
 	/*
-	 * See above about the following padding.
+	 * See comment in the definition of struct freebsd11_stat
+	 * above about the following padding.
 	 */
 	unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec));
 	unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec));
 };
 #endif
 
 #ifndef _KERNEL
 #define	st_atime		st_atim.tv_sec
 #define	st_mtime		st_mtim.tv_sec
 #define	st_ctime		st_ctim.tv_sec
 #if __BSD_VISIBLE
 #define	st_birthtime		st_birthtim.tv_sec
 #endif
 
 /* For compatibility. */
 #if __BSD_VISIBLE
 #define	st_atimespec		st_atim
 #define	st_mtimespec		st_mtim
 #define	st_ctimespec		st_ctim
 #define	st_birthtimespec	st_birthtim
 #endif
 #endif /* !_KERNEL */
 
 #define	S_ISUID	0004000			/* set user id on execution */
 #define	S_ISGID	0002000			/* set group id on execution */
 #if __BSD_VISIBLE
 #define	S_ISTXT	0001000			/* sticky bit */
 #endif
 
 #define	S_IRWXU	0000700			/* RWX mask for owner */
 #define	S_IRUSR	0000400			/* R for owner */
 #define	S_IWUSR	0000200			/* W for owner */
 #define	S_IXUSR	0000100			/* X for owner */
 
 #if __BSD_VISIBLE
 #define	S_IREAD		S_IRUSR
 #define	S_IWRITE	S_IWUSR
 #define	S_IEXEC		S_IXUSR
 #endif
 
 #define	S_IRWXG	0000070			/* RWX mask for group */
 #define	S_IRGRP	0000040			/* R for group */
 #define	S_IWGRP	0000020			/* W for group */
 #define	S_IXGRP	0000010			/* X for group */
 
 #define	S_IRWXO	0000007			/* RWX mask for other */
 #define	S_IROTH	0000004			/* R for other */
 #define	S_IWOTH	0000002			/* W for other */
 #define	S_IXOTH	0000001			/* X for other */
 
 #if __XSI_VISIBLE
 #define	S_IFMT	 0170000		/* type of file mask */
 #define	S_IFIFO	 0010000		/* named pipe (fifo) */
 #define	S_IFCHR	 0020000		/* character special */
 #define	S_IFDIR	 0040000		/* directory */
 #define	S_IFBLK	 0060000		/* block special */
 #define	S_IFREG	 0100000		/* regular */
 #define	S_IFLNK	 0120000		/* symbolic link */
 #define	S_IFSOCK 0140000		/* socket */
 #define	S_ISVTX	 0001000		/* save swapped text even after use */
 #endif
 #if __BSD_VISIBLE
 #define	S_IFWHT  0160000		/* whiteout */
 #endif
 
 #define	S_ISDIR(m)	(((m) & 0170000) == 0040000)	/* directory */
 #define	S_ISCHR(m)	(((m) & 0170000) == 0020000)	/* char special */
 #define	S_ISBLK(m)	(((m) & 0170000) == 0060000)	/* block special */
 #define	S_ISREG(m)	(((m) & 0170000) == 0100000)	/* regular file */
 #define	S_ISFIFO(m)	(((m) & 0170000) == 0010000)	/* fifo or socket */
 #if __POSIX_VISIBLE >= 200112
 #define	S_ISLNK(m)	(((m) & 0170000) == 0120000)	/* symbolic link */
 #define	S_ISSOCK(m)	(((m) & 0170000) == 0140000)	/* socket */
 #endif
 #if __BSD_VISIBLE
 #define	S_ISWHT(m)	(((m) & 0170000) == 0160000)	/* whiteout */
 #endif
 
 #if __BSD_VISIBLE
 #define	ACCESSPERMS	(S_IRWXU|S_IRWXG|S_IRWXO)	/* 0777 */
 							/* 7777 */
 #define	ALLPERMS	(S_ISUID|S_ISGID|S_ISTXT|S_IRWXU|S_IRWXG|S_IRWXO)
 							/* 0666 */
 #define	DEFFILEMODE	(S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)
 
 #define S_BLKSIZE	512		/* block size used in the stat struct */
 
 /*
  * Definitions of flags stored in file flags word.
  *
  * Super-user and owner changeable flags.
  */
 #define	UF_SETTABLE	0x0000ffff	/* mask of owner changeable flags */
 #define	UF_NODUMP	0x00000001	/* do not dump file */
 #define	UF_IMMUTABLE	0x00000002	/* file may not be changed */
 #define	UF_APPEND	0x00000004	/* writes to file may only append */
 #define	UF_OPAQUE	0x00000008	/* directory is opaque wrt. union */
 #define	UF_NOUNLINK	0x00000010	/* file may not be removed or renamed */
 /*
  * These two bits are defined in MacOS X.  They are not currently used in
  * FreeBSD.
  */
 #if 0
 #define	UF_COMPRESSED	0x00000020	/* file is compressed */
 #define	UF_TRACKED	0x00000040	/* renames and deletes are tracked */
 #endif
 
 #define	UF_SYSTEM	0x00000080	/* Windows system file bit */
 #define	UF_SPARSE	0x00000100	/* sparse file */
 #define	UF_OFFLINE	0x00000200	/* file is offline */
 #define	UF_REPARSE	0x00000400	/* Windows reparse point file bit */
 #define	UF_ARCHIVE	0x00000800	/* file needs to be archived */
 #define	UF_READONLY	0x00001000	/* Windows readonly file bit */
 /* This is the same as the MacOS X definition of UF_HIDDEN. */
 #define	UF_HIDDEN	0x00008000	/* file is hidden */
 
 /*
  * Super-user changeable flags.
  */
 #define	SF_SETTABLE	0xffff0000	/* mask of superuser changeable flags */
 #define	SF_ARCHIVED	0x00010000	/* file is archived */
 #define	SF_IMMUTABLE	0x00020000	/* file may not be changed */
 #define	SF_APPEND	0x00040000	/* writes to file may only append */
 #define	SF_NOUNLINK	0x00100000	/* file may not be removed or renamed */
 #define	SF_SNAPSHOT	0x00200000	/* snapshot inode */
 
 #ifdef _KERNEL
 /*
  * Shorthand abbreviations of above.
  */
 #define	OPAQUE		(UF_OPAQUE)
 #define	APPEND		(UF_APPEND | SF_APPEND)
 #define	IMMUTABLE	(UF_IMMUTABLE | SF_IMMUTABLE)
 #define	NOUNLINK	(UF_NOUNLINK | SF_NOUNLINK)
 #endif
 
 #endif /* __BSD_VISIBLE */
 
 #if __POSIX_VISIBLE >= 200809
 #define	UTIME_NOW	-1
 #define	UTIME_OMIT	-2
 #endif
 
 #ifndef _KERNEL
 __BEGIN_DECLS
 #if __BSD_VISIBLE
 int	chflags(const char *, unsigned long);
 int	chflagsat(int, const char *, unsigned long, int);
 #endif
 int	chmod(const char *, mode_t);
 #if __BSD_VISIBLE
 int	fchflags(int, unsigned long);
 #endif
 #if __POSIX_VISIBLE >= 200112
 int	fchmod(int, mode_t);
 #endif
 #if __POSIX_VISIBLE >= 200809
 int	fchmodat(int, const char *, mode_t, int);
 int	futimens(int fd, const struct timespec times[2]);
 int	utimensat(int fd, const char *path, const struct timespec times[2],
 		int flag);
 #endif
 int	fstat(int, struct stat *);
 #if __BSD_VISIBLE
 int	lchflags(const char *, unsigned long);
 int	lchmod(const char *, mode_t);
 #endif
 #if __POSIX_VISIBLE >= 200112
 int	lstat(const char * __restrict, struct stat * __restrict);
 #endif
 int	mkdir(const char *, mode_t);
 int	mkfifo(const char *, mode_t);
 #if !defined(_MKNOD_DECLARED) && __XSI_VISIBLE
 int	mknod(const char *, mode_t, dev_t);
 #define	_MKNOD_DECLARED
 #endif
 int	stat(const char * __restrict, struct stat * __restrict);
 mode_t	umask(mode_t);
 #if __POSIX_VISIBLE >= 200809
 int	fstatat(int, const char *, struct stat *, int);
 int	mkdirat(int, const char *, mode_t);
 int	mkfifoat(int, const char *, mode_t);
 #endif
 #if __XSI_VISIBLE >= 700
 int	mknodat(int, const char *, mode_t, dev_t);
 #endif
 __END_DECLS
 #endif /* !_KERNEL */
 
 #endif /* !_SYS_STAT_H_ */
Index: head/sys/sys/syscallsubr.h
===================================================================
--- head/sys/sys/syscallsubr.h	(revision 318735)
+++ head/sys/sys/syscallsubr.h	(revision 318736)
@@ -1,295 +1,300 @@
 /*-
  * Copyright (c) 2002 Ian Dowse.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_SYSCALLSUBR_H_
 #define _SYS_SYSCALLSUBR_H_
 
 #include <sys/signal.h>
 #include <sys/uio.h>
 #include <sys/socket.h>
 #include <sys/mac.h>
 #include <sys/mount.h>
 #include <sys/_cpuset.h>
 
 struct file;
 struct filecaps;
 enum idtype;
 struct itimerval;
 struct image_args;
 struct jail;
 struct kevent;
 struct kevent_copyops;
 struct kld_file_stat;
 struct ksiginfo;
 struct mbuf;
 struct msghdr;
 struct msqid_ds;
 struct pollfd;
 struct ogetdirentries_args;
 struct rlimit;
 struct rusage;
 union semun;
 struct sockaddr;
 struct stat;
 struct thr_param;
 struct sched_param;
 struct __wrusage;
 
 int	kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg,
 	    size_t buflen, size_t path_max);
 int	kern_accept(struct thread *td, int s, struct sockaddr **name,
 	    socklen_t *namelen, struct file **fp);
 int	kern_accept4(struct thread *td, int s, struct sockaddr **name,
 	    socklen_t *namelen, int flags, struct file **fp);
 int	kern_accessat(struct thread *td, int fd, char *path,
 	    enum uio_seg pathseg, int flags, int mode);
 int	kern_adjtime(struct thread *td, struct timeval *delta,
 	    struct timeval *olddelta);
 int	kern_alternate_path(struct thread *td, const char *prefix, const char *path,
 	    enum uio_seg pathseg, char **pathbuf, int create, int dirfd);
 int	kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa);
 int	kern_cap_ioctls_limit(struct thread *td, int fd, u_long *cmds,
 	    size_t ncmds);
 int	kern_cap_rights_limit(struct thread *td, int fd, cap_rights_t *rights);
 int	kern_chdir(struct thread *td, char *path, enum uio_seg pathseg);
 int	kern_clock_getcpuclockid2(struct thread *td, id_t id, int which,
 	    clockid_t *clk_id);
 int	kern_clock_getres(struct thread *td, clockid_t clock_id,
 	    struct timespec *ts);
 int	kern_clock_gettime(struct thread *td, clockid_t clock_id,
 	    struct timespec *ats);
 int	kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags,
 	    const struct timespec *rqtp, struct timespec *rmtp);
 int	kern_clock_settime(struct thread *td, clockid_t clock_id,
 	    struct timespec *ats);
 int	kern_close(struct thread *td, int fd);
 int	kern_connectat(struct thread *td, int dirfd, int fd,
 	    struct sockaddr *sa);
 int	kern_cpuset_getaffinity(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp);
 int	kern_cpuset_setaffinity(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, size_t cpusetsize,
 	    const cpuset_t *maskp);
 int	kern_cpuset_getid(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, cpusetid_t *setid);
 int	kern_cpuset_setid(struct thread *td, cpuwhich_t which,
 	    id_t id, cpusetid_t setid);
 int	kern_dup(struct thread *td, u_int mode, int flags, int old, int new);
 int	kern_execve(struct thread *td, struct image_args *args,
 	    struct mac *mac_p);
 int	kern_fchmodat(struct thread *td, int fd, char *path,
 	    enum uio_seg pathseg, mode_t mode, int flag);
 int	kern_fchownat(struct thread *td, int fd, char *path,
 	    enum uio_seg pathseg, int uid, int gid, int flag);
 int	kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg);
 int	kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg);
 int	kern_fhstat(struct thread *td, fhandle_t fh, struct stat *buf);
 int	kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf);
 int	kern_fstat(struct thread *td, int fd, struct stat *sbp);
 int	kern_fstatfs(struct thread *td, int fd, struct statfs *buf);
 int	kern_fsync(struct thread *td, int fd, bool fullsync);
 int	kern_ftruncate(struct thread *td, int fd, off_t length);
 int	kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 	    enum uio_seg tptrseg);
 int	kern_futimens(struct thread *td, int fd, struct timespec *tptr,
 	    enum uio_seg tptrseg);
-int	kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
-	    long *basep, ssize_t *residp, enum uio_seg bufseg);
+int	kern_getdirentries(struct thread *td, int fd, char *buf, size_t count,
+	    off_t *basep, ssize_t *residp, enum uio_seg bufseg);
 int	kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
 	    size_t *countp, enum uio_seg bufseg, int mode);
 int	kern_getitimer(struct thread *, u_int, struct itimerval *);
 int	kern_getppid(struct thread *);
 int	kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
 	    socklen_t *alen);
 int	kern_getrusage(struct thread *td, int who, struct rusage *rup);
 int	kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
 	    socklen_t *alen);
 int	kern_getsockopt(struct thread *td, int s, int level, int name,
 	    void *optval, enum uio_seg valseg, socklen_t *valsize);
 int	kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data);
 int	kern_jail(struct thread *td, struct jail *j);
 int	kern_jail_get(struct thread *td, struct uio *options, int flags);
 int	kern_jail_set(struct thread *td, struct uio *options, int flags);
 int	kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
 	    struct kevent_copyops *k_ops, const struct timespec *timeout);
 int	kern_kevent_anonymous(struct thread *td, int nevents,
 	    struct kevent_copyops *k_ops);
 int	kern_kevent_fp(struct thread *td, struct file *fp, int nchanges,
 	    int nevents, struct kevent_copyops *k_ops,
 	    const struct timespec *timeout);
 int	kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps);
 int	kern_kldload(struct thread *td, const char *file, int *fileid);
 int	kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat);
 int	kern_kldunload(struct thread *td, int fileid, int flags);
 int	kern_linkat(struct thread *td, int fd1, int fd2, char *path1,
 	    char *path2, enum uio_seg segflg, int follow);
 int	kern_listen(struct thread *td, int s, int backlog);
 int	kern_lseek(struct thread *td, int fd, off_t offset, int whence);
 int	kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
 	    struct timeval *tptr, enum uio_seg tptrseg);
 int	kern_madvise(struct thread *td, uintptr_t addr, size_t len, int behav);
 int	kern_mincore(struct thread *td, uintptr_t addr, size_t len, char *vec);
 int	kern_mkdirat(struct thread *td, int fd, char *path,
 	    enum uio_seg segflg, int mode);
 int	kern_mkfifoat(struct thread *td, int fd, char *path,
 	    enum uio_seg pathseg, int mode);
 int	kern_mknodat(struct thread *td, int fd, char *path,
-	    enum uio_seg pathseg, int mode, int dev);
+	    enum uio_seg pathseg, int mode, dev_t dev);
 int	kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr,
 	    size_t len);
 int	kern_mmap(struct thread *td, uintptr_t addr, size_t size, int prot,
 	    int flags, int fd, off_t pos);
 int	kern_mprotect(struct thread *td, uintptr_t addr, size_t size, int prot);
 int	kern_msgctl(struct thread *, int, int, struct msqid_ds *);
 int	kern_msgrcv(struct thread *, int, void *, size_t, long, int, long *);
 int	kern_msgsnd(struct thread *, int, const void *, size_t, int, long);
 int	kern_msync(struct thread *td, uintptr_t addr, size_t size, int flags);
 int	kern_munlock(struct thread *td, uintptr_t addr, size_t size);
 int	kern_munmap(struct thread *td, uintptr_t addr, size_t size);
 int     kern_nanosleep(struct thread *td, struct timespec *rqt,
 	    struct timespec *rmt);
 int	kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 	    long *ploff);
 int	kern_openat(struct thread *td, int fd, char *path,
 	    enum uio_seg pathseg, int flags, int mode);
 int	kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg,
 	    int name, u_long flags);
 int	kern_pipe(struct thread *td, int fildes[2], int flags,
 	    struct filecaps *fcaps1, struct filecaps *fcaps2);
 int	kern_poll(struct thread *td, struct pollfd *fds, u_int nfds,
 	    struct timespec *tsp, sigset_t *uset);
 int	kern_posix_error(struct thread *td, int error);
 int	kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 	    int advice);
 int	kern_posix_fallocate(struct thread *td, int fd, off_t offset,
 	    off_t len);
 int	kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com,
 	    void *data);
 int	kern_pread(struct thread *td, int fd, void *buf, size_t nbyte,
 	    off_t offset);
 int	kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset);
 int	kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou,
 	    fd_set *ex, struct timeval *tvp, sigset_t *uset, int abi_nfdbits);
 int	kern_ptrace(struct thread *td, int req, pid_t pid, void *addr,
 	    int data);
 int	kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte,
 	    off_t offset);
 int	kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset);
 int	kern_readlinkat(struct thread *td, int fd, char *path,
 	    enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count);
 int	kern_readv(struct thread *td, int fd, struct uio *auio);
 int	kern_recvit(struct thread *td, int s, struct msghdr *mp,
 	    enum uio_seg fromseg, struct mbuf **controlp);
 int	kern_renameat(struct thread *td, int oldfd, char *old, int newfd,
 	    char *new, enum uio_seg pathseg);
 int	kern_rmdirat(struct thread *td, int fd, char *path,
 	    enum uio_seg pathseg);
 int	kern_sched_getparam(struct thread *td, struct thread *targettd,
 	    struct sched_param *param);
 int	kern_sched_getscheduler(struct thread *td, struct thread *targettd,
 	    int *policy);
 int	kern_sched_setparam(struct thread *td, struct thread *targettd,
 	    struct sched_param *param);
 int	kern_sched_setscheduler(struct thread *td, struct thread *targettd,
 	    int policy, struct sched_param *param);
 int	kern_sched_rr_get_interval(struct thread *td, pid_t pid,
 	    struct timespec *ts);
 int	kern_sched_rr_get_interval_td(struct thread *td, struct thread *targettd,
 	    struct timespec *ts);
 int	kern_semctl(struct thread *td, int semid, int semnum, int cmd,
 	    union semun *arg, register_t *rval);
 int	kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
 	    fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits);
 int	kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
 	    struct mbuf *control, enum uio_seg segflg);
 int	kern_setgroups(struct thread *td, u_int ngrp, gid_t *groups);
 int	kern_setitimer(struct thread *, u_int, struct itimerval *,
 	    struct itimerval *);
 int	kern_setrlimit(struct thread *, u_int, struct rlimit *);
 int	kern_setsockopt(struct thread *td, int s, int level, int name,
 	    void *optval, enum uio_seg valseg, socklen_t valsize);
 int	kern_settimeofday(struct thread *td, struct timeval *tv,
 	    struct timezone *tzp);
 int	kern_shm_open(struct thread *td, const char *userpath, int flags,
 	    mode_t mode, struct filecaps *fcaps);
 int	kern_shmat(struct thread *td, int shmid, const void *shmaddr,
 	    int shmflg);
 int	kern_shmctl(struct thread *td, int shmid, int cmd, void *buf,
 	    size_t *bufsz);
 int	kern_shutdown(struct thread *td, int s, int how);
 int	kern_sigaction(struct thread *td, int sig, const struct sigaction *act,
 	    struct sigaction *oact, int flags);
 int	kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss);
 int	kern_sigprocmask(struct thread *td, int how,
 	    sigset_t *set, sigset_t *oset, int flags);
 int	kern_sigsuspend(struct thread *td, sigset_t mask);
 int	kern_sigtimedwait(struct thread *td, sigset_t waitset,
 	    struct ksiginfo *ksi, struct timespec *timeout);
 int	kern_sigqueue(struct thread *td, pid_t pid, int signum,
 	    union sigval *value);
 int	kern_socket(struct thread *td, int domain, int type, int protocol);
 int	kern_statat(struct thread *td, int flag, int fd, char *path,
 	    enum uio_seg pathseg, struct stat *sbp,
 	    void (*hook)(struct vnode *vp, struct stat *sbp));
 int	kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
 	    struct statfs *buf);
 int	kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
 	    enum uio_seg segflg);
 int	kern_ktimer_create(struct thread *td, clockid_t clock_id,
 	    struct sigevent *evp, int *timerid, int preset_id);
 int	kern_ktimer_delete(struct thread *, int);
 int	kern_ktimer_settime(struct thread *td, int timer_id, int flags,
 	    struct itimerspec *val, struct itimerspec *oval);
 int	kern_ktimer_gettime(struct thread *td, int timer_id,
 	    struct itimerspec *val);
 int	kern_ktimer_getoverrun(struct thread *td, int timer_id);
 int	kern_thr_alloc(struct proc *, int pages, struct thread **);
 int	kern_thr_exit(struct thread *td);
 int	kern_thr_new(struct thread *td, struct thr_param *param);
 int	kern_thr_suspend(struct thread *td, struct timespec *tsp);
 int	kern_truncate(struct thread *td, char *path, enum uio_seg pathseg,
 	    off_t length);
 int	kern_unlinkat(struct thread *td, int fd, char *path,
 	    enum uio_seg pathseg, ino_t oldinum);
 int	kern_utimesat(struct thread *td, int fd, char *path,
 	    enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg);
 int	kern_utimensat(struct thread *td, int fd, char *path,
 	    enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg,
 	    int follow);
 int	kern_wait(struct thread *td, pid_t pid, int *status, int options,
 	    struct rusage *rup);
 int	kern_wait6(struct thread *td, enum idtype idtype, id_t id, int *status,
 	    int options, struct __wrusage *wrup, siginfo_t *sip);
 int	kern_writev(struct thread *td, int fd, struct uio *auio);
 int	kern_socketpair(struct thread *td, int domain, int type, int protocol,
 	    int *rsv);
 
 /* flags for kern_sigaction */
 #define	KSA_OSIGSET	0x0001	/* uses osigact_t */
 #define	KSA_FREEBSD4	0x0002	/* uses ucontext4 */
+
+struct freebsd11_dirent;
+
+int	freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf,
+	    u_int count, long *basep, void (*func)(struct freebsd11_dirent *));
 
 #endif /* !_SYS_SYSCALLSUBR_H_ */
Index: head/sys/sys/tty.h
===================================================================
--- head/sys/sys/tty.h	(revision 318735)
+++ head/sys/sys/tty.h	(revision 318736)
@@ -1,228 +1,228 @@
 /*-
  * Copyright (c) 2008 Ed Schouten <ed@FreeBSD.org>
  * All rights reserved.
  *
  * Portions of this software were developed under sponsorship from Snow
  * B.V., the Netherlands.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_TTY_H_
 #define	_SYS_TTY_H_
 
 #include <sys/param.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/condvar.h>
 #include <sys/selinfo.h>
 #include <sys/_termios.h>
 #include <sys/ttycom.h>
 #include <sys/ttyqueue.h>
 
 struct cdev;
 struct file;
 struct pgrp;
 struct session;
 struct ucred;
 
 struct ttydevsw;
 
 /*
  * Per-TTY structure, containing buffers, etc.
  *
  * List of locks
  * (t)	locked by t_mtx
  * (l)	locked by tty_list_sx
  * (c)	const until freeing
  */
 struct tty {
 	struct mtx	*t_mtx;		/* TTY lock. */
 	struct mtx	t_mtxobj;	/* Per-TTY lock (when not borrowing). */
 	TAILQ_ENTRY(tty) t_list;	/* (l) TTY list entry. */
 	int		t_drainwait;	/* (t) TIOCDRAIN timeout seconds. */
 	unsigned int	t_flags;	/* (t) Terminal option flags. */
 /* Keep flags in sync with db_show_tty and pstat(8). */
 #define	TF_NOPREFIX	0x00001	/* Don't prepend "tty" to device name. */
 #define	TF_INITLOCK	0x00002	/* Create init/lock state devices. */
 #define	TF_CALLOUT	0x00004	/* Create "cua" devices. */
 #define	TF_OPENED_IN	0x00008	/* "tty" node is in use. */
 #define	TF_OPENED_OUT	0x00010	/* "cua" node is in use. */
 #define	TF_OPENED_CONS	0x00020 /* Device in use as console. */
 #define	TF_OPENED	(TF_OPENED_IN|TF_OPENED_OUT|TF_OPENED_CONS)
 #define	TF_GONE		0x00040	/* Device node is gone. */
 #define	TF_OPENCLOSE	0x00080	/* Device is in open()/close(). */
 #define	TF_ASYNC	0x00100	/* Asynchronous I/O enabled. */
 #define	TF_LITERAL	0x00200	/* Accept the next character literally. */
 #define	TF_HIWAT_IN	0x00400	/* We've reached the input watermark. */
 #define	TF_HIWAT_OUT	0x00800	/* We've reached the output watermark. */
 #define	TF_HIWAT	(TF_HIWAT_IN|TF_HIWAT_OUT)
 #define	TF_STOPPED	0x01000	/* Output flow control - stopped. */
 #define	TF_EXCLUDE	0x02000	/* Exclusive access. */
 #define	TF_BYPASS	0x04000	/* Optimized input path. */
 #define	TF_ZOMBIE	0x08000	/* Modem disconnect received. */
 #define	TF_HOOK		0x10000	/* TTY has hook attached. */
 #define	TF_BUSY_IN	0x20000	/* Process busy in read() -- not supported. */
 #define	TF_BUSY_OUT	0x40000	/* Process busy in write(). */
 #define	TF_BUSY		(TF_BUSY_IN|TF_BUSY_OUT)
 	unsigned int	t_revokecnt;	/* (t) revoke() count. */
 
 	/* Buffering mechanisms. */
 	struct ttyinq	t_inq;		/* (t) Input queue. */
 	size_t		t_inlow;	/* (t) Input low watermark. */
 	struct ttyoutq	t_outq;		/* (t) Output queue. */
 	size_t		t_outlow;	/* (t) Output low watermark. */
 
 	/* Sleeping mechanisms. */
 	struct cv	t_inwait;	/* (t) Input wait queue. */
 	struct cv	t_outwait;	/* (t) Output wait queue. */
 	struct cv	t_outserwait;	/* (t) Serial output wait queue. */
 	struct cv	t_bgwait;	/* (t) Background wait queue. */
 	struct cv	t_dcdwait;	/* (t) Carrier Detect wait queue. */
 
 	/* Polling mechanisms. */
 	struct selinfo	t_inpoll;	/* (t) Input poll queue. */
 	struct selinfo	t_outpoll;	/* (t) Output poll queue. */
 	struct sigio	*t_sigio;	/* (t) Asynchronous I/O. */
 
 	struct termios	t_termios;	/* (t) I/O processing flags. */
 	struct winsize	t_winsize;	/* (t) Window size. */
 	unsigned int	t_column;	/* (t) Current cursor position. */
 	unsigned int	t_writepos;	/* (t) Where input was interrupted. */
 	int		t_compatflags;	/* (t) COMPAT_43TTY flags. */
 
 	/* Init/lock-state devices. */
 	struct termios	t_termios_init_in;	/* tty%s.init. */
 	struct termios	t_termios_lock_in;	/* tty%s.lock. */
 	struct termios	t_termios_init_out;	/* cua%s.init. */
 	struct termios	t_termios_lock_out;	/* cua%s.lock. */
 
 	struct ttydevsw	*t_devsw;	/* (c) Driver hooks. */
 	struct ttyhook	*t_hook;	/* (t) Capture/inject hook. */
 
 	/* Process signal delivery. */
 	struct pgrp	*t_pgrp;	/* (t) Foreground process group. */
 	struct session	*t_session;	/* (t) Associated session. */
 	unsigned int	t_sessioncnt;	/* (t) Backpointing sessions. */
 
 	void		*t_devswsoftc;	/* (c) Soft config, for drivers. */
 	void		*t_hooksoftc;	/* (t) Soft config, for hooks. */
 	struct cdev	*t_dev;		/* (c) Primary character device. */
 };
 
 /*
  * Userland version of struct tty, for sysctl kern.ttys
  */
 struct xtty {
 	size_t	xt_size;	/* Structure size. */
 	size_t	xt_insize;	/* Input queue size. */
 	size_t	xt_incc;	/* Canonicalized characters. */
 	size_t	xt_inlc;	/* Input line charaters. */
 	size_t	xt_inlow;	/* Input low watermark. */
 	size_t	xt_outsize;	/* Output queue size. */
 	size_t	xt_outcc;	/* Output queue usage. */
 	size_t	xt_outlow;	/* Output low watermark. */
 	unsigned int xt_column;	/* Current column position. */
 	pid_t	xt_pgid;	/* Foreground process group. */
 	pid_t	xt_sid;		/* Session. */
 	unsigned int xt_flags;	/* Terminal option flags. */
-	dev_t	xt_dev;		/* Userland device. */
+	uint32_t xt_dev;	/* Userland device. XXXKIB truncated */
 };
 
 #ifdef _KERNEL
 
 /* Used to distinguish between normal, callout, lock and init devices. */
 #define	TTYUNIT_INIT		0x1
 #define	TTYUNIT_LOCK		0x2
 #define	TTYUNIT_CALLOUT		0x4
 
 /* Allocation and deallocation. */
 struct tty *tty_alloc(struct ttydevsw *tsw, void *softc);
 struct tty *tty_alloc_mutex(struct ttydevsw *tsw, void *softc, struct mtx *mtx);
 void	tty_rel_pgrp(struct tty *tp, struct pgrp *pgrp);
 void	tty_rel_sess(struct tty *tp, struct session *sess);
 void	tty_rel_gone(struct tty *tp);
 
 #define	tty_lock(tp)		mtx_lock((tp)->t_mtx)
 #define	tty_unlock(tp)		mtx_unlock((tp)->t_mtx)
 #define	tty_lock_owned(tp)	mtx_owned((tp)->t_mtx)
 #define	tty_lock_assert(tp,ma)	mtx_assert((tp)->t_mtx, (ma))
 #define	tty_getlock(tp)		((tp)->t_mtx)
 
 /* Device node creation. */
 int	tty_makedevf(struct tty *tp, struct ucred *cred, int flags,
     const char *fmt, ...) __printflike(4, 5);
 #define	TTYMK_CLONING		0x1
 #define	tty_makedev(tp, cred, fmt, ...) \
 	(void )tty_makedevf((tp), (cred), 0, (fmt), ## __VA_ARGS__)
 #define	tty_makealias(tp,fmt,...) \
 	make_dev_alias((tp)->t_dev, fmt, ## __VA_ARGS__)
 
 /* Signalling processes. */
 void	tty_signal_sessleader(struct tty *tp, int signal);
 void	tty_signal_pgrp(struct tty *tp, int signal);
 /* Waking up readers/writers. */
 int	tty_wait(struct tty *tp, struct cv *cv);
 int	tty_wait_background(struct tty *tp, struct thread *td, int sig);
 int	tty_timedwait(struct tty *tp, struct cv *cv, int timo);
 void	tty_wakeup(struct tty *tp, int flags);
 
 /* System messages. */
 int	tty_checkoutq(struct tty *tp);
 int	tty_putchar(struct tty *tp, char c);
 
 int	tty_ioctl(struct tty *tp, u_long cmd, void *data, int fflag,
     struct thread *td);
 int	tty_ioctl_compat(struct tty *tp, u_long cmd, caddr_t data,
     int fflag, struct thread *td);
 void	tty_set_winsize(struct tty *tp, const struct winsize *wsz);
 void	tty_init_console(struct tty *tp, speed_t speed);
 void	tty_flush(struct tty *tp, int flags);
 void	tty_hiwat_in_block(struct tty *tp);
 void	tty_hiwat_in_unblock(struct tty *tp);
 dev_t	tty_udev(struct tty *tp);
 #define	tty_opened(tp)		((tp)->t_flags & TF_OPENED)
 #define	tty_gone(tp)		((tp)->t_flags & TF_GONE)
 #define	tty_softc(tp)		((tp)->t_devswsoftc)
 #define	tty_devname(tp)		devtoname((tp)->t_dev)
 
 /* Status line printing. */
 void	tty_info(struct tty *tp);
 
 /* /dev/console selection. */
 void	ttyconsdev_select(const char *name);
 
 /* Pseudo-terminal hooks. */
 int	pts_alloc(int fflags, struct thread *td, struct file *fp);
 int	pts_alloc_external(int fd, struct thread *td, struct file *fp,
     struct cdev *dev, const char *name);
 
 /* Drivers and line disciplines also need to call these. */
 #include <sys/ttydisc.h>
 #include <sys/ttydevsw.h>
 #include <sys/ttyhook.h>
 #endif /* _KERNEL */
 
 #endif /* !_SYS_TTY_H_ */
Index: head/sys/sys/user.h
===================================================================
--- head/sys/sys/user.h	(revision 318735)
+++ head/sys/sys/user.h	(revision 318736)
@@ -1,569 +1,603 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California.
  * Copyright (c) 2007 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)user.h	8.2 (Berkeley) 9/23/93
  * $FreeBSD$
  */
 
 #ifndef _SYS_USER_H_
 #define _SYS_USER_H_
 
 #include <machine/pcb.h>
 #ifndef _KERNEL
 /* stuff that *used* to be included by user.h, or is now needed */
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/ucred.h>
 #include <sys/uio.h>
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/proc.h>
 #include <vm/vm.h>		/* XXX */
 #include <vm/vm_param.h>	/* XXX */
 #include <vm/pmap.h>		/* XXX */
 #include <vm/vm_map.h>		/* XXX */
 #endif /* !_KERNEL */
 #ifndef _SYS_RESOURCEVAR_H_
 #include <sys/resourcevar.h>
 #endif
 #ifndef _SYS_SIGNALVAR_H_
 #include <sys/signalvar.h>
 #endif
 #ifndef _SYS_SOCKET_VAR_H_
 #include <sys/socket.h>
 #endif
 #include <sys/caprights.h>
 
 /*
  * KERN_PROC subtype ops return arrays of selected proc structure entries:
  *
  * This struct includes several arrays of spare space, with different arrays
  * for different standard C-types.  When adding new variables to this struct,
  * the space for byte-aligned data should be taken from the ki_sparestring,
  * pointers from ki_spareptrs, word-aligned data from ki_spareints, and
  * doubleword-aligned data from ki_sparelongs.  Make sure the space for new
  * variables come from the array which matches the size and alignment of
  * those variables on ALL hardware platforms, and then adjust the appropriate
  * KI_NSPARE_* value(s) to match.
  *
  * Always verify that sizeof(struct kinfo_proc) == KINFO_PROC_SIZE on all
  * platforms after you have added new variables.  Note that if you change
  * the value of KINFO_PROC_SIZE, then many userland programs will stop
  * working until they are recompiled!
  *
  * Once you have added the new field, you will need to add code to initialize
  * it in two places: function fill_kinfo_proc in sys/kern/kern_proc.c and
  * function kvm_proclist in lib/libkvm/kvm_proc.c .
  */
-#define	KI_NSPARE_INT	4
+#define	KI_NSPARE_INT	2
 #define	KI_NSPARE_LONG	12
 #define	KI_NSPARE_PTR	6
 
 #ifndef _KERNEL
 #ifndef KINFO_PROC_SIZE
 #error "Unknown architecture"
 #endif
 #endif /* !_KERNEL */
 
 #define	WMESGLEN	8		/* size of returned wchan message */
 #define	LOCKNAMELEN	8		/* size of returned lock name */
 #define	TDNAMLEN	16		/* size of returned thread name */
 #define	COMMLEN		19		/* size of returned ki_comm name */
 #define	KI_EMULNAMELEN	16		/* size of returned ki_emul */
 #define	KI_NGROUPS	16		/* number of groups in ki_groups */
 #define	LOGNAMELEN	17		/* size of returned ki_login */
 #define	LOGINCLASSLEN	17		/* size of returned ki_loginclass */
 
 #ifndef BURN_BRIDGES
 #define	OCOMMLEN	TDNAMLEN	
 #define	ki_ocomm	ki_tdname
 #endif
 
 /* Flags for the process credential. */
 #define	KI_CRF_CAPABILITY_MODE	0x00000001
 /*
  * Steal a bit from ki_cr_flags to indicate that the cred had more than
  * KI_NGROUPS groups.
  */
 #define KI_CRF_GRP_OVERFLOW	0x80000000
 
 struct kinfo_proc {
 	int	ki_structsize;		/* size of this structure */
 	int	ki_layout;		/* reserved: layout identifier */
 	struct	pargs *ki_args;		/* address of command arguments */
 	struct	proc *ki_paddr;		/* address of proc */
 	struct	user *ki_addr;		/* kernel virtual addr of u-area */
 	struct	vnode *ki_tracep;	/* pointer to trace file */
 	struct	vnode *ki_textvp;	/* pointer to executable file */
 	struct	filedesc *ki_fd;	/* pointer to open file info */
 	struct	vmspace *ki_vmspace;	/* pointer to kernel vmspace struct */
 	void	*ki_wchan;		/* sleep address */
 	pid_t	ki_pid;			/* Process identifier */
 	pid_t	ki_ppid;		/* parent process id */
 	pid_t	ki_pgid;		/* process group id */
 	pid_t	ki_tpgid;		/* tty process group id */
 	pid_t	ki_sid;			/* Process session ID */
 	pid_t	ki_tsid;		/* Terminal session ID */
 	short	ki_jobc;		/* job control counter */
 	short	ki_spare_short1;	/* unused (just here for alignment) */
-	dev_t	ki_tdev;		/* controlling tty dev */
+	uint32_t ki_tdev_freebsd11;	/* FreeBSD 11 compat controlling tty */
 	sigset_t ki_siglist;		/* Signals arrived but not delivered */
 	sigset_t ki_sigmask;		/* Current signal mask */
 	sigset_t ki_sigignore;		/* Signals being ignored */
 	sigset_t ki_sigcatch;		/* Signals being caught by user */
 	uid_t	ki_uid;			/* effective user id */
 	uid_t	ki_ruid;		/* Real user id */
 	uid_t	ki_svuid;		/* Saved effective user id */
 	gid_t	ki_rgid;		/* Real group id */
 	gid_t	ki_svgid;		/* Saved effective group id */
 	short	ki_ngroups;		/* number of groups */
 	short	ki_spare_short2;	/* unused (just here for alignment) */
 	gid_t	ki_groups[KI_NGROUPS];	/* groups */
 	vm_size_t ki_size;		/* virtual size */
 	segsz_t ki_rssize;		/* current resident set size in pages */
 	segsz_t ki_swrss;		/* resident set size before last swap */
 	segsz_t ki_tsize;		/* text size (pages) XXX */
 	segsz_t ki_dsize;		/* data size (pages) XXX */
 	segsz_t ki_ssize;		/* stack size (pages) */
 	u_short	ki_xstat;		/* Exit status for wait & stop signal */
 	u_short	ki_acflag;		/* Accounting flags */
 	fixpt_t	ki_pctcpu;	 	/* %cpu for process during ki_swtime */
 	u_int	ki_estcpu;	 	/* Time averaged value of ki_cpticks */
 	u_int	ki_slptime;	 	/* Time since last blocked */
 	u_int	ki_swtime;	 	/* Time swapped in or out */
 	u_int	ki_cow;			/* number of copy-on-write faults */
 	u_int64_t ki_runtime;		/* Real time in microsec */
 	struct	timeval ki_start;	/* starting time */
 	struct	timeval ki_childtime;	/* time used by process children */
 	long	ki_flag;		/* P_* flags */
 	long	ki_kiflag;		/* KI_* flags (below) */
 	int	ki_traceflag;		/* Kernel trace points */
 	char	ki_stat;		/* S* process status */
 	signed char ki_nice;		/* Process "nice" value */
 	char	ki_lock;		/* Process lock (prevent swap) count */
 	char	ki_rqindex;		/* Run queue index */
 	u_char	ki_oncpu_old;		/* Which cpu we are on (legacy) */
 	u_char	ki_lastcpu_old;		/* Last cpu we were on (legacy) */
 	char	ki_tdname[TDNAMLEN+1];	/* thread name */
 	char	ki_wmesg[WMESGLEN+1];	/* wchan message */
 	char	ki_login[LOGNAMELEN+1];	/* setlogin name */
 	char	ki_lockname[LOCKNAMELEN+1]; /* lock name */
 	char	ki_comm[COMMLEN+1];	/* command name */
 	char	ki_emul[KI_EMULNAMELEN+1];  /* emulation name */
 	char	ki_loginclass[LOGINCLASSLEN+1]; /* login class */
 	char	ki_moretdname[MAXCOMLEN-TDNAMLEN+1];	/* more thread name */
 	/*
 	 * When adding new variables, take space for char-strings from the
 	 * front of ki_sparestrings, and ints from the end of ki_spareints.
 	 * That way the spare room from both arrays will remain contiguous.
 	 */
 	char	ki_sparestrings[46];	/* spare string space */
 	int	ki_spareints[KI_NSPARE_INT];	/* spare room for growth */
+	uint64_t ki_tdev;		/* controlling tty dev */
 	int	ki_oncpu;		/* Which cpu we are on */
 	int	ki_lastcpu;		/* Last cpu we were on */
 	int	ki_tracer;		/* Pid of tracing process */
 	int	ki_flag2;		/* P2_* flags */
 	int	ki_fibnum;		/* Default FIB number */
 	u_int	ki_cr_flags;		/* Credential flags */
 	int	ki_jid;			/* Process jail ID */
 	int	ki_numthreads;		/* XXXKSE number of threads in total */
 	lwpid_t	ki_tid;			/* XXXKSE thread id */
 	struct	priority ki_pri;	/* process priority */
 	struct	rusage ki_rusage;	/* process rusage statistics */
 	/* XXX - most fields in ki_rusage_ch are not (yet) filled in */
 	struct	rusage ki_rusage_ch;	/* rusage of children processes */
 	struct	pcb *ki_pcb;		/* kernel virtual addr of pcb */
 	void	*ki_kstack;		/* kernel virtual addr of stack */
 	void	*ki_udata;		/* User convenience pointer */
 	struct	thread *ki_tdaddr;	/* address of thread */
 	/*
 	 * When adding new variables, take space for pointers from the
 	 * front of ki_spareptrs, and longs from the end of ki_sparelongs.
 	 * That way the spare room from both arrays will remain contiguous.
 	 */
 	void	*ki_spareptrs[KI_NSPARE_PTR];	/* spare room for growth */
 	long	ki_sparelongs[KI_NSPARE_LONG];	/* spare room for growth */
 	long	ki_sflag;		/* PS_* flags */
 	long	ki_tdflags;		/* XXXKSE kthread flag */
 };
 void fill_kinfo_proc(struct proc *, struct kinfo_proc *);
 /* XXX - the following two defines are temporary */
 #define	ki_childstime	ki_rusage_ch.ru_stime
 #define	ki_childutime	ki_rusage_ch.ru_utime
 
 /*
  *  Legacy PS_ flag.  This moved to p_flag but is maintained for
  *  compatibility.
  */
 #define	PS_INMEM	0x00001		/* Loaded into memory. */
 
 /* ki_sessflag values */
 #define	KI_CTTY		0x00000001	/* controlling tty vnode active */
 #define	KI_SLEADER	0x00000002	/* session leader */
 #define	KI_LOCKBLOCK	0x00000004	/* proc blocked on lock ki_lockname */
 
 /*
  * This used to be the per-process structure containing data that
  * isn't needed in core when the process is swapped out, but now it
  * remains only for the benefit of a.out core dumps.
  */
 struct user {
 	struct	pstats u_stats;		/* *p_stats */
 	struct	kinfo_proc u_kproc;	/* eproc */
 };
 
 /*
  * The KERN_PROC_FILE sysctl allows a process to dump the file descriptor
  * array of another process.
  */
 #define	KF_ATTR_VALID	0x0001
 
 #define	KF_TYPE_NONE	0
 #define	KF_TYPE_VNODE	1
 #define	KF_TYPE_SOCKET	2
 #define	KF_TYPE_PIPE	3
 #define	KF_TYPE_FIFO	4
 #define	KF_TYPE_KQUEUE	5
 #define	KF_TYPE_CRYPTO	6
 #define	KF_TYPE_MQUEUE	7
 #define	KF_TYPE_SHM	8
 #define	KF_TYPE_SEM	9
 #define	KF_TYPE_PTS	10
 #define	KF_TYPE_PROCDESC	11
 #define	KF_TYPE_UNKNOWN	255
 
 #define	KF_VTYPE_VNON	0
 #define	KF_VTYPE_VREG	1
 #define	KF_VTYPE_VDIR	2
 #define	KF_VTYPE_VBLK	3
 #define	KF_VTYPE_VCHR	4
 #define	KF_VTYPE_VLNK	5
 #define	KF_VTYPE_VSOCK	6
 #define	KF_VTYPE_VFIFO	7
 #define	KF_VTYPE_VBAD	8
 #define	KF_VTYPE_UNKNOWN	255
 
 #define	KF_FD_TYPE_CWD	-1	/* Current working directory */
 #define	KF_FD_TYPE_ROOT	-2	/* Root directory */
 #define	KF_FD_TYPE_JAIL	-3	/* Jail directory */
 #define	KF_FD_TYPE_TRACE	-4	/* Ktrace vnode */
 #define	KF_FD_TYPE_TEXT	-5	/* Text vnode */
 #define	KF_FD_TYPE_CTTY	-6	/* Controlling terminal */
 
 #define	KF_FLAG_READ		0x00000001
 #define	KF_FLAG_WRITE		0x00000002
 #define	KF_FLAG_APPEND		0x00000004
 #define	KF_FLAG_ASYNC		0x00000008
 #define	KF_FLAG_FSYNC		0x00000010
 #define	KF_FLAG_NONBLOCK	0x00000020
 #define	KF_FLAG_DIRECT		0x00000040
 #define	KF_FLAG_HASLOCK		0x00000080
 #define	KF_FLAG_SHLOCK		0x00000100
 #define	KF_FLAG_EXLOCK		0x00000200
 #define	KF_FLAG_NOFOLLOW	0x00000400
 #define	KF_FLAG_CREAT		0x00000800
 #define	KF_FLAG_TRUNC		0x00001000
 #define	KF_FLAG_EXCL		0x00002000
 #define	KF_FLAG_EXEC		0x00004000
 
 /*
  * Old format.  Has variable hidden padding due to alignment.
  * This is a compatibility hack for pre-build 7.1 packages.
  */
 #if defined(__amd64__)
 #define	KINFO_OFILE_SIZE	1328
 #endif
 #if defined(__i386__)
 #define	KINFO_OFILE_SIZE	1324
 #endif
 
 struct kinfo_ofile {
 	int	kf_structsize;			/* Size of kinfo_file. */
 	int	kf_type;			/* Descriptor type. */
 	int	kf_fd;				/* Array index. */
 	int	kf_ref_count;			/* Reference count. */
 	int	kf_flags;			/* Flags. */
 	/* XXX Hidden alignment padding here on amd64 */
 	off_t	kf_offset;			/* Seek location. */
 	int	kf_vnode_type;			/* Vnode type. */
 	int	kf_sock_domain;			/* Socket domain. */
 	int	kf_sock_type;			/* Socket type. */
 	int	kf_sock_protocol;		/* Socket protocol. */
 	char	kf_path[PATH_MAX];	/* Path to file, if any. */
 	struct sockaddr_storage kf_sa_local;	/* Socket address. */
 	struct sockaddr_storage	kf_sa_peer;	/* Peer address. */
 };
 
 #if defined(__amd64__) || defined(__i386__)
 /*
  * This size should never be changed. If you really need to, you must provide
  * backward ABI compatibility by allocating a new sysctl MIB that will return
  * the new structure. The current structure has to be returned by the current
  * sysctl MIB. See how it is done for the kinfo_ofile structure.
  */
 #define	KINFO_FILE_SIZE	1392
 #endif
 
 struct kinfo_file {
 	int		kf_structsize;		/* Variable size of record. */
 	int		kf_type;		/* Descriptor type. */
 	int		kf_fd;			/* Array index. */
 	int		kf_ref_count;		/* Reference count. */
 	int		kf_flags;		/* Flags. */
 	int		kf_pad0;		/* Round to 64 bit alignment. */
 	int64_t		kf_offset;		/* Seek location. */
-	int		kf_vnode_type;		/* Vnode type. */
-	int		kf_sock_domain;		/* Socket domain. */
-	int		kf_sock_type;		/* Socket type. */
-	int		kf_sock_protocol;	/* Socket protocol. */
-	struct sockaddr_storage kf_sa_local;	/* Socket address. */
-	struct sockaddr_storage	kf_sa_peer;	/* Peer address. */
 	union {
 		struct {
+			uint32_t	kf_spareint;
+			/* Socket domain. */
+			int		kf_sock_domain0;
+			/* Socket type. */
+			int		kf_sock_type0;
+			/* Socket protocol. */
+			int		kf_sock_protocol0;
+			/* Socket address. */
+			struct sockaddr_storage kf_sa_local;
+			/* Peer address. */
+			struct sockaddr_storage	kf_sa_peer;
 			/* Address of so_pcb. */
 			uint64_t	kf_sock_pcb;
 			/* Address of inp_ppcb. */
 			uint64_t	kf_sock_inpcb;
 			/* Address of unp_conn. */
 			uint64_t	kf_sock_unpconn;
 			/* Send buffer state. */
 			uint16_t	kf_sock_snd_sb_state;
 			/* Receive buffer state. */
 			uint16_t	kf_sock_rcv_sb_state;
 			/* Round to 64 bit alignment. */
 			uint32_t	kf_sock_pad0;
 		} kf_sock;
 		struct {
+			/* Vnode type. */
+			int		kf_file_type;
+			/* Space for future use */
+			int		kf_spareint[3];
+			uint64_t	kf_spareint64[30];
+			/* Vnode filesystem id. */
+			uint64_t	kf_file_fsid;
+			/* File device. */
+			uint64_t	kf_file_rdev;
 			/* Global file id. */
 			uint64_t	kf_file_fileid;
 			/* File size. */
 			uint64_t	kf_file_size;
-			/* Vnode filesystem id. */
-			uint32_t	kf_file_fsid;
-			/* File device. */
-			uint32_t	kf_file_rdev;
+			/* Vnode filesystem id, FreeBSD 11 compat. */
+			uint32_t	kf_file_fsid_freebsd11;
+			/* File device, FreeBSD 11 compat. */
+			uint32_t	kf_file_rdev_freebsd11;
 			/* File mode. */
 			uint16_t	kf_file_mode;
 			/* Round to 64 bit alignment. */
 			uint16_t	kf_file_pad0;
 			uint32_t	kf_file_pad1;
 		} kf_file;
 		struct {
+			uint32_t	kf_spareint[4];
+			uint64_t	kf_spareint64[32];
 			uint32_t	kf_sem_value;
 			uint16_t	kf_sem_mode;
 		} kf_sem;
 		struct {
+			uint32_t	kf_spareint[4];
+			uint64_t	kf_spareint64[32];
 			uint64_t	kf_pipe_addr;
 			uint64_t	kf_pipe_peer;
 			uint32_t	kf_pipe_buffer_cnt;
 			/* Round to 64 bit alignment. */
 			uint32_t	kf_pipe_pad0[3];
 		} kf_pipe;
 		struct {
-			uint32_t	kf_pts_dev;
+			uint32_t	kf_spareint[4];
+			uint64_t	kf_spareint64[32];
+			uint32_t	kf_pts_dev_freebsd11;
+			uint32_t	kf_pts_pad0;
+			uint64_t	kf_pts_dev;
 			/* Round to 64 bit alignment. */
-			uint32_t	kf_pts_pad0[7];
+			uint32_t	kf_pts_pad1[4];
 		} kf_pts;
 		struct {
+			uint32_t	kf_spareint[4];
+			uint64_t	kf_spareint64[32];
 			pid_t		kf_pid;
 		} kf_proc;
 	} kf_un;
 	uint16_t	kf_status;		/* Status flags. */
 	uint16_t	kf_pad1;		/* Round to 32 bit alignment. */
 	int		_kf_ispare0;		/* Space for more stuff. */
 	cap_rights_t	kf_cap_rights;		/* Capability rights. */
 	uint64_t	_kf_cap_spare;		/* Space for future cap_rights_t. */
 	/* Truncated before copyout in sysctl */
 	char		kf_path[PATH_MAX];	/* Path to file, if any. */
 };
+#ifndef _KERNEL
+#define	kf_vnode_type	kf_un.kf_file.kf_file_type
+#define	kf_sock_domain	kf_un.kf_sock.kf_sock_domain0
+#define	kf_sock_type	kf_un.kf_sock.kf_sock_type0
+#define	kf_sock_protocol	kf_un.kf_sock.kf_sock_protocol0
+#endif
 
 /*
  * The KERN_PROC_VMMAP sysctl allows a process to dump the VM layout of
  * another process as a series of entries.
  */
 #define	KVME_TYPE_NONE		0
 #define	KVME_TYPE_DEFAULT	1
 #define	KVME_TYPE_VNODE		2
 #define	KVME_TYPE_SWAP		3
 #define	KVME_TYPE_DEVICE	4
 #define	KVME_TYPE_PHYS		5
 #define	KVME_TYPE_DEAD		6
 #define	KVME_TYPE_SG		7
 #define	KVME_TYPE_MGTDEVICE	8
 #define	KVME_TYPE_UNKNOWN	255
 
 #define	KVME_PROT_READ		0x00000001
 #define	KVME_PROT_WRITE		0x00000002
 #define	KVME_PROT_EXEC		0x00000004
 
 #define	KVME_FLAG_COW		0x00000001
 #define	KVME_FLAG_NEEDS_COPY	0x00000002
 #define	KVME_FLAG_NOCOREDUMP	0x00000004
 #define	KVME_FLAG_SUPER		0x00000008
 #define	KVME_FLAG_GROWS_UP	0x00000010
 #define	KVME_FLAG_GROWS_DOWN	0x00000020
 
 #if defined(__amd64__)
 #define	KINFO_OVMENTRY_SIZE	1168
 #endif
 #if defined(__i386__)
 #define	KINFO_OVMENTRY_SIZE	1128
 #endif
 
 struct kinfo_ovmentry {
 	int	 kve_structsize;		/* Size of kinfo_vmmapentry. */
 	int	 kve_type;			/* Type of map entry. */
 	void	*kve_start;			/* Starting address. */
 	void	*kve_end;			/* Finishing address. */
 	int	 kve_flags;			/* Flags on map entry. */
 	int	 kve_resident;			/* Number of resident pages. */
 	int	 kve_private_resident;		/* Number of private pages. */
 	int	 kve_protection;		/* Protection bitmask. */
 	int	 kve_ref_count;			/* VM obj ref count. */
 	int	 kve_shadow_count;		/* VM obj shadow count. */
 	char	 kve_path[PATH_MAX];		/* Path to VM obj, if any. */
 	void	*_kve_pspare[8];		/* Space for more stuff. */
 	off_t	 kve_offset;			/* Mapping offset in object */
 	uint64_t kve_fileid;			/* inode number if vnode */
-	dev_t	 kve_fsid;			/* dev_t of vnode location */
+	uint32_t kve_fsid;			/* dev_t of vnode location */
 	int	 _kve_ispare[3];		/* Space for more stuff. */
 };
 
 #if defined(__amd64__) || defined(__i386__)
 #define	KINFO_VMENTRY_SIZE	1160
 #endif
 
 struct kinfo_vmentry {
 	int	 kve_structsize;		/* Variable size of record. */
 	int	 kve_type;			/* Type of map entry. */
 	uint64_t kve_start;			/* Starting address. */
 	uint64_t kve_end;			/* Finishing address. */
 	uint64_t kve_offset;			/* Mapping offset in object */
 	uint64_t kve_vn_fileid;			/* inode number if vnode */
-	uint32_t kve_vn_fsid;			/* dev_t of vnode location */
+	uint32_t kve_vn_fsid_freebsd11;		/* fsid, FreeBSD 11 compat */
 	int	 kve_flags;			/* Flags on map entry. */
 	int	 kve_resident;			/* Number of resident pages. */
 	int	 kve_private_resident;		/* Number of private pages. */
 	int	 kve_protection;		/* Protection bitmask. */
 	int	 kve_ref_count;			/* VM obj ref count. */
 	int	 kve_shadow_count;		/* VM obj shadow count. */
 	int	 kve_vn_type;			/* Vnode type. */
 	uint64_t kve_vn_size;			/* File size. */
-	uint32_t kve_vn_rdev;			/* Device id if device. */
+	uint32_t kve_vn_rdev_freebsd11;		/* rdev, FreeBSD 11 compat */
 	uint16_t kve_vn_mode;			/* File mode. */
 	uint16_t kve_status;			/* Status flags. */
-	int	 _kve_ispare[12];		/* Space for more stuff. */
+	uint64_t kve_vn_fsid;			/* dev_t of vnode location */
+	uint64_t kve_vn_rdev;			/* Device id if device. */
+	int	 _kve_ispare[8];		/* Space for more stuff. */
 	/* Truncated before copyout in sysctl */
 	char	 kve_path[PATH_MAX];		/* Path to VM obj, if any. */
 };
 
 /*
  * The "vm.objects" sysctl provides a list of all VM objects in the system
  * via an array of these entries.
  */
 struct kinfo_vmobject {
 	int	kvo_structsize;			/* Variable size of record. */
 	int	kvo_type;			/* Object type: KVME_TYPE_*. */
 	uint64_t kvo_size;			/* Object size in pages. */
 	uint64_t kvo_vn_fileid;			/* inode number if vnode. */
-	uint32_t kvo_vn_fsid;			/* dev_t of vnode location. */
+	uint32_t kvo_vn_fsid_freebsd11;		/* fsid, FreeBSD 11 compat. */
 	int	kvo_ref_count;			/* Reference count. */
 	int	kvo_shadow_count;		/* Shadow count. */
 	int	kvo_memattr;			/* Memory attribute. */
 	uint64_t kvo_resident;			/* Number of resident pages. */
 	uint64_t kvo_active;			/* Number of active pages. */
 	uint64_t kvo_inactive;			/* Number of inactive pages. */
-	uint64_t _kvo_qspare[8];
+	uint64_t kvo_vn_fsid;			/* dev_t of vnode location. */
+	uint64_t _kvo_qspare[7];
 	uint32_t _kvo_ispare[8];
 	char	kvo_path[PATH_MAX];		/* Pathname, if any. */
 };
 
 /*
  * The KERN_PROC_KSTACK sysctl allows a process to dump the kernel stacks of
  * another process as a series of entries.  Each stack is represented by a
  * series of symbol names and offsets as generated by stack_sbuf_print(9).
  */
 #define	KKST_MAXLEN	1024
 
 #define	KKST_STATE_STACKOK	0		/* Stack is valid. */
 #define	KKST_STATE_SWAPPED	1		/* Stack swapped out. */
 #define	KKST_STATE_RUNNING	2		/* Stack ephemeral. */
 
 #if defined(__amd64__) || defined(__i386__)
 #define	KINFO_KSTACK_SIZE	1096
 #endif
 
 struct kinfo_kstack {
 	lwpid_t	 kkst_tid;			/* ID of thread. */
 	int	 kkst_state;			/* Validity of stack. */
 	char	 kkst_trace[KKST_MAXLEN];	/* String representing stack. */
 	int	 _kkst_ispare[16];		/* Space for more stuff. */
 };
 
 struct kinfo_sigtramp {
 	void	*ksigtramp_start;
 	void	*ksigtramp_end;
 	void	*ksigtramp_spare[4];
 };
 
 #ifdef _KERNEL
 /* Flags for kern_proc_out function. */
 #define KERN_PROC_NOTHREADS	0x1
 #define KERN_PROC_MASK32	0x2
 
 /* Flags for kern_proc_filedesc_out. */
 #define	KERN_FILEDESC_PACK_KINFO	0x00000001U
 
 /* Flags for kern_proc_vmmap_out. */
 #define	KERN_VMMAP_PACK_KINFO		0x00000001U
 struct sbuf;
 
 /*
  * The kern_proc out functions are helper functions to dump process
  * miscellaneous kinfo structures to sbuf.  The main consumers are KERN_PROC
  * sysctls but they may also be used by other kernel subsystems.
  *
  * The functions manipulate the process locking state and expect the process
  * to be locked on enter.  On return the process is unlocked.
  */
 
 int	kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
 	int flags);
 int	kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen);
 int	kern_proc_out(struct proc *p, struct sbuf *sb, int flags);
 int	kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
 	int flags);
 
 int	vntype_to_kinfo(int vtype);
 #endif /* !_KERNEL */
 
 #endif
Index: head/sys/sys/vnode.h
===================================================================
--- head/sys/sys/vnode.h	(revision 318735)
+++ head/sys/sys/vnode.h	(revision 318736)
@@ -1,887 +1,890 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vnode.h	8.7 (Berkeley) 2/4/94
  * $FreeBSD$
  */
 
 #ifndef _SYS_VNODE_H_
 #define	_SYS_VNODE_H_
 
 #include <sys/bufobj.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/lockmgr.h>
 #include <sys/mutex.h>
 #include <sys/rangelock.h>
 #include <sys/selinfo.h>
 #include <sys/uio.h>
 #include <sys/acl.h>
 #include <sys/ktr.h>
 
 /*
  * The vnode is the focus of all file activity in UNIX.  There is a
  * unique vnode allocated for each active file, each current directory,
  * each mounted-on file, text file, and the root.
  */
 
 /*
  * Vnode types.  VNON means no type.
  */
 enum vtype	{ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD,
 		  VMARKER };
 
 /*
  * Each underlying filesystem allocates its own private area and hangs
  * it from v_data.  If non-null, this area is freed in getnewvnode().
  */
 
 struct namecache;
 
 /*
  * Poll state hung off a vnode through its v_pollinfo pointer.  The
  * si_note list inside vpi_selinfo is the knlist that VN_KNLIST_EMPTY()
  * and VN_KNOTE() operate on.
  */
 struct vpollinfo {
 	struct	mtx vpi_lock;		/* lock to protect below */
 	struct	selinfo vpi_selinfo;	/* identity of poller(s) */
 	short	vpi_events;		/* what they are looking for */
 	short	vpi_revents;		/* what has happened */
 };
 
 /*
  * Reading or writing any of these items requires holding the appropriate lock.
  *
  * Lock reference:
  *	c - namecache mutex
  *	i - interlock
  *	l - mp mnt_listmtx or freelist mutex
  *	I - updated with atomics, 0->1 and 1->0 transitions with interlock held
  *	m - mount point interlock
  *	p - pollinfo lock
  *	u - Only a reference to the vnode is needed to read.
  *	v - vnode lock
  *
  * Vnodes may be found on many lists.  The general way to deal with operating
  * on a vnode that is on a list is:
  *	1) Lock the list and find the vnode.
  *	2) Lock interlock so that the vnode does not go away.
  *	3) Unlock the list to avoid lock order reversals.
  *	4) vget with LK_INTERLOCK and check for ENOENT, or
  *	5) Check for DOOMED if the vnode lock is not required.
  *	6) Perform your operation, then vput().
  */
 
 #if defined(_KERNEL) || defined(_KVM_VNODE)
 
 /*
  * The vnode structure itself.  The single letter that opens each field
  * comment is the key of the lock covering that field, as listed in the
  * "Lock reference" table above.  Fields whose comment carries no key
  * (or a '*') are not described by that table.
  */
 struct vnode {
 	/*
 	 * Fields which define the identity of the vnode.  These fields are
 	 * owned by the filesystem (XXX: and vgone() ?)
 	 */
 	const char *v_tag;			/* u type of underlying data */
 	struct	vop_vector *v_op;		/* u vnode operations vector */
 	void	*v_data;			/* u private data for fs */
 
 	/*
 	 * Filesystem instance stuff
 	 */
 	struct	mount *v_mount;			/* u ptr to vfs we are in */
 	TAILQ_ENTRY(vnode) v_nmntvnodes;	/* m vnodes for mount point */
 
 	/*
 	 * Type specific fields, only one applies to any given vnode.
 	 * See #defines below for renaming to v_* namespace.
 	 */
 	union {
 		struct mount	*vu_mount;	/* v ptr to mountpoint (VDIR) */
 		struct socket	*vu_socket;	/* v unix domain net (VSOCK) */
 		struct cdev	*vu_cdev; 	/* v device (VCHR, VBLK) */
 		struct fifoinfo	*vu_fifoinfo;	/* v fifo (VFIFO) */
 	} v_un;
 
 	/*
 	 * vfs_hash: (mount + inode) -> vnode hash.  The hash value
 	 * itself is grouped with other int fields, to avoid padding.
 	 */
 	LIST_ENTRY(vnode)	v_hashlist;
 
 	/*
 	 * VFS_namecache stuff
 	 */
 	LIST_HEAD(, namecache) v_cache_src;	/* c Cache entries from us */
 	TAILQ_HEAD(, namecache) v_cache_dst;	/* c Cache entries to us */
 	struct namecache *v_cache_dd;		/* c Cache entry for .. vnode */
 
 	/*
 	 * Locking
 	 */
 	struct	lock v_lock;			/* u (if fs doesn't have one) */
 	struct	mtx v_interlock;		/* lock for "i" things */
 	struct	lock *v_vnlock;			/* u pointer to vnode lock */
 
 	/*
 	 * The machinery of being a vnode
 	 */
 	TAILQ_ENTRY(vnode) v_actfreelist;	/* l vnode active/free lists */
 	struct bufobj	v_bufobj;		/* * Buffer cache object */
 
 	/*
 	 * Hooks for various subsystems and features.
 	 */
 	struct vpollinfo *v_pollinfo;		/* i Poll events, p for *v_pi */
 	struct label *v_label;			/* MAC label for vnode */
 	struct lockf *v_lockf;		/* Byte-level advisory lock list */
 	struct rangelock v_rl;			/* Byte-range lock */
 
 	/*
 	 * clustering stuff
 	 */
 	daddr_t	v_cstart;			/* v start block of cluster */
 	daddr_t	v_lasta;			/* v last allocation  */
 	daddr_t	v_lastw;			/* v last write  */
 	int	v_clen;				/* v length of cur. cluster */
 
 	u_int	v_holdcnt;			/* I prevents recycling. */
 	u_int	v_usecount;			/* I ref count of users */
 	u_int	v_iflag;			/* i vnode flags (see below) */
 	u_int	v_vflag;			/* v vnode flags */
 	u_int	v_mflag;			/* l mnt-specific vnode flags */
 	int	v_writecount;			/* v ref count of writers */
 	u_int	v_hash;				/* vfs_hash hash value (see v_hashlist) */
 	enum	vtype v_type;			/* u vnode type */
 };
 
 #endif /* defined(_KERNEL) || defined(_KVM_VNODE) */
 
 #define	v_mountedhere	v_un.vu_mount
 #define	v_socket	v_un.vu_socket
 #define	v_rdev		v_un.vu_cdev
 #define	v_fifoinfo	v_un.vu_fifoinfo
 
 #define	bo2vnode(bo)	__containerof((bo), struct vnode, v_bufobj)
 
 /* XXX: These are temporary to avoid a source sweep at this time */
 #define v_object	v_bufobj.bo_object
 
 /*
  * Userland version of struct vnode, for sysctl.
  */
 struct xvnode {
 	size_t	xv_size;			/* sizeof(struct xvnode) */
 	void	*xv_vnode;			/* address of real vnode */
 	u_long	xv_flag;			/* vnode vflags */
 	int	xv_usecount;			/* reference count of users */
 	int	xv_writecount;			/* reference count of writers */
 	int	xv_holdcnt;			/* page & buffer references */
 	u_long	xv_id;				/* capability identifier */
 	void	*xv_mount;			/* address of parent mount */
 	long	xv_numoutput;			/* num of writes in progress */
 	enum	vtype xv_type;			/* vnode type */
 	union {
 		void	*xvu_socket;		/* socket, if VSOCK */
 		void	*xvu_fifo;		/* fifo, if VFIFO */
 		dev_t	xvu_rdev;		/* maj/min, if VBLK/VCHR */
 		struct {
 			dev_t	xvu_dev;	/* device, if VDIR/VREG/VLNK */
 			ino_t	xvu_ino;	/* id, if VDIR/VREG/VLNK */
 		} xv_uns;
 	} xv_un;
 };
 #define xv_socket	xv_un.xvu_socket
 #define xv_fifo		xv_un.xvu_fifo
 #define xv_rdev		xv_un.xvu_rdev
 #define xv_dev		xv_un.xv_uns.xvu_dev
 #define xv_ino		xv_un.xv_uns.xvu_ino
 
 /*
  * Knote helpers for vnodes.
  *
  * VN_KNLIST_EMPTY(vp) is true when the vnode has no pollinfo attached or
  * when the knlist inside its pollinfo is empty.  We don't need to lock
  * the knlist for this check.
  *
  * VN_KNOTE(vp, b, a) posts hint "b" to that knlist with flags "a" (with
  * KNF_NOKQLOCK always or-ed in) unless the list is empty.  The vp
  * argument is parenthesized in every expansion so that any pointer
  * expression may be passed; note that it is evaluated more than once,
  * so it must not have side effects.
  */
 #define	VN_KNLIST_EMPTY(vp) ((vp)->v_pollinfo == NULL ||	\
 	    KNLIST_EMPTY(&(vp)->v_pollinfo->vpi_selinfo.si_note))
 
 #define VN_KNOTE(vp, b, a)					\
 	do {							\
 		if (!VN_KNLIST_EMPTY(vp))			\
 			KNOTE(&(vp)->v_pollinfo->vpi_selinfo.si_note, (b), \
 			    (a) | KNF_NOKQLOCK);		\
 	} while (0)
 #define	VN_KNOTE_LOCKED(vp, b)		VN_KNOTE(vp, b, KNF_LISTLOCKED)
 #define	VN_KNOTE_UNLOCKED(vp, b)	VN_KNOTE(vp, b, 0)
 
 /*
  * Vnode flags.
  *	VI flags are protected by interlock and live in v_iflag
  *	VV flags are protected by the vnode lock and live in v_vflag
  *
  *	VI_DOOMED is doubly protected by the interlock and vnode lock.  Both
  *	are required for writing but the status may be checked with either.
  */
 #define	VI_MOUNT	0x0020	/* Mount in progress */
 #define	VI_DOOMED	0x0080	/* This vnode is being recycled */
 #define	VI_FREE		0x0100	/* This vnode is on the freelist */
 #define	VI_ACTIVE	0x0200	/* This vnode is on the active list */
 #define	VI_DOINGINACT	0x0800	/* VOP_INACTIVE is in progress */
 #define	VI_OWEINACT	0x1000	/* Need to call inactive */
 
 #define	VV_ROOT		0x0001	/* root of its filesystem */
 #define	VV_ISTTY	0x0002	/* vnode represents a tty */
 #define	VV_NOSYNC	0x0004	/* unlinked, stop syncing */
 #define	VV_ETERNALDEV	0x0008	/* device that is never destroyed */
 #define	VV_CACHEDLABEL	0x0010	/* Vnode has valid cached MAC label */
 #define	VV_TEXT		0x0020	/* vnode is a pure text prototype */
 #define	VV_COPYONWRITE	0x0040	/* vnode is doing copy-on-write */
 #define	VV_SYSTEM	0x0080	/* vnode being used by kernel */
 #define	VV_PROCDEP	0x0100	/* vnode is process dependent */
 #define	VV_NOKNOTE	0x0200	/* don't activate knotes on this vnode */
 #define	VV_DELETED	0x0400	/* should be removed */
 #define	VV_MD		0x0800	/* vnode backs the md device */
 #define	VV_FORCEINSMQ	0x1000	/* force the insmntque to succeed */
 
 #define	VMP_TMPMNTFREELIST	0x0001	/* Vnode is on mnt's tmp free list */
 
 /*
  * Vnode attributes.  A field value of VNOVAL represents a field whose value
  * is unavailable (getattr) or which is not to be changed (setattr).
  */
 struct vattr {
 	enum vtype	va_type;	/* vnode type (for create) */
 	u_short		va_mode;	/* files access mode and type */
-	short		va_nlink;	/* number of references to file */
+	u_short		va_padding0;
 	uid_t		va_uid;		/* owner user id */
 	gid_t		va_gid;		/* owner group id */
+	nlink_t		va_nlink;	/* number of references to file */
 	dev_t		va_fsid;	/* filesystem id */
-	long		va_fileid;	/* file id */
+	ino_t		va_fileid;	/* file id */
 	u_quad_t	va_size;	/* file size in bytes */
 	long		va_blocksize;	/* blocksize preferred for i/o */
 	struct timespec	va_atime;	/* time of last access */
 	struct timespec	va_mtime;	/* time of last modification */
 	struct timespec	va_ctime;	/* time file changed */
 	struct timespec	va_birthtime;	/* time file created */
 	u_long		va_gen;		/* generation number of file */
 	u_long		va_flags;	/* flags defined for file */
 	dev_t		va_rdev;	/* device the special file represents */
 	u_quad_t	va_bytes;	/* bytes of disk space held by file */
 	u_quad_t	va_filerev;	/* file modification number */
 	u_int		va_vaflags;	/* operations flags, see below */
 	long		va_spare;	/* remain quad aligned */
 };
 
 /*
  * Flags for va_vaflags.
  */
 #define	VA_UTIMES_NULL	0x01		/* utimes argument was NULL */
 #define	VA_EXCLUSIVE	0x02		/* exclusive create request */
 #define	VA_SYNC		0x04		/* O_SYNC truncation */
 
 /*
  * Flags for ioflag. (high 16 bits used to ask for read-ahead and
  * help with write clustering)
  * NB: IO_NDELAY and IO_DIRECT are linked to fcntl.h
  */
 #define	IO_UNIT		0x0001		/* do I/O as atomic unit */
 #define	IO_APPEND	0x0002		/* append write to end */
 #define	IO_NDELAY	0x0004		/* FNDELAY flag set in file table */
 #define	IO_NODELOCKED	0x0008		/* underlying node already locked */
 #define	IO_ASYNC	0x0010		/* bawrite rather than bdwrite */
 #define	IO_VMIO		0x0020		/* data already in VMIO space */
 #define	IO_INVAL	0x0040		/* invalidate after I/O */
 #define	IO_SYNC		0x0080		/* do I/O synchronously */
 #define	IO_DIRECT	0x0100		/* attempt to bypass buffer cache */
 #define	IO_NOREUSE	0x0200		/* VMIO data won't be reused */
 #define	IO_EXT		0x0400		/* operate on external attributes */
 #define	IO_NORMAL	0x0800		/* operate on regular data */
 #define	IO_NOMACCHECK	0x1000		/* MAC checks unnecessary */
 #define	IO_BUFLOCKED	0x2000		/* ffs flag; indir buf is locked */
 #define	IO_RANGELOCKED	0x4000		/* range locked */
 
 #define IO_SEQMAX	0x7F		/* seq heuristic max value */
 #define IO_SEQSHIFT	16		/* seq heuristic in upper 16 bits */
 
 /*
  * Flags for accmode_t.
  */
 #define	VEXEC			000000000100 /* execute/search permission */
 #define	VWRITE			000000000200 /* write permission */
 #define	VREAD			000000000400 /* read permission */
 #define	VADMIN			000000010000 /* being the file owner */
 #define	VAPPEND			000000040000 /* permission to write/append */
 /*
  * VEXPLICIT_DENY makes VOP_ACCESSX(9) return EPERM or EACCES only
  * if permission was denied explicitly, by a "deny" rule in NFSv4 ACL,
  * and 0 otherwise.  This never happens with ordinary unix access rights
  * or POSIX.1e ACLs.  Obviously, VEXPLICIT_DENY must be OR-ed with
  * some other V* constant.
  */
 #define	VEXPLICIT_DENY		000000100000
 #define	VREAD_NAMED_ATTRS 	000000200000 /* not used */
 #define	VWRITE_NAMED_ATTRS 	000000400000 /* not used */
 #define	VDELETE_CHILD	 	000001000000
 #define	VREAD_ATTRIBUTES 	000002000000 /* permission to stat(2) */
 #define	VWRITE_ATTRIBUTES 	000004000000 /* change {m,c,a}time */
 #define	VDELETE		 	000010000000
 #define	VREAD_ACL	 	000020000000 /* read ACL and file mode */
 #define	VWRITE_ACL	 	000040000000 /* change ACL and/or file mode */
 #define	VWRITE_OWNER	 	000100000000 /* change file owner */
 #define	VSYNCHRONIZE	 	000200000000 /* not used */
 #define	VCREAT			000400000000 /* creating new file */
 #define	VVERIFY			001000000000 /* verification required */
 
 /*
  * Permissions that were traditionally granted only to the file owner.
  */
 #define VADMIN_PERMS	(VADMIN | VWRITE_ATTRIBUTES | VWRITE_ACL | \
     VWRITE_OWNER)
 
 /*
  * Permissions that were traditionally granted to everyone.
  */
 #define VSTAT_PERMS	(VREAD_ATTRIBUTES | VREAD_ACL)
 
 /*
  * Permissions that allow changing the state of the file in any way.
  */
 #define VMODIFY_PERMS	(VWRITE | VAPPEND | VADMIN_PERMS | VDELETE_CHILD | \
     VDELETE)
 
 /*
  * Token indicating no attribute value yet assigned.
  */
 #define	VNOVAL	(-1)
 
 /*
  * LK_TIMELOCK timeout for vnode locks (used mainly by the pageout daemon)
  */
 #define VLKTIMEOUT	(hz / 20 + 1)
 
 #ifdef _KERNEL
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_VNODE);
 #endif
 
 extern u_int ncsizefactor;
 
 /*
  * Convert between vnode types and inode formats (since POSIX.1
  * defines mode word of stat structure in terms of inode formats).
  */
 extern enum vtype	iftovt_tab[];
 extern int		vttoif_tab[];
 #define	IFTOVT(mode)	(iftovt_tab[((mode) & S_IFMT) >> 12])
 #define	VTTOIF(indx)	(vttoif_tab[(int)(indx)])
 #define	MAKEIMODE(indx, mode)	(int)(VTTOIF(indx) | (mode))
 
 /*
  * Flags to various vnode functions.
  */
 #define	SKIPSYSTEM	0x0001	/* vflush: skip vnodes marked VV_SYSTEM */
 #define	FORCECLOSE	0x0002	/* vflush: force file closure */
 #define	WRITECLOSE	0x0004	/* vflush: only close writable files */
 #define	EARLYFLUSH	0x0008	/* vflush: early call for ffs_flushfiles */
 #define	V_SAVE		0x0001	/* vinvalbuf: sync file first */
 #define	V_ALT		0x0002	/* vinvalbuf: invalidate only alternate bufs */
 #define	V_NORMAL	0x0004	/* vinvalbuf: invalidate only regular bufs */
 #define	V_CLEANONLY	0x0008	/* vinvalbuf: invalidate only clean bufs */
 #define	V_VMIO		0x0010	/* vinvalbuf: called during pageout */
 #define	REVOKEALL	0x0001	/* vop_revoke: revoke all aliases */
 #define	V_WAIT		0x0001	/* vn_start_write: sleep for suspend */
 #define	V_NOWAIT	0x0002	/* vn_start_write: don't sleep for suspend */
 #define	V_XSLEEP	0x0004	/* vn_start_write: just return after sleep */
 #define	V_MNTREF	0x0010	/* vn_start_write: mp is already ref-ed */
 
 #define	VR_START_WRITE	0x0001	/* vfs_write_resume: start write atomically */
 #define	VR_NO_SUSPCLR	0x0002	/* vfs_write_resume: do not clear suspension */
 
 #define	VS_SKIP_UNMOUNT	0x0001	/* vfs_write_suspend: fail if the
 				   filesystem is being unmounted */
 
 #define	VREF(vp)	vref(vp)
 
 #ifdef DIAGNOSTIC
 #define	VATTR_NULL(vap)	vattr_null(vap)
 #else
 #define	VATTR_NULL(vap)	(*(vap) = va_null)	/* initialize a vattr */
 #endif /* DIAGNOSTIC */
 
 #define	NULLVP	((struct vnode *)NULL)
 
 /*
  * Global vnode data.
  */
 extern	struct vnode *rootvnode;	/* root (i.e. "/") vnode */
 extern	struct mount *rootdevmp;	/* "/dev" mount */
 extern	int desiredvnodes;		/* number of vnodes desired */
 extern	struct uma_zone *namei_zone;
 extern	struct vattr va_null;		/* predefined null vattr structure */
 
 #define	VI_LOCK(vp)	mtx_lock(&(vp)->v_interlock)
 #define	VI_LOCK_FLAGS(vp, flags) mtx_lock_flags(&(vp)->v_interlock, (flags))
 #define	VI_TRYLOCK(vp)	mtx_trylock(&(vp)->v_interlock)
 #define	VI_UNLOCK(vp)	mtx_unlock(&(vp)->v_interlock)
 #define	VI_MTX(vp)	(&(vp)->v_interlock)
 
 #define	VN_LOCK_AREC(vp)	lockallowrecurse((vp)->v_vnlock)
 #define	VN_LOCK_ASHARE(vp)	lockallowshare((vp)->v_vnlock)
 #define	VN_LOCK_DSHARE(vp)	lockdisableshare((vp)->v_vnlock)
 
 #endif /* _KERNEL */
 
 /*
  * Mods for extensibility.
  */
 
 /*
  * Flags for vdesc_flags:
  */
 #define	VDESC_MAX_VPS		16
 /* Low order 16 flag bits are reserved for willrele flags for vp arguments. */
 #define	VDESC_VP0_WILLRELE	0x0001
 #define	VDESC_VP1_WILLRELE	0x0002
 #define	VDESC_VP2_WILLRELE	0x0004
 #define	VDESC_VP3_WILLRELE	0x0008
 #define	VDESC_NOMAP_VPP		0x0100
 #define	VDESC_VPP_WILLRELE	0x0200
 
 /*
  * A generic structure.
  * This can be used by bypass routines to identify generic arguments.
  */
 struct vop_generic_args {
 	struct vnodeop_desc *a_desc;
 	/* other random data follows, presumably */
 };
 
 typedef int vop_bypass_t(struct vop_generic_args *);
 
 /*
  * VDESC_NO_OFFSET is used to identify the end of the offset list
  * and in places where no such field exists.  The value is parenthesized
  * so that it expands as a single operand in any expression.
  */
 #define VDESC_NO_OFFSET (-1)
 
 /*
  * This structure describes the vnode operation taking place.
  */
 struct vnodeop_desc {
 	char	*vdesc_name;		/* a readable name for debugging */
 	int	 vdesc_flags;		/* VDESC_* flags */
 	vop_bypass_t	*vdesc_call;	/* Function to call */
 
 	/*
 	 * These ops are used by bypass routines to map and locate arguments.
 	 * Creds and procs are not needed in bypass routines, but sometimes
 	 * they are useful to (for example) transport layers.
 	 * Nameidata is useful because it has a cred in it.
 	 */
 	int	*vdesc_vp_offsets;	/* list ended by VDESC_NO_OFFSET */
 	int	vdesc_vpp_offset;	/* return vpp location */
 	int	vdesc_cred_offset;	/* cred location, if any */
 	int	vdesc_thread_offset;	/* thread location, if any */
 	int	vdesc_componentname_offset; /* if any */
 };
 
 #ifdef _KERNEL
 /*
  * A list of all the operation descs.
  */
 extern struct vnodeop_desc *vnodeop_descs[];
 
 #define	VOPARG_OFFSETOF(s_type, field)	__offsetof(s_type, field)
 #define	VOPARG_OFFSETTO(s_type, s_offset, struct_p) \
     ((s_type)(((char*)(struct_p)) + (s_offset)))
 
 
 #ifdef DEBUG_VFS_LOCKS
 /*
  * Support code to aid in debugging VFS locking problems.  Not totally
  * reliable since if the thread sleeps between changing the lock
  * state and checking it with the assert, some other thread could
  * change the state.  They are good enough for debugging a single
  * filesystem using a single-threaded test.  Note that the unreliability is
  * limited to false negatives; efforts were made to ensure that false
  * positives cannot occur.
  */
 void	assert_vi_locked(struct vnode *vp, const char *str);
 void	assert_vi_unlocked(struct vnode *vp, const char *str);
 void	assert_vop_elocked(struct vnode *vp, const char *str);
 void	assert_vop_locked(struct vnode *vp, const char *str);
 void	assert_vop_unlocked(struct vnode *vp, const char *str);
 
 #define	ASSERT_VI_LOCKED(vp, str)	assert_vi_locked((vp), (str))
 #define	ASSERT_VI_UNLOCKED(vp, str)	assert_vi_unlocked((vp), (str))
 #define	ASSERT_VOP_ELOCKED(vp, str)	assert_vop_elocked((vp), (str))
 #define	ASSERT_VOP_LOCKED(vp, str)	assert_vop_locked((vp), (str))
 #define	ASSERT_VOP_UNLOCKED(vp, str)	assert_vop_unlocked((vp), (str))
 
 #else /* !DEBUG_VFS_LOCKS */
 
 #define	ASSERT_VI_LOCKED(vp, str)	((void)0)
 #define	ASSERT_VI_UNLOCKED(vp, str)	((void)0)
 #define	ASSERT_VOP_ELOCKED(vp, str)	((void)0)
 #define	ASSERT_VOP_LOCKED(vp, str)	((void)0)
 #define	ASSERT_VOP_UNLOCKED(vp, str)	((void)0)
 #endif /* DEBUG_VFS_LOCKS */
 
 
 /*
  * This call works for vnodes in the kernel.
  */
 #define VCALL(c) ((c)->a_desc->vdesc_call(c))
 
 #define DOINGASYNC(vp)	   					\
 	(((vp)->v_mount->mnt_kern_flag & MNTK_ASYNC) != 0 &&	\
 	 ((curthread->td_pflags & TDP_SYNCIO) == 0))
 
 /*
  * VMIO support inline
  */
 
 extern int vmiodirenable;
 
 static __inline int
 vn_canvmio(struct vnode *vp)
 {
       if (vp && (vp->v_type == VREG || (vmiodirenable && vp->v_type == VDIR)))
 		return(TRUE);
 	return(FALSE);
 }
 
 /*
  * Finally, include the default set of vnode operations.
  */
 typedef void vop_getpages_iodone_t(void *, vm_page_t *, int, int);
 #include "vnode_if.h"
 
 /* vn_open_flags */
 #define	VN_OPEN_NOAUDIT		0x00000001
 #define	VN_OPEN_NOCAPCHECK	0x00000002
 #define	VN_OPEN_NAMECACHE	0x00000004
 
 /*
  * Public vnode manipulation functions.
  */
 struct componentname;
 struct file;
 struct mount;
 struct nameidata;
 struct ostat;
+struct freebsd11_stat;
 struct thread;
 struct proc;
 struct stat;
 struct nstat;
 struct ucred;
 struct uio;
 struct vattr;
 struct vfsops;
 struct vnode;
 
 typedef int (*vn_get_ino_t)(struct mount *, void *, int, struct vnode **);
 
 int	bnoreuselist(struct bufv *bufv, struct bufobj *bo, daddr_t startn,
 	    daddr_t endn);
 /* cache_* may belong in namei.h. */
 void	cache_changesize(int newhashsize);
 #define	cache_enter(dvp, vp, cnp)					\
 	cache_enter_time(dvp, vp, cnp, NULL, NULL)
 void	cache_enter_time(struct vnode *dvp, struct vnode *vp,
 	    struct componentname *cnp, struct timespec *tsp,
 	    struct timespec *dtsp);
 int	cache_lookup(struct vnode *dvp, struct vnode **vpp,
 	    struct componentname *cnp, struct timespec *tsp, int *ticksp);
 void	cache_purge(struct vnode *vp);
 void	cache_purge_negative(struct vnode *vp);
 void	cache_purgevfs(struct mount *mp, bool force);
 int	change_dir(struct vnode *vp, struct thread *td);
 void	cvtstat(struct stat *st, struct ostat *ost);
-void	cvtnstat(struct stat *sb, struct nstat *nsb);
+void	freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb);
+void	freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost);
 int	getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
 	    struct vnode **vpp);
 void	getnewvnode_reserve(u_int count);
 void	getnewvnode_drop_reserve(void);
 int	insmntque1(struct vnode *vp, struct mount *mp,
 	    void (*dtr)(struct vnode *, void *), void *dtr_arg);
 int	insmntque(struct vnode *vp, struct mount *mp);
 u_quad_t init_va_filerev(void);
 int	speedup_syncer(void);
 int	vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf,
 	    u_int *buflen);
 int	vn_fullpath(struct thread *td, struct vnode *vn,
 	    char **retbuf, char **freebuf);
 int	vn_fullpath_global(struct thread *td, struct vnode *vn,
 	    char **retbuf, char **freebuf);
 struct vnode *
 	vn_dir_dd_ino(struct vnode *vp);
 int	vn_commname(struct vnode *vn, char *buf, u_int buflen);
 int	vn_path_to_global_path(struct thread *td, struct vnode *vp,
 	    char *path, u_int pathlen);
 int	vaccess(enum vtype type, mode_t file_mode, uid_t file_uid,
 	    gid_t file_gid, accmode_t accmode, struct ucred *cred,
 	    int *privused);
 int	vaccess_acl_nfs4(enum vtype type, uid_t file_uid, gid_t file_gid,
 	    struct acl *aclp, accmode_t accmode, struct ucred *cred,
 	    int *privused);
 int	vaccess_acl_posix1e(enum vtype type, uid_t file_uid,
 	    gid_t file_gid, struct acl *acl, accmode_t accmode,
 	    struct ucred *cred, int *privused);
 void	vattr_null(struct vattr *vap);
 int	vcount(struct vnode *vp);
 #define	vdrop(vp)	_vdrop((vp), 0)
 #define	vdropl(vp)	_vdrop((vp), 1)
 void	_vdrop(struct vnode *, bool);
 int	vflush(struct mount *mp, int rootrefs, int flags, struct thread *td);
 int	vget(struct vnode *vp, int lockflag, struct thread *td);
 void	vgone(struct vnode *vp);
 #define	vhold(vp)	_vhold((vp), 0)
 #define	vholdl(vp)	_vhold((vp), 1)
 void	_vhold(struct vnode *, bool);
 void	vinactive(struct vnode *, struct thread *);
 int	vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo);
 int	vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length,
 	    int blksize);
 void	vunref(struct vnode *);
 void	vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3);
 int	vrecycle(struct vnode *vp);
 int	vrecyclel(struct vnode *vp);
 int	vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off,
 	    struct ucred *cred);
 int	vn_close(struct vnode *vp,
 	    int flags, struct ucred *file_cred, struct thread *td);
 void	vn_finished_write(struct mount *mp);
 void	vn_finished_secondary_write(struct mount *mp);
 int	vn_isdisk(struct vnode *vp, int *errp);
 int	_vn_lock(struct vnode *vp, int flags, char *file, int line);
 #define vn_lock(vp, flags) _vn_lock(vp, flags, __FILE__, __LINE__)
 int	vn_open(struct nameidata *ndp, int *flagp, int cmode, struct file *fp);
 int	vn_open_cred(struct nameidata *ndp, int *flagp, int cmode,
 	    u_int vn_open_flags, struct ucred *cred, struct file *fp);
 int	vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
 	    struct thread *td, struct file *fp);
 void	vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end);
 int	vn_pollrecord(struct vnode *vp, struct thread *p, int events);
 int	vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base,
 	    int len, off_t offset, enum uio_seg segflg, int ioflg,
 	    struct ucred *active_cred, struct ucred *file_cred, ssize_t *aresid,
 	    struct thread *td);
 int	vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, void *base,
 	    size_t len, off_t offset, enum uio_seg segflg, int ioflg,
 	    struct ucred *active_cred, struct ucred *file_cred, size_t *aresid,
 	    struct thread *td);
 int	vn_rlimit_fsize(const struct vnode *vn, const struct uio *uio,
 	    struct thread *td);
 int	vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred,
 	    struct ucred *file_cred, struct thread *td);
 int	vn_start_write(struct vnode *vp, struct mount **mpp, int flags);
 int	vn_start_secondary_write(struct vnode *vp, struct mount **mpp,
 	    int flags);
 int	vn_writechk(struct vnode *vp);
 int	vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
 	    const char *attrname, int *buflen, char *buf, struct thread *td);
 int	vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
 	    const char *attrname, int buflen, char *buf, struct thread *td);
 int	vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
 	    const char *attrname, struct thread *td);
 int	vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags,
 	    struct vnode **rvp);
 int	vn_vget_ino_gen(struct vnode *vp, vn_get_ino_t alloc,
 	    void *alloc_arg, int lkflags, struct vnode **rvp);
 int	vn_utimes_perm(struct vnode *vp, struct vattr *vap,
 	    struct ucred *cred, struct thread *td);
 
 int	vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio);
 int	vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize,
 	    struct uio *uio);
 
 #define	vn_rangelock_unlock(vp, cookie)					\
 	rangelock_unlock(&(vp)->v_rl, (cookie), VI_MTX(vp))
 #define	vn_rangelock_unlock_range(vp, cookie, start, end)		\
 	rangelock_unlock_range(&(vp)->v_rl, (cookie), (start), (end), 	\
 	    VI_MTX(vp))
 #define	vn_rangelock_rlock(vp, start, end)				\
 	rangelock_rlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
 #define	vn_rangelock_wlock(vp, start, end)				\
 	rangelock_wlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
 
 int	vfs_cache_lookup(struct vop_lookup_args *ap);
 void	vfs_timestamp(struct timespec *);
 void	vfs_write_resume(struct mount *mp, int flags);
 int	vfs_write_suspend(struct mount *mp, int flags);
 int	vfs_write_suspend_umnt(struct mount *mp);
 void	vnlru_free(int, struct vfsops *);
 int	vop_stdbmap(struct vop_bmap_args *);
 int	vop_stdfdatasync_buf(struct vop_fdatasync_args *);
 int	vop_stdfsync(struct vop_fsync_args *);
 int	vop_stdgetwritemount(struct vop_getwritemount_args *);
 int	vop_stdgetpages(struct vop_getpages_args *);
 int	vop_stdinactive(struct vop_inactive_args *);
 int	vop_stdislocked(struct vop_islocked_args *);
 int	vop_stdkqfilter(struct vop_kqfilter_args *);
 int	vop_stdlock(struct vop_lock1_args *);
 int	vop_stdputpages(struct vop_putpages_args *);
 int	vop_stdunlock(struct vop_unlock_args *);
 int	vop_nopoll(struct vop_poll_args *);
 int	vop_stdaccess(struct vop_access_args *ap);
 int	vop_stdaccessx(struct vop_accessx_args *ap);
 int	vop_stdadvise(struct vop_advise_args *ap);
 int	vop_stdadvlock(struct vop_advlock_args *ap);
 int	vop_stdadvlockasync(struct vop_advlockasync_args *ap);
 int	vop_stdadvlockpurge(struct vop_advlockpurge_args *ap);
 int	vop_stdallocate(struct vop_allocate_args *ap);
 int	vop_stdpathconf(struct vop_pathconf_args *);
 int	vop_stdpoll(struct vop_poll_args *);
 int	vop_stdvptocnp(struct vop_vptocnp_args *ap);
 int	vop_stdvptofh(struct vop_vptofh_args *ap);
 int	vop_stdunp_bind(struct vop_unp_bind_args *ap);
 int	vop_stdunp_connect(struct vop_unp_connect_args *ap);
 int	vop_stdunp_detach(struct vop_unp_detach_args *ap);
 int	vop_eopnotsupp(struct vop_generic_args *ap);
 int	vop_ebadf(struct vop_generic_args *ap);
 int	vop_einval(struct vop_generic_args *ap);
 int	vop_enoent(struct vop_generic_args *ap);
 int	vop_enotty(struct vop_generic_args *ap);
 int	vop_null(struct vop_generic_args *ap);
 int	vop_panic(struct vop_generic_args *ap);
 int	dead_poll(struct vop_poll_args *ap);
 int	dead_read(struct vop_read_args *ap);
 int	dead_write(struct vop_write_args *ap);
 
 /* These are called from within the actual VOPS. */
 void	vop_close_post(void *a, int rc);
 void	vop_create_post(void *a, int rc);
 void	vop_deleteextattr_post(void *a, int rc);
 void	vop_link_post(void *a, int rc);
 void	vop_lookup_post(void *a, int rc);
 void	vop_lookup_pre(void *a);
 void	vop_mkdir_post(void *a, int rc);
 void	vop_mknod_post(void *a, int rc);
 void	vop_open_post(void *a, int rc);
 void	vop_read_post(void *a, int rc);
 void	vop_readdir_post(void *a, int rc);
 void	vop_reclaim_post(void *a, int rc);
 void	vop_remove_post(void *a, int rc);
 void	vop_rename_post(void *a, int rc);
 void	vop_rename_pre(void *a);
 void	vop_rmdir_post(void *a, int rc);
 void	vop_setattr_post(void *a, int rc);
 void	vop_setextattr_post(void *a, int rc);
 void	vop_symlink_post(void *a, int rc);
 
 #ifdef DEBUG_VFS_LOCKS
 void	vop_strategy_pre(void *a);
 void	vop_lock_pre(void *a);
 void	vop_lock_post(void *a, int rc);
 void	vop_unlock_post(void *a, int rc);
 void	vop_unlock_pre(void *a);
 #else
 #define	vop_strategy_pre(x)	do { } while (0)
 #define	vop_lock_pre(x)		do { } while (0)
 #define	vop_lock_post(x, y)	do { } while (0)
 #define	vop_unlock_post(x, y)	do { } while (0)
 #define	vop_unlock_pre(x)	do { } while (0)
 #endif
 
 void	vop_rename_fail(struct vop_rename_args *ap);
 
 /*
  * VOP_WRITE_PRE() and VOP_WRITE_POST() bracket a write so that a knote
  * can be posted once it is done.  Neither macro is a self-contained
  * statement:
  *
  * VOP_WRITE_PRE declares the locals va, error, osize, ooffset and noffset
  * in the enclosing scope, so it must be expanded where declarations are
  * allowed and where those names are free.  If the vnode's knlist is not
  * empty it fetches the attributes with VOP_GETATTR() and, on failure,
  * executes "return (error)" in the calling function; otherwise it records
  * the uio offset and the file size as they were before the write.
  *
  * VOP_WRITE_POST relies on the locals declared by VOP_WRITE_PRE.  When the
  * uio offset has advanced and the knlist is not empty it posts NOTE_WRITE,
  * adding NOTE_EXTEND if the new offset is past the recorded size.  Its
  * "ret" argument is not referenced by the expansion.
  */
 #define	VOP_WRITE_PRE(ap)						\
 	struct vattr va;						\
 	int error;							\
 	off_t osize, ooffset, noffset;					\
 									\
 	osize = ooffset = noffset = 0;					\
 	if (!VN_KNLIST_EMPTY((ap)->a_vp)) {				\
 		error = VOP_GETATTR((ap)->a_vp, &va, (ap)->a_cred);	\
 		if (error)						\
 			return (error);					\
 		ooffset = (ap)->a_uio->uio_offset;			\
 		osize = (off_t)va.va_size;				\
 	}
 
 #define VOP_WRITE_POST(ap, ret)						\
 	noffset = (ap)->a_uio->uio_offset;				\
 	if (noffset > ooffset && !VN_KNLIST_EMPTY((ap)->a_vp)) {	\
 		VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE			\
 		    | (noffset > osize ? NOTE_EXTEND : 0));		\
 	}
 
 #define VOP_LOCK(vp, flags) VOP_LOCK1(vp, flags, __FILE__, __LINE__)
 
 
 void	vput(struct vnode *vp);
 void	vrele(struct vnode *vp);
 void	vref(struct vnode *vp);
 void	vrefl(struct vnode *vp);
 void	vrefact(struct vnode *vp);
 int	vrefcnt(struct vnode *vp);
 void 	v_addpollinfo(struct vnode *vp);
 
 int vnode_create_vobject(struct vnode *vp, off_t size, struct thread *td);
 void vnode_destroy_vobject(struct vnode *vp);
 
 extern struct vop_vector fifo_specops;
 extern struct vop_vector dead_vnodeops;
 extern struct vop_vector default_vnodeops;
 
 #define VOP_PANIC	((void*)(uintptr_t)vop_panic)
 #define VOP_NULL	((void*)(uintptr_t)vop_null)
 #define VOP_EBADF	((void*)(uintptr_t)vop_ebadf)
 #define VOP_ENOTTY	((void*)(uintptr_t)vop_enotty)
 #define VOP_EINVAL	((void*)(uintptr_t)vop_einval)
 #define VOP_ENOENT	((void*)(uintptr_t)vop_enoent)
 #define VOP_EOPNOTSUPP	((void*)(uintptr_t)vop_eopnotsupp)
 
 /* fifo_vnops.c */
 int	fifo_printinfo(struct vnode *);
 
 /* vfs_hash.c */
 typedef int vfs_hash_cmp_t(struct vnode *vp, void *arg);
 
 void vfs_hash_changesize(int newhashsize);
 int vfs_hash_get(const struct mount *mp, u_int hash, int flags,
     struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg);
 u_int vfs_hash_index(struct vnode *vp);
 int vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td,
     struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg);
 void vfs_hash_ref(const struct mount *mp, u_int hash, struct thread *td,
     struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg);
 void vfs_hash_rehash(struct vnode *vp, u_int hash);
 void vfs_hash_remove(struct vnode *vp);
 
 int vfs_kqfilter(struct vop_kqfilter_args *);
 void vfs_mark_atime(struct vnode *vp, struct ucred *cred);
 struct dirent;
 int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off);
 
 int vfs_unixify_accmode(accmode_t *accmode);
 
 void vfs_unp_reclaim(struct vnode *vp);
 
 int setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode);
 int setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid,
     gid_t gid);
 int vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
     struct thread *td);
 int vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
     struct thread *td);
 
 #endif /* _KERNEL */
 
 #endif /* !_SYS_VNODE_H_ */
Index: head/sys/vm/swap_pager.c
===================================================================
--- head/sys/vm/swap_pager.c	(revision 318735)
+++ head/sys/vm/swap_pager.c	(revision 318736)
@@ -1,2825 +1,2850 @@
 /*-
  * Copyright (c) 1998 Matthew Dillon,
  * Copyright (c) 1994 John S. Dyson
  * Copyright (c) 1990 University of Utah.
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *				New Swap System
  *				Matthew Dillon
  *
  * Radix Bitmap 'blists'.
  *
  *	- The new swapper uses the new radix bitmap code.  This should scale
  *	  to arbitrarily small or arbitrarily large swap spaces and an almost
  *	  arbitrary degree of fragmentation.
  *
  * Features:
  *
  *	- on the fly reallocation of swap during putpages.  The new system
  *	  does not try to keep previously allocated swap blocks for dirty
  *	  pages.
  *
  *	- on the fly deallocation of swap
  *
  *	- No more garbage collection required.  Unnecessarily allocated swap
  *	  blocks only exist for dirty vm_page_t's now and these are already
  *	  cycled (in a high-load system) by the pager.  We also do on-the-fly
  *	  removal of invalidated swap blocks when a page is destroyed
  *	  or renamed.
  *
  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
  *
  *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
  *	@(#)vm_swap.c	8.5 (Berkeley) 2/17/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_compat.h"
 #include "opt_swap.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/kernel.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/disk.h>
 #include <sys/fcntl.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/vnode.h>
 #include <sys/malloc.h>
 #include <sys/racct.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/blist.h>
 #include <sys/lock.h>
 #include <sys/sx.h>
 #include <sys/vmmeter.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_param.h>
 #include <vm/swap_pager.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 
 #include <geom/geom.h>
 
 /*
  * SWB_NPAGES must be a power of 2.  It may be set to 1, 2, 4, 8, 16
  * or 32 pages per allocation.
  * The 32-page limit is due to the radix code (kern/subr_blist.c).
  */
 #ifndef MAX_PAGEOUT_CLUSTER
 #define MAX_PAGEOUT_CLUSTER 16
 #endif
 
 #if !defined(SWB_NPAGES)
 #define SWB_NPAGES	MAX_PAGEOUT_CLUSTER
 #endif
 
 /*
  * The swblock structure maps an object and a small, fixed-size range
  * of page indices to disk addresses within a swap area.
  * The collection of these mappings is implemented as a hash table.
  * Unused disk addresses within a swap area are allocated and managed
  * using a blist.
  */
 #define SWCORRECT(n) (sizeof(void *) * (n) / sizeof(daddr_t))
 #define SWAP_META_PAGES		(SWB_NPAGES * 2)
 #define SWAP_META_MASK		(SWAP_META_PAGES - 1)
 
 struct swblock {
 	struct swblock	*swb_hnext;	/* next entry on the same swhash chain */
 	vm_object_t	swb_object;	/* lookup key: owning VM object */
 	vm_pindex_t	swb_index;	/* lookup key: page index with the SWAP_META_MASK bits cleared */
 	int		swb_count;	/* presumably the number of in-use swb_pages[] slots -- confirm */
 	daddr_t		swb_pages[SWAP_META_PAGES];	/* one swap address per page slot */
 };
 
 static MALLOC_DEFINE(M_VMPGDATA, "vm_pgdata", "swap pager private data");
 static struct mtx sw_dev_mtx;
 static TAILQ_HEAD(, swdevt) swtailq = TAILQ_HEAD_INITIALIZER(swtailq);
 static struct swdevt *swdevhd;	/* Allocate from here next */
 static int nswapdev;		/* Number of swap devices */
 int swap_pager_avail;
 static struct sx swdev_syscall_lock;	/* serialize swap(on|off) */
 
 static vm_ooffset_t swap_total;
 SYSCTL_QUAD(_vm, OID_AUTO, swap_total, CTLFLAG_RD, &swap_total, 0,
     "Total amount of available swap storage.");
 static vm_ooffset_t swap_reserved;
 SYSCTL_QUAD(_vm, OID_AUTO, swap_reserved, CTLFLAG_RD, &swap_reserved, 0,
     "Amount of swap storage needed to back all allocated anonymous memory.");
 static int overcommit = 0;
 SYSCTL_INT(_vm, OID_AUTO, overcommit, CTLFLAG_RW, &overcommit, 0,
     "Configure virtual memory overcommit behavior. See tuning(7) "
     "for details.");
 static unsigned long swzone;
 SYSCTL_ULONG(_vm, OID_AUTO, swzone, CTLFLAG_RD, &swzone, 0,
     "Actual size of swap metadata zone");
 static unsigned long swap_maxpages;
 SYSCTL_ULONG(_vm, OID_AUTO, swap_maxpages, CTLFLAG_RD, &swap_maxpages, 0,
     "Maximum amount of swap supported");
 
 /* bits from overcommit */
 #define	SWAP_RESERVE_FORCE_ON		(1 << 0)
 #define	SWAP_RESERVE_RLIMIT_ON		(1 << 1)
 #define	SWAP_RESERVE_ALLOW_NONWIRED	(1 << 2)
 
 /*
  * Reserve "incr" bytes of swap, charging the credential of the
  * current thread.  The result is whatever swap_reserve_by_cred()
  * reports for that credential.
  */
 int
 swap_reserve(vm_ooffset_t incr)
 {
 	struct ucred *cred;
 
 	cred = curthread->td_ucred;
 	return (swap_reserve_by_cred(incr, cred));
 }
 
 /*
  * Try to reserve "incr" bytes of swap and charge them to the real-uid
  * accounting record of "cred".  "incr" must be a multiple of the page
  * size, otherwise the kernel panics.
  *
  * Returns 1 when the reservation was recorded and 0 when it was refused.
  * A refusal leaves swap_reserved, the per-uid ui_vmsize and (with RACCT)
  * the RACCT_SWAP charge as they were on entry.
  */
 int
 swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred)
 {
 	vm_ooffset_t r, s;
 	int res, error;
 	static int curfail;
 	static struct timeval lastfail;
 	struct uidinfo *uip;
 
 	uip = cred->cr_ruidinfo;
 
 	if (incr & PAGE_MASK)
 		panic("swap_reserve: & PAGE_MASK");
 
 #ifdef RACCT
 	/* Charge the resource-accounting layer first; it may veto. */
 	if (racct_enable) {
 		PROC_LOCK(curproc);
 		error = racct_add(curproc, RACCT_SWAP, incr);
 		PROC_UNLOCK(curproc);
 		if (error != 0)
 			return (0);
 	}
 #endif
 
 	res = 0;
 	mtx_lock(&sw_dev_mtx);
 	/* r: the global reservation if this request were granted. */
 	r = swap_reserved + incr;
 	/*
 	 * s: the ceiling for r.  It is swap_total, plus (when
 	 * SWAP_RESERVE_ALLOW_NONWIRED is set) the bytes of pages that are
 	 * neither wired nor part of the free reserve.
 	 */
 	if (overcommit & SWAP_RESERVE_ALLOW_NONWIRED) {
 		s = vm_cnt.v_page_count - vm_cnt.v_free_reserved - vm_cnt.v_wire_count;
 		s *= PAGE_SIZE;
 	} else
 		s = 0;
 	s += swap_total;
 	/*
 	 * Grant the global reservation when enforcement is off, when it
 	 * fits under the ceiling, or when the thread holds
 	 * PRIV_VM_SWAP_NOQUOTA.
 	 */
 	if ((overcommit & SWAP_RESERVE_FORCE_ON) == 0 || r <= s ||
 	    (error = priv_check(curthread, PRIV_VM_SWAP_NOQUOTA)) == 0) {
 		res = 1;
 		swap_reserved = r;
 	}
 	mtx_unlock(&sw_dev_mtx);
 
 	if (res) {
 		/*
 		 * Per-uid check: with SWAP_RESERVE_RLIMIT_ON the uid's total
 		 * may not exceed RLIMIT_SWAP unless the thread holds
 		 * PRIV_VM_SWAP_NORLIMIT.
 		 */
 		UIDINFO_VMSIZE_LOCK(uip);
 		if ((overcommit & SWAP_RESERVE_RLIMIT_ON) != 0 &&
 		    uip->ui_vmsize + incr > lim_cur(curthread, RLIMIT_SWAP) &&
 		    priv_check(curthread, PRIV_VM_SWAP_NORLIMIT))
 			res = 0;
 		else
 			uip->ui_vmsize += incr;
 		UIDINFO_VMSIZE_UNLOCK(uip);
 		if (!res) {
 			/* Undo the global reservation made above. */
 			mtx_lock(&sw_dev_mtx);
 			swap_reserved -= incr;
 			mtx_unlock(&sw_dev_mtx);
 		}
 	}
 	/* Log the failure; ppsratecheck() throttles the message rate. */
 	if (!res && ppsratecheck(&lastfail, &curfail, 1)) {
 		printf("uid %d, pid %d: swap reservation for %jd bytes failed\n",
 		    uip->ui_uid, curproc->p_pid, incr);
 	}
 
 #ifdef RACCT
 	/* Give back the RACCT_SWAP charge taken at the top. */
 	if (!res) {
 		PROC_LOCK(curproc);
 		racct_sub(curproc, RACCT_SWAP, incr);
 		PROC_UNLOCK(curproc);
 	}
 #endif
 
 	return (res);
 }
 
 /*
  * Unconditionally account "incr" bytes of swap: bump the global
  * swap_reserved counter, the RACCT_SWAP charge of the current process
  * (when RACCT is compiled in) and the ui_vmsize of the current thread's
  * real uid.  No limit is checked, so this cannot fail.
  */
 void
 swap_reserve_force(vm_ooffset_t incr)
 {
 	struct uidinfo *ui;
 
 	/* Global counter, protected by sw_dev_mtx. */
 	mtx_lock(&sw_dev_mtx);
 	swap_reserved += incr;
 	mtx_unlock(&sw_dev_mtx);
 
 #ifdef RACCT
 	PROC_LOCK(curproc);
 	racct_add_force(curproc, RACCT_SWAP, incr);
 	PROC_UNLOCK(curproc);
 #endif
 
 	/* Per-uid counter, updated with the process lock held. */
 	ui = curthread->td_ucred->cr_ruidinfo;
 	PROC_LOCK(curproc);
 	UIDINFO_VMSIZE_LOCK(ui);
 	ui->ui_vmsize += incr;
 	UIDINFO_VMSIZE_UNLOCK(ui);
 	PROC_UNLOCK(curproc);
 }
 
 /*
  * Release "decr" bytes of reserved swap on behalf of the current
  * thread's credential.  The process lock is held across the call to
  * swap_release_by_cred().
  */
 void
 swap_release(vm_ooffset_t decr)
 {
 
 	PROC_LOCK(curproc);
 	swap_release_by_cred(decr, curthread->td_ucred);
 	PROC_UNLOCK(curproc);
 }
 
 /*
  * Return "decr" bytes of reserved swap that were charged to "cred".
  * "decr" must be page aligned and may not exceed the global
  * reservation; either violation panics.  A per-uid underflow is only
  * reported on the console.
  */
 void
 swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred)
 {
 	struct uidinfo *ui = cred->cr_ruidinfo;
 
 	if ((decr & PAGE_MASK) != 0)
 		panic("swap_release: & PAGE_MASK");
 
 	/* Global counter. */
 	mtx_lock(&sw_dev_mtx);
 	if (decr > swap_reserved)
 		panic("swap_reserved < decr");
 	swap_reserved -= decr;
 	mtx_unlock(&sw_dev_mtx);
 
 	/* Per-uid counter. */
 	UIDINFO_VMSIZE_LOCK(ui);
 	if (decr > ui->ui_vmsize)
 		printf("negative vmsize for uid = %d\n", ui->ui_uid);
 	ui->ui_vmsize -= decr;
 	UIDINFO_VMSIZE_UNLOCK(ui);
 
 	racct_sub_cred(cred, RACCT_SWAP, decr);
 }
 
 #define SWM_FREE	0x02	/* free, period			*/
 #define SWM_POP		0x04	/* pop out			*/
 
 int swap_pager_full = 2;	/* swap space exhaustion (task killing) */
 static int swap_pager_almost_full = 1; /* swap space exhaustion (w/hysteresis)*/
 static int nsw_rcount;		/* free read buffers			*/
 static int nsw_wcount_sync;	/* limit write buffers / synchronous	*/
 static int nsw_wcount_async;	/* limit write buffers / asynchronous	*/
 static int nsw_wcount_async_max;/* assigned maximum			*/
 static int nsw_cluster_max;	/* maximum VOP I/O allowed		*/
 
 static int sysctl_swap_async_max(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_vm, OID_AUTO, swap_async_max, CTLTYPE_INT | CTLFLAG_RW |
     CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_async_max, "I",
     "Maximum running async swap ops");
 
 static struct swblock **swhash;
 static int swhash_mask;
 static struct mtx swhash_mtx;
 
 static struct sx sw_alloc_sx;
 
 /*
  * "named" and "unnamed" anon region objects.  Try to reduce the overhead
  * of searching a named list by hashing it just a little.
  */
 
 #define NOBJLISTS		8
 
 /*
  * Select the swap_pager_object_list[] bucket for a handle: the handle's
  * pointer value is shifted right by 4 and masked to NOBJLISTS buckets
  * (NOBJLISTS must be a power of 2).  The argument is parenthesized so
  * that an expression argument is cast and shifted as a whole.
  */
 #define NOBJLIST(handle)	\
 	(&swap_pager_object_list[((int)(intptr_t)(handle) >> 4) & (NOBJLISTS-1)])
 
 static struct pagerlst	swap_pager_object_list[NOBJLISTS];
 static uma_zone_t	swap_zone;
 
 /*
  * pagerops for OBJT_SWAP - "swap pager".  Some ops are also global procedure
  * calls hooked from other parts of the VM system and do not appear here.
  * (see vm/swap_pager.h).
  */
 static vm_object_t
 		swap_pager_alloc(void *handle, vm_ooffset_t size,
 		    vm_prot_t prot, vm_ooffset_t offset, struct ucred *);
 static void	swap_pager_dealloc(vm_object_t object);
 static int	swap_pager_getpages(vm_object_t, vm_page_t *, int, int *,
     int *);
 static int	swap_pager_getpages_async(vm_object_t, vm_page_t *, int, int *,
     int *, pgo_getpages_iodone_t, void *);
 static void	swap_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *);
 static boolean_t
 		swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after);
 static void	swap_pager_init(void);
 static void	swap_pager_unswapped(vm_page_t);
 static void	swap_pager_swapoff(struct swdevt *sp);
 
 struct pagerops swappagerops = {
 	/* early system initialization of pager */
 	.pgo_init =		swap_pager_init,
 	/* allocate an OBJT_SWAP object */
 	.pgo_alloc =		swap_pager_alloc,
 	/* deallocate an OBJT_SWAP object */
 	.pgo_dealloc =		swap_pager_dealloc,
 	/* pagein */
 	.pgo_getpages =		swap_pager_getpages,
 	/* pagein (async) */
 	.pgo_getpages_async =	swap_pager_getpages_async,
 	/* pageout */
 	.pgo_putpages =		swap_pager_putpages,
 	/* get backing store status for page */
 	.pgo_haspage =		swap_pager_haspage,
 	/* remove swap related to page */
 	.pgo_pageunswapped =	swap_pager_unswapped,
 };
 
 /*
  * dmmax is in page-sized chunks with the new swap system.  It was
  * dev-bsized chunks in the old.  dmmax is always a power of 2.
  *
  * swap_*() routines are externally accessible.  swp_*() routines are
  * internal.
  */
 static int dmmax;
 static int nswap_lowat = 128;	/* in pages, swap_pager_almost_full warn */
 static int nswap_hiwat = 512;	/* in pages, swap_pager_almost_full warn */
 
 SYSCTL_INT(_vm, OID_AUTO, dmmax, CTLFLAG_RD, &dmmax, 0,
     "Maximum size of a swap block");
 
 static void	swp_sizecheck(void);
 static void	swp_pager_async_iodone(struct buf *bp);
 static int	swapongeom(struct vnode *);
 static int	swaponvp(struct thread *, struct vnode *, u_long);
 static int	swapoff_one(struct swdevt *sp, struct ucred *cred);
 
 /*
  * Swap bitmap functions
  */
 static void	swp_pager_freeswapspace(daddr_t blk, int npages);
 static daddr_t	swp_pager_getswapspace(int npages);
 
 /*
  * Metadata functions
  */
 static struct swblock **swp_pager_hash(vm_object_t object, vm_pindex_t index);
 static void swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t);
 static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t);
 static void swp_pager_meta_free_all(vm_object_t);
 static daddr_t swp_pager_meta_ctl(vm_object_t, vm_pindex_t, int);
 
 /*
  * SWP_SIZECHECK() -	update swap_pager_full indication
  *
  *	update the swap_pager_almost_full indication and warn when we are
  *	about to run out of swap space, using lowat/hiwat hysteresis.
  *
  *	Clear swap_pager_full ( task killing ) indication when lowat is met.
  *
  *	No restrictions on call
  *	This routine may not block.
  */
 static void
 swp_sizecheck(void)
 {
 
 	if (swap_pager_avail >= nswap_lowat) {
 		/* Low-water mark met: clear the task-killing indication. */
 		swap_pager_full = 0;
 		/* "Almost full" is only dropped above the high-water mark. */
 		if (swap_pager_avail > nswap_hiwat)
 			swap_pager_almost_full = 0;
 		return;
 	}
 
 	/* Below the low-water mark: warn once per transition. */
 	if (swap_pager_almost_full == 0) {
 		printf("swap_pager: out of swap space\n");
 		swap_pager_almost_full = 1;
 	}
 }
 
 /*
  * SWP_PAGER_HASH() -	hash swap meta data
  *
  *	This is a helper function which hashes the swapblk given
  *	the object and page index.  It returns a pointer to the pointer
  *	that references the matching swapblk, or a pointer to a NULL
  *	pointer if it could not find a swapblk.
  */
 static struct swblock **
 swp_pager_hash(vm_object_t object, vm_pindex_t index)
 {
 	struct swblock **link;
 	struct swblock *entry;
 
 	/* All pages of one swblock share the same rounded-down key. */
 	index &= ~(vm_pindex_t)SWAP_META_MASK;
 
 	/*
 	 * Walk the chain through the link pointers so that the caller
 	 * gets the slot referencing the match, or the terminating NULL
 	 * slot of the chain when there is no match.
 	 */
 	for (link = &swhash[(index ^ (int)(intptr_t)object) & swhash_mask];
 	    (entry = *link) != NULL; link = &entry->swb_hnext) {
 		if (entry->swb_object == object && entry->swb_index == index)
 			break;
 	}
 	return (link);
 }
 
 /*
  * SWAP_PAGER_INIT() -	initialize the swap pager!
  *
  *	Expected to be started from system init.  NOTE:  This code is run
  *	before much else so be careful what you depend on.  Most of the VM
  *	system has yet to be initialized at this point.
  */
 static void
 swap_pager_init(void)
 {
 	/*
 	 * Initialize object lists
 	 */
 	int i;
 
 	for (i = 0; i < NOBJLISTS; ++i)
 		TAILQ_INIT(&swap_pager_object_list[i]);
 	mtx_init(&sw_dev_mtx, "swapdev", NULL, MTX_DEF);
 	sx_init(&sw_alloc_sx, "swspsx");
 	sx_init(&swdev_syscall_lock, "swsysc");
 
 	/*
 	 * Device Stripe, in PAGE_SIZE'd blocks
 	 */
 	dmmax = SWB_NPAGES * 2;
 }
 
 /*
  * SWAP_PAGER_SWAP_INIT() - swap pager initialization from pageout process
  *
  *	Expected to be started from pageout process once, prior to entering
  *	its main loop.
  */
 void
 swap_pager_swap_init(void)
 {
 	/* n: swblock entries actually reserved; n2: the count first asked for. */
 	unsigned long n, n2;
 
 	/*
 	 * Number of in-transit swap bp operations.  Don't
 	 * exhaust the pbufs completely.  Make sure we
 	 * initialize workable values (0 will work for hysteresis
 	 * but it isn't very efficient).
 	 *
 	 * The nsw_cluster_max is constrained by the bp->b_pages[]
 	 * array (MAXPHYS/PAGE_SIZE) and our locally defined
 	 * MAX_PAGEOUT_CLUSTER.   Also be aware that swap ops are
 	 * constrained by the swap device interleave stripe size.
 	 *
 	 * Currently we hardwire nsw_wcount_async to 4.  This limit is
 	 * designed to prevent other I/O from having high latencies due to
 	 * our pageout I/O.  The value 4 works well for one or two active swap
 	 * devices but is probably a little low if you have more.  Even so,
 	 * a higher value would probably generate only a limited improvement
 	 * with three or four active swap devices since the system does not
 	 * typically have to pageout at extreme bandwidths.   We will want
 	 * at least 2 per swap devices, and 4 is a pretty good value if you
 	 * have one NFS swap device due to the command/ack latency over NFS.
 	 * So it all works out pretty well.
 	 */
 	nsw_cluster_max = min((MAXPHYS/PAGE_SIZE), MAX_PAGEOUT_CLUSTER);
 
 	/* The buffer counters are set with pbuf_mtx held. */
 	mtx_lock(&pbuf_mtx);
 	nsw_rcount = (nswbuf + 1) / 2;
 	nsw_wcount_sync = (nswbuf + 3) / 4;
 	nsw_wcount_async = 4;
 	nsw_wcount_async_max = nsw_wcount_async;
 	mtx_unlock(&pbuf_mtx);
 
 	/*
 	 * Initialize our zone.  Right now I'm just guessing on the number
 	 * we need based on the number of pages in the system.  Each swblock
 	 * can hold 32 pages, so this is probably overkill.  This reservation
 	 * is typically limited to around 32MB by default.
 	 */
 	n = vm_cnt.v_page_count / 2;
 	/* A non-zero maxswzone caps the zone size in bytes. */
 	if (maxswzone && n > maxswzone / sizeof(struct swblock))
 		n = maxswzone / sizeof(struct swblock);
 	n2 = n;
 	swap_zone = uma_zcreate("SWAPMETA", sizeof(struct swblock), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM);
 	if (swap_zone == NULL)
 		panic("failed to create swap_zone.");
 	do {
 		if (uma_zone_reserve_kva(swap_zone, n))
 			break;
 		/*
 		 * if the allocation failed, try a zone two thirds the
 		 * size of the previous attempt.
 		 */
 		n -= ((n + 2) / 3);
 	} while (n > 0);
 	if (n2 != n)
 		printf("Swap zone entries reduced from %lu to %lu.\n", n2, n);
 	/* Publish the resulting limits through the sysctl variables. */
 	swap_maxpages = n * SWAP_META_PAGES;
 	swzone = n * sizeof(struct swblock);
 	/* From here on n2 holds the final entry count, used to size the hash. */
 	n2 = n;
 
 	/*
 	 * Initialize our meta-data hash table.  The swapper does not need to
 	 * be quite as efficient as the VM system, so we do not use an
 	 * oversized hash table.
 	 *
 	 * 	n: 		size of hash table, must be power of 2
 	 *	swhash_mask:	hash table index mask
 	 */
 	/* Smallest power of 2 that is at least n2 / 8 (and at least 1). */
 	for (n = 1; n < n2 / 8; n *= 2)
 		;
 	swhash = malloc(sizeof(struct swblock *) * n, M_VMPGDATA, M_WAITOK | M_ZERO);
 	swhash_mask = n - 1;
 	mtx_init(&swhash_mtx, "swap_pager swhash", NULL, MTX_DEF);
 }
 
 /*
  * Allocate and initialize a new OBJT_SWAP object for "handle".
  *
  * When "cred" is not NULL, "size" bytes of swap are first reserved
  * against it; if that reservation is refused NULL is returned and
  * nothing is allocated.  On success the credential is held and
  * recorded in the object together with the charge.
  */
 static vm_object_t
 swap_pager_alloc_init(void *handle, struct ucred *cred, vm_ooffset_t size,
     vm_ooffset_t offset)
 {
 	vm_object_t obj;
 
 	if (cred != NULL) {
 		if (swap_reserve_by_cred(size, cred) == 0)
 			return (NULL);
 		crhold(cred);
 	}
 
 	/* Object size in pages, rounding the end of the range up. */
 	obj = vm_object_allocate(OBJT_SWAP,
 	    OFF_TO_IDX(offset + PAGE_MASK + size));
 	obj->handle = handle;
 	if (cred != NULL) {
 		obj->cred = cred;
 		obj->charge = size;
 	}
 	obj->un_pager.swp.swp_bcount = 0;
 	return (obj);
 }
 
 /*
  * SWAP_PAGER_ALLOC() -	allocate a new OBJT_SWAP VM object and instantiate
  *			its metadata structures.
  *
  *	This routine is called from the mmap and fork code to create a new
  *	OBJT_SWAP object.
  *
  *	This routine must ensure that no live duplicate is created for
  *	the named object request, which is protected against by
  *	holding the sw_alloc_sx lock in case handle != NULL.
  */
 static vm_object_t
 swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
     vm_ooffset_t offset, struct ucred *cred)
 {
 	vm_object_t obj;
 
 	/* Unnamed request: nothing to look up and no list to join. */
 	if (handle == NULL)
 		return (swap_pager_alloc_init(handle, cred, size, offset));
 
 	/*
 	 * Named request: reference the existing object for this handle
 	 * or create it.  sw_alloc_sx is held across lookup and insertion
 	 * so that no live duplicate can be created.  There should not be
 	 * a race here against swp_pager_meta_build() as called from
 	 * vm_page_remove() in regards to the lookup of the handle.
 	 */
 	sx_xlock(&sw_alloc_sx);
 	obj = vm_pager_object_lookup(NOBJLIST(handle), handle);
 	if (obj == NULL) {
 		obj = swap_pager_alloc_init(handle, cred, size, offset);
 		if (obj != NULL) {
 			TAILQ_INSERT_TAIL(NOBJLIST(obj->handle), obj,
 			    pager_object_list);
 		}
 	}
 	sx_xunlock(&sw_alloc_sx);
 	return (obj);
 }
 
 /*
  * SWAP_PAGER_DEALLOC() -	remove swap metadata from object
  *
  *	The swap backing for the object is destroyed.  The code is
  *	designed such that we can reinstantiate it later, but this
  *	routine is typically called only when the entire object is
  *	about to be destroyed.
  *
  *	The object must be locked.
  */
 static void
 swap_pager_dealloc(vm_object_t object)
 {
 
 	/* The caller holds the object write lock and has marked it dead. */
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT((object->flags & OBJ_DEAD) != 0, ("dealloc of reachable obj"));
 
 	/*
 	 * Remove from list right away so lookups will fail if we block for
 	 * pageout completion.
 	 */
 	if (object->handle != NULL) {
 		/*
 		 * The object lock is dropped while sw_alloc_sx is taken and
 		 * is re-acquired afterwards.
 		 */
 		VM_OBJECT_WUNLOCK(object);
 		sx_xlock(&sw_alloc_sx);
 		TAILQ_REMOVE(NOBJLIST(object->handle), object,
 		    pager_object_list);
 		sx_xunlock(&sw_alloc_sx);
 		VM_OBJECT_WLOCK(object);
 	}
 
 	/* Wait for paging in progress on this object to drain. */
 	vm_object_pip_wait(object, "swpdea");
 
 	/*
 	 * Free all remaining metadata.  We only bother to free it from
 	 * the swap meta data.  We do not attempt to free swapblk's still
 	 * associated with vm_page_t's for this object.  We do not care
 	 * if paging is still in progress on some objects.
 	 */
 	swp_pager_meta_free_all(object);
 	object->handle = NULL;
 	object->type = OBJT_DEAD;
 }
 
 /************************************************************************
  *			SWAP PAGER BITMAP ROUTINES			*
  ************************************************************************/
 
 /*
  * SWP_PAGER_GETSWAPSPACE() -	allocate raw swap space
  *
  *	Allocate swap for the requested number of pages.  The starting
  *	swap block number (a page index) is returned or SWAPBLK_NONE
  *	if the allocation failed.
  *
  *	Also has the side effect of advising that somebody made a mistake
  *	when they configured swap and didn't configure enough.
  *
  *	This routine may not sleep.
  *
  *	We allocate in round-robin fashion from the configured devices.
  */
 static daddr_t
 swp_pager_getswapspace(int npages)
 {
 	struct swdevt *dev;
 	daddr_t blk;
 	int tries;
 
 	blk = SWAPBLK_NONE;
 	mtx_lock(&sw_dev_mtx);
 
 	/*
 	 * Visit each configured device at most once, starting at the
 	 * round-robin cursor and wrapping to the head of the list.
 	 * Devices that are being closed are skipped.
 	 */
 	dev = swdevhd;
 	for (tries = 0; tries < nswapdev; tries++) {
 		if (dev == NULL)
 			dev = TAILQ_FIRST(&swtailq);
 		if ((dev->sw_flags & SW_CLOSING) == 0) {
 			blk = blist_alloc(dev->sw_blist, npages);
 			if (blk != SWAPBLK_NONE)
 				break;
 		}
 		dev = TAILQ_NEXT(dev, sw_list);
 	}
 
 	if (blk != SWAPBLK_NONE) {
 		/* Convert the per-device block into a global block number. */
 		blk += dev->sw_first;
 		dev->sw_used += npages;
 		swap_pager_avail -= npages;
 		swp_sizecheck();
 		/* The next allocation starts at the following device. */
 		swdevhd = TAILQ_NEXT(dev, sw_list);
 	} else {
 		/* No device could satisfy the request; complain once. */
 		if (swap_pager_full != 2) {
 			printf("swap_pager_getswapspace(%d): failed\n", npages);
 			swap_pager_full = 2;
 			swap_pager_almost_full = 1;
 		}
 		swdevhd = NULL;
 	}
 
 	mtx_unlock(&sw_dev_mtx);
 	return (blk);
 }
 
 /*
  * Report whether swap block "blk" lies in the half-open range
  * [sw_first, sw_end) of device "sp".  Returns 1 if so, 0 otherwise.
  */
 static int
 swp_pager_isondev(daddr_t blk, struct swdevt *sp)
 {
 
 	if (blk < sp->sw_first)
 		return (0);
 	return (blk < sp->sw_end);
 }
 
 /*
  * Hand buffer "bp" to the strategy routine of the swap device whose
  * block range contains bp->b_blkno.  Panics when no configured device
  * covers that block.
  */
 static void
 swp_pager_strategy(struct buf *bp)
 {
 	struct swdevt *dev;
 
 	mtx_lock(&sw_dev_mtx);
 	TAILQ_FOREACH(dev, &swtailq, sw_list) {
 		if (bp->b_blkno < dev->sw_first || bp->b_blkno >= dev->sw_end)
 			continue;
 
 		/* Found the device; the list lock is dropped before the I/O. */
 		mtx_unlock(&sw_dev_mtx);
 		if ((dev->sw_flags & SW_UNMAPPED) == 0 ||
 		    !unmapped_buf_allowed) {
 			/* Map the buffer's pages at b_data. */
 			pmap_qenter((vm_offset_t)bp->b_data,
 			    &bp->b_pages[0], bp->b_bcount / PAGE_SIZE);
 		} else {
 			/* Unmapped I/O: no kernel mapping is entered. */
 			bp->b_data = unmapped_buf;
 			bp->b_offset = 0;
 		}
 		dev->sw_strategy(bp, dev);
 		return;
 	}
 	panic("Swapdev not found");
 }
 
 
 /*
  * SWP_PAGER_FREESWAPSPACE() -	free raw swap space
  *
  *	This routine returns the specified swap blocks back to the bitmap.
  *
  *	This routine may not sleep.
  */
 static void
 swp_pager_freeswapspace(daddr_t blk, int npages)
 {
 	struct swdevt *dev;
 
 	mtx_lock(&sw_dev_mtx);
 	TAILQ_FOREACH(dev, &swtailq, sw_list) {
 		if (blk < dev->sw_first || blk >= dev->sw_end)
 			continue;
 
 		dev->sw_used -= npages;
 		/*
 		 * A device that is being closed keeps its blocks marked
 		 * as allocated, lest they be handed out again.
 		 */
 		if ((dev->sw_flags & SW_CLOSING) == 0) {
 			blist_free(dev->sw_blist, blk - dev->sw_first,
 			    npages);
 			swap_pager_avail += npages;
 			swp_sizecheck();
 		}
 		mtx_unlock(&sw_dev_mtx);
 		return;
 	}
 	panic("Swapdev not found");
 }
 
 /*
  * SWAP_PAGER_FREESPACE() -	frees swap blocks associated with a page
  *				range within an object.
  *
  *	This is a globally accessible routine.
  *
  *	This routine removes swapblk assignments from swap metadata.
  *
  *	The external callers of this routine typically have already destroyed
  *	or renamed vm_page_t's associated with this range in the object so
  *	we should be ok.
  *
  *	The object must be locked.
  */
 void
 swap_pager_freespace(vm_object_t object, vm_pindex_t start, vm_size_t size)
 {
 
 	/* Public entry point; the work is done by the internal helper. */
 	swp_pager_meta_free(object, start, size);
 }
 
 /*
  * SWAP_PAGER_RESERVE() - reserve swap blocks in object
  *
  *	Assigns swap blocks to the "size" pages starting at page index
  *	"start" within the object.  The swap blocks are not zeroed.  Any
  *	previous swap assignment in that range is destroyed.
  *
  *	Swap is allocated in chunks of at most BLIST_MAX_ALLOC blocks; when
  *	a chunk cannot be allocated the request is halved until it fits.
  *	A chunk is never larger than the number of pages still to be
  *	covered, so no allocated block is left unassigned and no metadata
  *	outside [start, start + size) is touched.
  *
  *	The object is write-locked for the duration of the call.
  *
  *	Returns 0 on success, -1 on failure.  On failure the assignments
  *	made by this call are released again.
  */
 int
 swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_size_t size)
 {
 	int n = 0;			/* blocks left in the current chunk */
 	daddr_t blk = SWAPBLK_NONE;	/* next unassigned block of the chunk */
 	vm_pindex_t beg = start;	/* save start index */
 
 	VM_OBJECT_WLOCK(object);
 	while (size) {
 		if (n == 0) {
 			/*
 			 * Start a new chunk, clamped to the pages that
 			 * remain.  Over-allocating here would leak the
 			 * surplus blocks, since nothing would ever hand
 			 * them back to the device's blist.
 			 */
 			n = BLIST_MAX_ALLOC;
 			if (size < (vm_size_t)n)
 				n = (int)size;
 			while ((blk = swp_pager_getswapspace(n)) == SWAPBLK_NONE) {
 				n >>= 1;
 				if (n == 0) {
 					/* Out of swap: undo what was built. */
 					swp_pager_meta_free(object, beg, start - beg);
 					VM_OBJECT_WUNLOCK(object);
 					return (-1);
 				}
 			}
 		}
 		swp_pager_meta_build(object, start, blk);
 		--size;
 		++start;
 		++blk;
 		--n;
 	}
 	/* Every chunk was clamped to the remaining size, so n is 0 here. */
 	VM_OBJECT_WUNLOCK(object);
 	return (0);
 }
 
 /*
  * SWAP_PAGER_COPY() -  copy blocks from source pager to destination pager
  *			and destroy the source.
  *
  *	Copy any valid swapblks from the source to the destination.  In
  *	cases where both the source and destination have a valid swapblk,
  *	we keep the destination's.
  *
  *	This routine is allowed to sleep.  It may sleep allocating metadata
  *	indirectly through swp_pager_meta_build() or if paging is still in
  *	progress on the source.
  *
  *	The source object contains no vm_page_t's (which is just as well)
  *
  *	The source object is of type OBJT_SWAP.
  *
  *	The source and destination objects must be locked.
  *	Both object locks may temporarily be released.
  */
 void
 swap_pager_copy(vm_object_t srcobject, vm_object_t dstobject,
     vm_pindex_t offset, int destroysource)
 {
 	vm_pindex_t i;
 
 	VM_OBJECT_ASSERT_WLOCKED(srcobject);
 	VM_OBJECT_ASSERT_WLOCKED(dstobject);
 
 	/*
 	 * If destroysource is set, we remove the source object from the
 	 * swap_pager internal queue now.
 	 */
 	if (destroysource && srcobject->handle != NULL) {
 		/*
 		 * Both object locks are dropped around the sw_alloc_sx
 		 * section; a paging-in-progress count is added to each
 		 * object before its lock is released and removed once
 		 * the lock has been re-acquired.
 		 */
 		vm_object_pip_add(srcobject, 1);
 		VM_OBJECT_WUNLOCK(srcobject);
 		vm_object_pip_add(dstobject, 1);
 		VM_OBJECT_WUNLOCK(dstobject);
 		sx_xlock(&sw_alloc_sx);
 		TAILQ_REMOVE(NOBJLIST(srcobject->handle), srcobject,
 		    pager_object_list);
 		sx_xunlock(&sw_alloc_sx);
 		VM_OBJECT_WLOCK(dstobject);
 		vm_object_pip_wakeup(dstobject);
 		VM_OBJECT_WLOCK(srcobject);
 		vm_object_pip_wakeup(srcobject);
 	}
 
 	/*
 	 * transfer source to destination.
 	 */
 	/* Destination page i corresponds to source page i + offset. */
 	for (i = 0; i < dstobject->size; ++i) {
 		daddr_t dstaddr;
 
 		/*
 		 * Locate (without changing) the swapblk on the destination,
 		 * unless it is invalid in which case free it silently, or
 		 * if the destination is a resident page, in which case the
 		 * source is thrown away.
 		 */
 		dstaddr = swp_pager_meta_ctl(dstobject, i, 0);
 
 		if (dstaddr == SWAPBLK_NONE) {
 			/*
 			 * Destination has no swapblk and is not resident,
 			 * copy source.
 			 */
 			daddr_t srcaddr;
 
 			/* SWM_POP: take the block out of the source metadata. */
 			srcaddr = swp_pager_meta_ctl(
 			    srcobject,
 			    i + offset,
 			    SWM_POP
 			);
 
 			if (srcaddr != SWAPBLK_NONE) {
 				/*
 				 * swp_pager_meta_build() can sleep.
 				 */
 				/*
 				 * Only the source lock is dropped here; the
 				 * destination stays locked for the build.
 				 */
 				vm_object_pip_add(srcobject, 1);
 				VM_OBJECT_WUNLOCK(srcobject);
 				vm_object_pip_add(dstobject, 1);
 				swp_pager_meta_build(dstobject, i, srcaddr);
 				vm_object_pip_wakeup(dstobject);
 				VM_OBJECT_WLOCK(srcobject);
 				vm_object_pip_wakeup(srcobject);
 			}
 		} else {
 			/*
 			 * Destination has valid swapblk or it is represented
 			 * by a resident page.  We destroy the sourceblock.
 			 */
 
 			swp_pager_meta_ctl(srcobject, i + offset, SWM_FREE);
 		}
 	}
 
 	/*
 	 * Free left over swap blocks in source.
 	 *
 	 * We have to revert the type to OBJT_DEFAULT so we do not accidentally
 	 * double-remove the object from the swap queues.
 	 */
 	if (destroysource) {
 		swp_pager_meta_free_all(srcobject);
 		/*
 		 * Reverting the type is not necessary, the caller is going
 		 * to destroy srcobject directly, but I'm doing it here
 		 * for consistency since we've removed the object from its
 		 * queues.
 		 */
 		srcobject->type = OBJT_DEFAULT;
 	}
 }
 
 /*
  * SWAP_PAGER_HASPAGE() -	determine if we have good backing store for
  *				the requested page.
  *
  *	We determine whether good backing store exists for the requested
  *	page and return TRUE if it does, FALSE if it doesn't.
  *
  *	If TRUE, we also try to determine how much valid, contiguous backing
  *	store exists before and after the requested page.
  */
 static boolean_t
 swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
     int *after)
 {
 	daddr_t base;
 	int run;
 
 	VM_OBJECT_ASSERT_LOCKED(object);
 
 	/*
 	 * Without a swap block at the requested index there is no
 	 * backing store, and therefore no contiguous run either way.
 	 */
 	base = swp_pager_meta_ctl(object, pindex, 0);
 	if (base == SWAPBLK_NONE) {
 		if (before != NULL)
 			*before = 0;
 		if (after != NULL)
 			*after = 0;
 		return (FALSE);
 	}
 
 	/*
 	 * Count preceding pages whose swap blocks directly precede
 	 * ours, without stepping below page index 0.
 	 */
 	if (before != NULL) {
 		run = 1;
 		while (run < SWB_NPAGES && run <= pindex &&
 		    swp_pager_meta_ctl(object, pindex - run, 0) == base - run)
 			run++;
 		*before = run - 1;
 	}
 
 	/*
 	 * Count following pages whose swap blocks directly follow ours.
 	 */
 	if (after != NULL) {
 		run = 1;
 		while (run < SWB_NPAGES &&
 		    swp_pager_meta_ctl(object, pindex + run, 0) == base + run)
 			run++;
 		*after = run - 1;
 	}
 	return (TRUE);
 }
 
 /*
  * SWAP_PAGER_PAGE_UNSWAPPED() - remove swap backing store related to page
  *
  *	This removes any associated swap backing store, whether valid or
  *	not, from the page.
  *
  *	This routine is typically called when a page is made dirty, at
  *	which point any associated swap can be freed.  MADV_FREE also
  *	calls us in a special-case situation
  *
  *	NOTE!!!  If the page is clean and the swap was valid, the caller
  *	should make the page dirty before calling this routine.  This routine
  *	does NOT change the m->dirty status of the page.  Also: MADV_FREE
  *	depends on it.
  *
  *	This routine may not sleep.
  *
  *	The object containing the page must be locked.
  */
 static void
 swap_pager_unswapped(vm_page_t m)
 {
 
 	/* SWM_FREE: drop whatever swap block is recorded for this page. */
 	swp_pager_meta_ctl(m->object, m->pindex, SWM_FREE);
 }
 
 /*
  * swap_pager_getpages() - bring pages in from swap
  *
  *	Attempt to page in the pages in array "m" of length "count".  The caller
  *	may optionally specify that additional pages preceding and succeeding
  *	the specified range be paged in.  The number of such pages is returned
  *	in the "rbehind" and "rahead" parameters, and they will be in the
  *	inactive queue upon return.
  *
  *	The pages in "m" must be busied and will remain busied upon return.
  */
 static int
 swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind,
     int *rahead)
 {
 	struct buf *bp;
 	vm_page_t mpred, msucc, p;
 	vm_pindex_t pindex;
 	daddr_t blk;
 	int i, j, maxahead, maxbehind, reqcount, shift;
 
 	/* Remember the caller's page count; "count" grows further below. */
 	reqcount = count;
 
 	/* The object lock is dropped around the pbuf allocation. */
 	VM_OBJECT_WUNLOCK(object);
 	bp = getpbuf(&nsw_rcount);
 	VM_OBJECT_WLOCK(object);
 
 	/*
 	 * Fail if m[0] has no swap block.  Otherwise maxbehind and maxahead
 	 * receive the lengths of the contiguous swap runs around m[0].
 	 */
 	if (!swap_pager_haspage(object, m[0]->pindex, &maxbehind, &maxahead)) {
 		relpbuf(bp, &nsw_rcount);
 		return (VM_PAGER_FAIL);
 	}
 
 	/*
 	 * Clip the readahead and readbehind ranges to exclude resident pages.
 	 */
 	if (rahead != NULL) {
 		KASSERT(reqcount - 1 <= maxahead,
 		    ("page count %d extends beyond swap block", reqcount));
 		*rahead = imin(*rahead, maxahead - (reqcount - 1));
 		pindex = m[reqcount - 1]->pindex;
 		/* Stop short of the next resident page, if there is one. */
 		msucc = TAILQ_NEXT(m[reqcount - 1], listq);
 		if (msucc != NULL && msucc->pindex - pindex - 1 < *rahead)
 			*rahead = msucc->pindex - pindex - 1;
 	}
 	if (rbehind != NULL) {
 		*rbehind = imin(*rbehind, maxbehind);
 		pindex = m[0]->pindex;
 		/* Stop short of the previous resident page, if there is one. */
 		mpred = TAILQ_PREV(m[0], pglist, listq);
 		if (mpred != NULL && pindex - mpred->pindex - 1 < *rbehind)
 			*rbehind = pindex - mpred->pindex - 1;
 	}
 
 	/*
 	 * Allocate readahead and readbehind pages.
 	 */
 	shift = rbehind != NULL ? *rbehind : 0;
 	if (shift != 0) {
 		/*
 		 * Read-behind pages are stored from slot shift - 1 downward,
 		 * so b_pages[] ends up in ascending pindex order.
 		 */
 		for (i = 1; i <= shift; i++) {
 			p = vm_page_alloc(object, m[0]->pindex - i,
 			    VM_ALLOC_NORMAL);
 			if (p == NULL) {
 				/* Shift allocated pages to the left. */
 				for (j = 0; j < i - 1; j++)
 					bp->b_pages[j] =
 					    bp->b_pages[j + shift - i + 1];
 				break;
 			}
 			bp->b_pages[shift - i] = p;
 		}
 		/* Report how many read-behind pages were actually obtained. */
 		shift = i - 1;
 		*rbehind = shift;
 	}
 	/* The requested pages follow the read-behind pages. */
 	for (i = 0; i < reqcount; i++)
 		bp->b_pages[i + shift] = m[i];
 	if (rahead != NULL) {
 		for (i = 0; i < *rahead; i++) {
 			p = vm_page_alloc(object,
 			    m[reqcount - 1]->pindex + i + 1, VM_ALLOC_NORMAL);
 			if (p == NULL)
 				break;
 			bp->b_pages[shift + reqcount + i] = p;
 		}
 		/* Report how many read-ahead pages were actually obtained. */
 		*rahead = i;
 	}
 	/* From here on "count" covers every page in the buffer. */
 	if (rbehind != NULL)
 		count += *rbehind;
 	if (rahead != NULL)
 		count += *rahead;
 
 	vm_object_pip_add(object, count);
 
 	for (i = 0; i < count; i++)
 		bp->b_pages[i]->oflags |= VPO_SWAPINPROG;
 
 	/* The I/O starts at the swap block of the first page in the buffer. */
 	pindex = bp->b_pages[0]->pindex;
 	blk = swp_pager_meta_ctl(object, pindex, 0);
 	KASSERT(blk != SWAPBLK_NONE,
 	    ("no swap blocking containing %p(%jx)", object, (uintmax_t)pindex));
 
 	VM_OBJECT_WUNLOCK(object);
 
 	bp->b_flags |= B_PAGING;
 	bp->b_iocmd = BIO_READ;
 	bp->b_iodone = swp_pager_async_iodone;
 	bp->b_rcred = crhold(thread0.td_ucred);
 	bp->b_wcred = crhold(thread0.td_ucred);
 	bp->b_blkno = blk;
 	bp->b_bcount = PAGE_SIZE * count;
 	bp->b_bufsize = PAGE_SIZE * count;
 	bp->b_npages = count;
 	/* The completion routine uses these to identify speculative pages. */
 	bp->b_pgbefore = rbehind != NULL ? *rbehind : 0;
 	bp->b_pgafter = rahead != NULL ? *rahead : 0;
 
 	VM_CNT_INC(v_swapin);
 	VM_CNT_ADD(v_swappgsin, count);
 
 	/*
 	 * perform the I/O.  NOTE!!!  bp cannot be considered valid after
 	 * this point because we automatically release it on completion.
 	 * Instead, we look at the one page we are interested in which we
 	 * still hold a lock on even through the I/O completion.
 	 *
 	 * The other pages in our m[] array are also released on completion,
 	 * so we cannot assume they are valid anymore either.
 	 *
 	 * NOTE: b_blkno is destroyed by the call to swapdev_strategy
 	 */
 	BUF_KERNPROC(bp);
 	swp_pager_strategy(bp);
 
 	/*
 	 * Wait for the pages we want to complete.  VPO_SWAPINPROG is always
 	 * cleared on completion.  If an I/O error occurs, SWAPBLK_NONE
 	 * is set in the metadata for each page in the request.
 	 */
 	VM_OBJECT_WLOCK(object);
 	while ((m[0]->oflags & VPO_SWAPINPROG) != 0) {
 		/* Ask the completion routine for a wakeup on this page. */
 		m[0]->oflags |= VPO_SWAPSLEEP;
 		VM_CNT_INC(v_intrans);
 		/*
 		 * NOTE(review): the diagnostic below reads bp fields even
 		 * though the comment above says bp is not valid after the
 		 * strategy call - confirm the printed values are meaningful.
 		 */
 		if (VM_OBJECT_SLEEP(object, &object->paging_in_progress, PSWP,
 		    "swread", hz * 20)) {
 			printf(
 "swap_pager: indefinite wait buffer: bufobj: %p, blkno: %jd, size: %ld\n",
 			    bp->b_bufobj, (intmax_t)bp->b_blkno, bp->b_bcount);
 		}
 	}
 
 	/*
 	 * If we had an unrecoverable read error pages will not be valid.
 	 */
 	for (i = 0; i < reqcount; i++)
 		if (m[i]->valid != VM_PAGE_BITS_ALL)
 			return (VM_PAGER_ERROR);
 
 	return (VM_PAGER_OK);
 
 	/*
 	 * A final note: in a low swap situation, we cannot deallocate swap
 	 * and mark a page dirty here because the caller is likely to mark
 	 * the page clean when we return, causing the page to possibly revert
 	 * to all-zero's later.
 	 */
 }
 
 /*
  * 	swap_pager_getpages_async():
  *
  *	Right now this is emulation of asynchronous operation on top of
  *	swap_pager_getpages().
  */
 static int
 swap_pager_getpages_async(vm_object_t object, vm_page_t *m, int count,
     int *rbehind, int *rahead, pgo_getpages_iodone_t iodone, void *arg)
 {
 	int error, rv;
 
 	/* Do the read synchronously, then deliver the result via iodone. */
 	rv = swap_pager_getpages(object, m, count, rbehind, rahead);
 
 	/* The callback is invoked with the object unlocked. */
 	VM_OBJECT_WUNLOCK(object);
 
 	/* Translate the pager status into an errno for the callback. */
 	if (rv == VM_PAGER_OK)
 		error = 0;
 	else if (rv == VM_PAGER_ERROR)
 		error = EIO;
 	else if (rv == VM_PAGER_FAIL)
 		error = EINVAL;
 	else
 		panic("unhandled swap_pager_getpages() error %d", rv);
 
 	iodone(arg, m, count, error);
 	VM_OBJECT_WLOCK(object);
 
 	return (rv);
 }
 
 /*
  *	swap_pager_putpages:
  *
  *	Assign swap (if necessary) and initiate I/O on the specified pages.
  *
  *	We support both OBJT_DEFAULT and OBJT_SWAP objects.  DEFAULT objects
  *	are automatically converted to SWAP objects.
  *
  *	In a low memory situation we may block in VOP_STRATEGY(), but the new
  *	vm_page reservation system coupled with properly written VFS devices
  *	should ensure that no low-memory deadlock occurs.  This is an area
  *	which needs work.
  *
  *	The parent has N vm_object_pip_add() references prior to
  *	calling us and will remove references for rtvals[] that are
  *	not set to VM_PAGER_PEND.  We need to remove the rest on I/O
  *	completion.
  *
  *	The parent has soft-busy'd the pages it passes us and will unbusy
  *	those whose rtvals[] entry is not set to VM_PAGER_PEND on return.
  *	We need to unbusy the rest on I/O completion.
  */
 static void
 swap_pager_putpages(vm_object_t object, vm_page_t *m, int count,
     int flags, int *rtvals)
 {
 	int i, n;
 	boolean_t sync;
 
 	if (count && m[0]->object != object) {
 		panic("swap_pager_putpages: object mismatch %p/%p",
 		    object,
 		    m[0]->object
 		);
 	}
 
 	/*
 	 * Step 1
 	 *
 	 * Turn object into OBJT_SWAP
 	 * check for bogus sysops
 	 * force sync if not pageout process
 	 */
 	if (object->type != OBJT_SWAP)
 		swp_pager_meta_build(object, 0, SWAPBLK_NONE);
 	/* The object stays unlocked except while metadata is updated below. */
 	VM_OBJECT_WUNLOCK(object);
 
 	n = 0;
 	/* Only the pageout process is allowed to write asynchronously. */
 	if (curproc != pageproc)
 		sync = TRUE;
 	else
 		sync = (flags & VM_PAGER_PUT_SYNC) != 0;
 
 	/*
 	 * Step 2
 	 *
 	 * Assign swap blocks and issue I/O.  We reallocate swap on the fly.
 	 * The page is left dirty until the pageout operation completes
 	 * successfully.
 	 */
 	for (i = 0; i < count; i += n) {
 		int j;
 		struct buf *bp;
 		daddr_t blk;
 
 		/*
 		 * Maximum I/O size is limited by a number of factors.
 		 */
 		n = min(BLIST_MAX_ALLOC, count - i);
 		n = min(n, nsw_cluster_max);
 
 		/*
 		 * Get biggest block of swap we can.  If we fail, fall
 		 * back and try to allocate a smaller block.  Don't go
 		 * overboard trying to allocate space if it would overly
 		 * fragment swap.
 		 */
 		while (
 		    (blk = swp_pager_getswapspace(n)) == SWAPBLK_NONE &&
 		    n > 4
 		) {
 			n >>= 1;
 		}
 		if (blk == SWAPBLK_NONE) {
 			/*
 			 * No swap space: fail the n pages of the last attempt
 			 * and let the loop step past them.
 			 */
 			for (j = 0; j < n; ++j)
 				rtvals[i+j] = VM_PAGER_FAIL;
 			continue;
 		}
 
 		/*
 		 * All I/O parameters have been satisfied, build the I/O
 		 * request and assign the swap space.
 		 */
 		if (sync == TRUE) {
 			bp = getpbuf(&nsw_wcount_sync);
 		} else {
 			bp = getpbuf(&nsw_wcount_async);
 			/* Plain assignment: replaces all flags on the new pbuf. */
 			bp->b_flags = B_ASYNC;
 		}
 		bp->b_flags |= B_PAGING;
 		bp->b_iocmd = BIO_WRITE;
 
 		bp->b_rcred = crhold(thread0.td_ucred);
 		bp->b_wcred = crhold(thread0.td_ucred);
 		bp->b_bcount = PAGE_SIZE * n;
 		bp->b_bufsize = PAGE_SIZE * n;
 		bp->b_blkno = blk;
 
 		/*
 		 * Record the new swap block of every page in the cluster.
 		 * swp_pager_meta_build() frees any previously assigned block.
 		 */
 		VM_OBJECT_WLOCK(object);
 		for (j = 0; j < n; ++j) {
 			vm_page_t mreq = m[i+j];
 
 			swp_pager_meta_build(
 			    mreq->object,
 			    mreq->pindex,
 			    blk + j
 			);
 			vm_page_dirty(mreq);
 			mreq->oflags |= VPO_SWAPINPROG;
 			bp->b_pages[j] = mreq;
 		}
 		VM_OBJECT_WUNLOCK(object);
 		bp->b_npages = n;
 		/*
 		 * Must set dirty range for NFS to work.
 		 */
 		bp->b_dirtyoff = 0;
 		bp->b_dirtyend = bp->b_bcount;
 
 		VM_CNT_INC(v_swapout);
 		VM_CNT_ADD(v_swappgsout, bp->b_npages);
 
 		/*
 		 * We unconditionally set rtvals[] to VM_PAGER_PEND so that we
 		 * can call the async completion routine at the end of a
 		 * synchronous I/O operation.  Otherwise, our caller would
 		 * perform duplicate unbusy and wakeup operations on the page
 		 * and object, respectively.
 		 */
 		for (j = 0; j < n; j++)
 			rtvals[i + j] = VM_PAGER_PEND;
 
 		/*
 		 * asynchronous
 		 *
 		 * NOTE: b_blkno is destroyed by the call to swapdev_strategy
 		 */
 		if (sync == FALSE) {
 			bp->b_iodone = swp_pager_async_iodone;
 			BUF_KERNPROC(bp);
 			swp_pager_strategy(bp);
 			continue;
 		}
 
 		/*
 		 * synchronous
 		 *
 		 * NOTE: b_blkno is destroyed by the call to swapdev_strategy
 		 */
 		bp->b_iodone = bdone;
 		swp_pager_strategy(bp);
 
 		/*
 		 * Wait for the sync I/O to complete.
 		 */
 		bwait(bp, PVM, "swwrt");
 
 		/*
 		 * Now that we are through with the bp, we can call the
 		 * normal async completion, which frees everything up.
 		 */
 		swp_pager_async_iodone(bp);
 	}
 	/* Re-take the object lock that was dropped in step 1. */
 	VM_OBJECT_WLOCK(object);
 }
 
 /*
  *	swp_pager_async_iodone:
  *
  *	Completion routine for asynchronous reads and writes from/to swap.
  *	Also called manually by synchronous code to finish up a bp.
  *
  *	This routine may not sleep.
  */
 static void
 swp_pager_async_iodone(struct buf *bp)
 {
 	int i;
 	vm_object_t object = NULL;
 
 	/*
 	 * report error
 	 */
 	if (bp->b_ioflags & BIO_ERROR) {
 		printf(
 		    "swap_pager: I/O error - %s failed; blkno %ld,"
 			"size %ld, error %d\n",
 		    ((bp->b_iocmd == BIO_READ) ? "pagein" : "pageout"),
 		    (long)bp->b_blkno,
 		    (long)bp->b_bcount,
 		    bp->b_error
 		);
 	}
 
 	/*
 	 * remove the mapping for kernel virtual
 	 */
 	if (buf_mapped(bp))
 		pmap_qremove((vm_offset_t)bp->b_data, bp->b_npages);
 	else
 		bp->b_data = bp->b_kvabase;
 
 	if (bp->b_npages) {
 		object = bp->b_pages[0]->object;
 		VM_OBJECT_WLOCK(object);
 	}
 
 	/*
 	 * cleanup pages.  If an error occurs writing to swap, we are in
 	 * very serious trouble.  If it happens to be a disk error, though,
 	 * we may be able to recover by reassigning the swap later on.  So
 	 * in this case we remove the m->swapblk assignment for the page
 	 * but do not free it in the rlist.  The errornous block(s) are thus
 	 * never reallocated as swap.  Redirty the page and continue.
 	 */
 	for (i = 0; i < bp->b_npages; ++i) {
 		vm_page_t m = bp->b_pages[i];
 
 		m->oflags &= ~VPO_SWAPINPROG;
 		if (m->oflags & VPO_SWAPSLEEP) {
 			m->oflags &= ~VPO_SWAPSLEEP;
 			wakeup(&object->paging_in_progress);
 		}
 
 		if (bp->b_ioflags & BIO_ERROR) {
 			/*
 			 * If an error occurs I'd love to throw the swapblk
 			 * away without freeing it back to swapspace, so it
 			 * can never be used again.  But I can't from an
 			 * interrupt.
 			 */
 			if (bp->b_iocmd == BIO_READ) {
 				/*
 				 * NOTE: for reads, m->dirty will probably
 				 * be overridden by the original caller of
 				 * getpages so don't play cute tricks here.
 				 */
 				m->valid = 0;
 			} else {
 				/*
 				 * If a write error occurs, reactivate page
 				 * so it doesn't clog the inactive list,
 				 * then finish the I/O.
 				 */
 				vm_page_dirty(m);
 				vm_page_lock(m);
 				vm_page_activate(m);
 				vm_page_unlock(m);
 				vm_page_sunbusy(m);
 			}
 		} else if (bp->b_iocmd == BIO_READ) {
 			/*
 			 * NOTE: for reads, m->dirty will probably be
 			 * overridden by the original caller of getpages so
 			 * we cannot set them in order to free the underlying
 			 * swap in a low-swap situation.  I don't think we'd
 			 * want to do that anyway, but it was an optimization
 			 * that existed in the old swapper for a time before
 			 * it got ripped out due to precisely this problem.
 			 */
 			KASSERT(!pmap_page_is_mapped(m),
 			    ("swp_pager_async_iodone: page %p is mapped", m));
 			KASSERT(m->dirty == 0,
 			    ("swp_pager_async_iodone: page %p is dirty", m));
 
 			m->valid = VM_PAGE_BITS_ALL;
 			if (i < bp->b_pgbefore ||
 			    i >= bp->b_npages - bp->b_pgafter)
 				vm_page_readahead_finish(m);
 		} else {
 			/*
 			 * For write success, clear the dirty
 			 * status, then finish the I/O ( which decrements the
 			 * busy count and possibly wakes waiter's up ).
 			 * A page is only written to swap after a period of
 			 * inactivity.  Therefore, we do not expect it to be
 			 * reused.
 			 */
 			KASSERT(!pmap_page_is_write_mapped(m),
 			    ("swp_pager_async_iodone: page %p is not write"
 			    " protected", m));
 			vm_page_undirty(m);
 			vm_page_lock(m);
 			vm_page_deactivate_noreuse(m);
 			vm_page_unlock(m);
 			vm_page_sunbusy(m);
 		}
 	}
 
 	/*
 	 * adjust pip.  NOTE: the original parent may still have its own
 	 * pip refs on the object.
 	 */
 	if (object != NULL) {
 		vm_object_pip_wakeupn(object, bp->b_npages);
 		VM_OBJECT_WUNLOCK(object);
 	}
 
 	/*
 	 * swapdev_strategy() manually sets b_vp and b_bufobj before calling
 	 * bstrategy(). Set them back to NULL now we're done with it, or we'll
 	 * trigger a KASSERT in relpbuf().
 	 */
 	if (bp->b_vp) {
 		    bp->b_vp = NULL;
 		    bp->b_bufobj = NULL;
 	}
 	/*
 	 * release the physical I/O buffer
 	 */
 	relpbuf(
 	    bp,
 	    ((bp->b_iocmd == BIO_READ) ? &nsw_rcount :
 		((bp->b_flags & B_ASYNC) ?
 		    &nsw_wcount_async :
 		    &nsw_wcount_sync
 		)
 	    )
 	);
 }
 
 /*
  *	swap_pager_isswapped:
  *
  *	Return 1 if at least one page in the given object is paged
  *	out to the given swap device.
  *
  *	This routine may not sleep.
  */
 int
 swap_pager_isswapped(vm_object_t object, struct swdevt *sp)
 {
 	struct swblock *swap;
 	vm_pindex_t index;
 	int found, i;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	if (object->type != OBJT_SWAP)
 		return (0);
 
 	/*
 	 * Walk the object's page indices one swblock at a time until every
 	 * swblock counted in swp_bcount has been examined.  Only swblocks
 	 * that actually exist count toward the total, so an object whose
 	 * swap metadata is sparse (gaps between swblocks) is still scanned
 	 * completely.  swp_pager_meta_free_all() relies on swp_bcount in the
 	 * same way to terminate its scan.
 	 */
 	mtx_lock(&swhash_mtx);
 	for (found = 0, index = 0; found < object->un_pager.swp.swp_bcount;
 	    index += SWAP_META_PAGES) {
 		if ((swap = *swp_pager_hash(object, index)) == NULL)
 			continue;
 		found++;
 		for (i = 0; i < SWAP_META_PAGES; ++i) {
 			if (swp_pager_isondev(swap->swb_pages[i], sp)) {
 				mtx_unlock(&swhash_mtx);
 				return (1);
 			}
 		}
 	}
 	mtx_unlock(&swhash_mtx);
 	return (0);
 }
 
 /*
  * Return the current value of the nswapdev counter, which
  * swaponsomething() increments and swapoff_one() decrements.
  */
 int
 swap_pager_nswapdev(void)
 {
 
 	/* The counter is read without taking sw_dev_mtx. */
 	return (nswapdev);
 }
 
 /*
  * SWP_PAGER_FORCE_PAGEIN() - force a swap block to be paged in
  *
  *	This routine dissociates the page at the given index within an object
  *	from its backing store, paging it in if it does not reside in memory.
  *	If the page is paged in, it is marked dirty and placed in the laundry
  *	queue.  The page is marked dirty because it no longer has backing
  *	store.  It is placed in the laundry queue because it has not been
  *	accessed recently.  Otherwise, it would already reside in memory.
  *
  *	We also attempt to swap in all other pages in the swap block.
  *	However, we only guarantee that the one at the specified index is
  *	paged in.
  *
  *	XXX - The code to page the whole block in doesn't work, so we
  *	      revert to the one-by-one behavior for now.  Sigh.
  */
 static inline void
 swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex)
 {
 	vm_page_t pg;
 	int was_valid;
 
 	vm_object_pip_add(object, 1);
 	pg = vm_page_grab(object, pindex, VM_ALLOC_NORMAL);
 	was_valid = (pg->valid == VM_PAGE_BITS_ALL);
 
 	/* Only a page that is not fully valid has to be read from swap. */
 	if (!was_valid &&
 	    swap_pager_getpages(object, &pg, 1, NULL, NULL) != VM_PAGER_OK)
 		panic("swap_pager_force_pagein: read from swap failed");/*XXX*/
 
 	vm_object_pip_wakeup(object);
 
 	/*
 	 * The page is about to lose its backing store, so mark it dirty.
 	 * A page that was already valid is activated; one that had to be
 	 * read in goes to the laundry queue.
 	 */
 	vm_page_dirty(pg);
 	vm_page_lock(pg);
 	if (was_valid)
 		vm_page_activate(pg);
 	else
 		vm_page_launder(pg);
 	vm_page_unlock(pg);
 	vm_page_xunbusy(pg);
 
 	/* Finally release the swap block that backed the page. */
 	vm_pager_page_unswapped(pg);
 }
 
 /*
  *	swap_pager_swapoff:
  *
  *	Page in all of the pages that have been paged out to the
  *	given device.  The corresponding blocks in the bitmap must be
  *	marked as allocated and the device must be flagged SW_CLOSING.
  *	There may be no processes swapped out to the device.
  *
  *	This routine may block.
  */
 static void
 swap_pager_swapoff(struct swdevt *sp)
 {
 	struct swblock *swap;
 	vm_object_t locked_obj, object;
 	vm_pindex_t pindex;
 	int i, j, retries;
 
 	sx_assert(&swdev_syscall_lock, SA_XLOCKED);
 
 	retries = 0;
 	/* locked_obj is the one object whose lock is held across iterations. */
 	locked_obj = NULL;
 full_rescan:
 	mtx_lock(&swhash_mtx);
 	for (i = 0; i <= swhash_mask; i++) { /* '<=' is correct here */
 restart:
 		/*
 		 * Whenever swhash_mtx has been dropped the chain may have
 		 * changed, so bucket i is rescanned from its head.
 		 */
 		for (swap = swhash[i]; swap != NULL; swap = swap->swb_hnext) {
 			object = swap->swb_object;
 			pindex = swap->swb_index;
 			for (j = 0; j < SWAP_META_PAGES; ++j) {
 				if (!swp_pager_isondev(swap->swb_pages[j], sp))
 					continue;
 				if (locked_obj != object) {
 					if (locked_obj != NULL)
 						VM_OBJECT_WUNLOCK(locked_obj);
 					locked_obj = object;
 					/*
 					 * swhash_mtx is held, so only a
 					 * try-lock is attempted here.  On
 					 * failure drop the mutex, block on
 					 * the object lock, and rescan.
 					 */
 					if (!VM_OBJECT_TRYWLOCK(object)) {
 						mtx_unlock(&swhash_mtx);
 						/* Depends on type-stability. */
 						VM_OBJECT_WLOCK(object);
 						mtx_lock(&swhash_mtx);
 						goto restart;
 					}
 				}
 				MPASS(locked_obj == object);
 				/* Page in with the hash mutex released. */
 				mtx_unlock(&swhash_mtx);
 				swp_pager_force_pagein(object, pindex + j);
 				mtx_lock(&swhash_mtx);
 				goto restart;
 			}
 		}
 	}
 	mtx_unlock(&swhash_mtx);
 	if (locked_obj != NULL) {
 		VM_OBJECT_WUNLOCK(locked_obj);
 		locked_obj = NULL;
 	}
 	if (sp->sw_used) {
 		/*
 		 * Objects may be locked or paging to the device being
 		 * removed, so we will miss their pages and need to
 		 * make another pass.  We have marked this device as
 		 * SW_CLOSING, so the activity should finish soon.
 		 */
 		retries++;
 		/* Give up, by panicking, after more than 100 passes. */
 		if (retries > 100) {
 			panic("swapoff: failed to locate %d swap blocks",
 			    sp->sw_used);
 		}
 		pause("swpoff", hz / 20);
 		goto full_rescan;
 	}
 	EVENTHANDLER_INVOKE(swapoff, sp);
 }
 
 /************************************************************************
  *				SWAP META DATA 				*
  ************************************************************************
  *
  *	These routines manipulate the swap metadata stored in the
  *	OBJT_SWAP object.
  *
  *	Swap metadata is implemented with a global hash and not directly
  *	linked into the object.  Instead the object simply contains
  *	appropriate tracking counters.
  */
 
 /*
  * SWP_PAGER_META_BUILD() -	add swap block to swap meta data for object
  *
  *	We first convert the object to a swap object if it is a default
  *	object.
  *
  *	The specified swapblk is added to the object's swap metadata.  If
  *	the swapblk is not valid, it is freed instead.  Any previously
  *	assigned swapblk is freed.
  */
 static void
 swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk)
 {
 	/* Set while the "swap zone exhausted" warning is outstanding. */
 	static volatile int exhausted;
 	struct swblock *swap;
 	struct swblock **pswap;
 	int idx;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	/*
 	 * Convert default object to swap object if necessary
 	 */
 	if (object->type != OBJT_SWAP) {
 		object->type = OBJT_SWAP;
 		object->un_pager.swp.swp_bcount = 0;
 		KASSERT(object->handle == NULL, ("default pager with handle"));
 	}
 
 	/*
 	 * Locate hash entry.  If not found create, but if we aren't adding
 	 * anything just return.  If we run out of space in the map we wait
 	 * and, since the hash table may have changed, retry.
 	 */
 retry:
 	mtx_lock(&swhash_mtx);
 	pswap = swp_pager_hash(object, pindex);
 
 	if ((swap = *pswap) == NULL) {
 		int i;
 
 		/* Nothing to store and nothing to free. */
 		if (swapblk == SWAPBLK_NONE)
 			goto done;
 
 		/*
 		 * Allocate without sleeping, since swhash_mtx is held.  The
 		 * pageout process may also draw on the reserve.
 		 */
 		swap = *pswap = uma_zalloc(swap_zone, M_NOWAIT |
 		    (curproc == pageproc ? M_USE_RESERVE : 0));
 		if (swap == NULL) {
 			/* Drop both locks before waiting for memory. */
 			mtx_unlock(&swhash_mtx);
 			VM_OBJECT_WUNLOCK(object);
 			if (uma_zone_exhausted(swap_zone)) {
 				/* Print the warning once per exhaustion. */
 				if (atomic_cmpset_int(&exhausted, 0, 1))
 					printf("swap zone exhausted, "
 					    "increase kern.maxswzone\n");
 				vm_pageout_oom(VM_OOM_SWAPZ);
 				pause("swzonex", 10);
 			} else
 				VM_WAIT;
 			VM_OBJECT_WLOCK(object);
 			goto retry;
 		}
 
 		/* Allocation worked again: clear the exhaustion flag. */
 		if (atomic_cmpset_int(&exhausted, 1, 0))
 			printf("swap zone ok\n");
 
 		/* Initialize the new swblock with every slot empty. */
 		swap->swb_hnext = NULL;
 		swap->swb_object = object;
 		swap->swb_index = pindex & ~(vm_pindex_t)SWAP_META_MASK;
 		swap->swb_count = 0;
 
 		++object->un_pager.swp.swp_bcount;
 
 		for (i = 0; i < SWAP_META_PAGES; ++i)
 			swap->swb_pages[i] = SWAPBLK_NONE;
 	}
 
 	/*
 	 * Delete prior contents of metadata
 	 */
 	idx = pindex & SWAP_META_MASK;
 
 	if (swap->swb_pages[idx] != SWAPBLK_NONE) {
 		swp_pager_freeswapspace(swap->swb_pages[idx], 1);
 		--swap->swb_count;
 	}
 
 	/*
 	 * Enter block into metadata
 	 */
 	swap->swb_pages[idx] = swapblk;
 	if (swapblk != SWAPBLK_NONE)
 		++swap->swb_count;
 done:
 	mtx_unlock(&swhash_mtx);
 }
 
 /*
  * SWP_PAGER_META_FREE() - free a range of blocks in the object's swap metadata
  *
  *	The requested range of blocks is freed, with any associated swap
  *	returned to the swap bitmap.
  *
  *	This routine will free swap metadata structures as they are cleaned
  *	out.  This routine does *NOT* operate on swap metadata associated
  *	with resident pages.
  */
 static void
 swp_pager_meta_free(vm_object_t object, vm_pindex_t index, vm_pindex_t count)
 {
 	struct swblock **pswap, *swap;
 	vm_pindex_t c;
 	daddr_t v;
 	int n, sidx;
 
 	VM_OBJECT_ASSERT_LOCKED(object);
 	if (object->type != OBJT_SWAP || count == 0)
 		return;
 
 	mtx_lock(&swhash_mtx);
 	/* c is the number of pages of the range accounted for so far. */
 	for (c = 0; c < count;) {
 		pswap = swp_pager_hash(object, index);
 		/* sidx is the first slot in this swblock, n the slots left. */
 		sidx = index & SWAP_META_MASK;
 		n = SWAP_META_PAGES - sidx;
 		/* Advance index to the start of the next swblock. */
 		index += n;
 		if ((swap = *pswap) == NULL) {
 			/* No metadata here: skip the rest of this swblock. */
 			c += n;
 			continue;
 		}
 		for (; c < count && sidx < SWAP_META_PAGES; ++c, ++sidx) {
 			if ((v = swap->swb_pages[sidx]) == SWAPBLK_NONE)
 				continue;
 			swp_pager_freeswapspace(v, 1);
 			swap->swb_pages[sidx] = SWAPBLK_NONE;
 			if (--swap->swb_count == 0) {
 				/*
 				 * The swblock is now empty: unlink and free
 				 * it, then account for the slots that will
 				 * not be visited because of the break.
 				 */
 				*pswap = swap->swb_hnext;
 				uma_zfree(swap_zone, swap);
 				--object->un_pager.swp.swp_bcount;
 				c += SWAP_META_PAGES - sidx;
 				break;
 			}
 		}
 	}
 	mtx_unlock(&swhash_mtx);
 }
 
 /*
  * SWP_PAGER_META_FREE_ALL() - destroy all swap metadata associated with object
  *
  *	This routine locates and destroys all swap metadata associated with
  *	an object.
  */
 static void
 swp_pager_meta_free_all(vm_object_t object)
 {
 	struct swblock **slot, *blk;
 	vm_pindex_t pi;
 	daddr_t swb;
 	int k;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	if (object->type != OBJT_SWAP)
 		return;
 
 	/*
 	 * Step through the object one swblock at a time until the object's
 	 * swblock count drops to zero.  The hash mutex is taken and released
 	 * for each step.
 	 */
 	for (pi = 0; object->un_pager.swp.swp_bcount != 0;
 	    pi += SWAP_META_PAGES) {
 		mtx_lock(&swhash_mtx);
 		slot = swp_pager_hash(object, pi);
 		blk = *slot;
 		if (blk == NULL) {
 			mtx_unlock(&swhash_mtx);
 			continue;
 		}
 
 		/* Return every assigned swap block to the free pool. */
 		for (k = 0; k < SWAP_META_PAGES; k++) {
 			swb = blk->swb_pages[k];
 			if (swb == SWAPBLK_NONE)
 				continue;
 			--blk->swb_count;
 			swp_pager_freeswapspace(swb, 1);
 		}
 		if (blk->swb_count != 0)
 			panic(
 			    "swap_pager_meta_free_all: swb_count != 0");
 
 		/* Unlink the emptied swblock and release it. */
 		*slot = blk->swb_hnext;
 		uma_zfree(swap_zone, blk);
 		--object->un_pager.swp.swp_bcount;
 		mtx_unlock(&swhash_mtx);
 	}
 }
 
 /*
  * SWP_PAGER_METACTL() -  misc control of swap and vm_page_t meta data.
  *
  *	This routine is capable of looking up, popping, or freeing
  *	swapblk assignments in the swap meta data or in the vm_page_t.
  *	The routine typically returns the swapblk being looked-up, or popped,
  *	or SWAPBLK_NONE if the block was freed, or SWAPBLK_NONE if the block
  *	was invalid.  This routine will automatically free any invalid
  *	meta-data swapblks.
  *
  *	It is not possible to store invalid swapblks in the swap meta data
  *	(other than a literal 'SWAPBLK_NONE'), so we don't bother checking.
  *
  *	When acting on a busy resident page and paging is in progress, we
  *	have to wait until paging is complete but otherwise can act on the
  *	busy page.
  *
  *	SWM_FREE	remove and free swap block from metadata
  *	SWM_POP		remove from meta data but do not free.. pop it out
  */
 static daddr_t
 swp_pager_meta_ctl(vm_object_t object, vm_pindex_t pindex, int flags)
 {
 	struct swblock **slot, *blk;
 	daddr_t ret;
 	int off;
 
 	VM_OBJECT_ASSERT_LOCKED(object);
 
 	/*
 	 * Metadata exists only for OBJT_SWAP objects, and even then the
 	 * swblock covering pindex may not have been allocated.
 	 */
 	if (object->type != OBJT_SWAP)
 		return (SWAPBLK_NONE);
 
 	ret = SWAPBLK_NONE;
 	mtx_lock(&swhash_mtx);
 	slot = swp_pager_hash(object, pindex);
 	blk = *slot;
 	if (blk == NULL)
 		goto out;
 
 	off = pindex & SWAP_META_MASK;
 	ret = blk->swb_pages[off];
 	if (ret == SWAPBLK_NONE)
 		goto out;
 
 	/* SWM_FREE releases the block, so the caller gets SWAPBLK_NONE. */
 	if ((flags & SWM_FREE) != 0) {
 		swp_pager_freeswapspace(ret, 1);
 		ret = SWAPBLK_NONE;
 	}
 
 	/* Both SWM_FREE and SWM_POP clear the slot in the metadata. */
 	if ((flags & (SWM_FREE | SWM_POP)) != 0) {
 		blk->swb_pages[off] = SWAPBLK_NONE;
 		if (--blk->swb_count == 0) {
 			/* Last entry gone: unlink and free the swblock. */
 			*slot = blk->swb_hnext;
 			uma_zfree(swap_zone, blk);
 			--object->un_pager.swp.swp_bcount;
 		}
 	}
 out:
 	mtx_unlock(&swhash_mtx);
 	return (ret);
 }
 
 /*
  * Returns the least page index which is greater than or equal to the
  * parameter pindex and for which there is a swap block allocated.
  * Returns object's size if the object's type is not swap or if there
  * are no allocated swap blocks for the object after the requested
  * pindex.
  */
 vm_pindex_t
 swap_pager_find_least(vm_object_t object, vm_pindex_t pindex)
 {
 	struct swblock *blk;
 	vm_pindex_t cur, end, pi, result;
 	int slot;
 
 	VM_OBJECT_ASSERT_LOCKED(object);
 	if (object->type != OBJT_SWAP || object->un_pager.swp.swp_bcount == 0)
 		return (object->size);
 
 	/* The object's size is the answer when nothing is found. */
 	result = object->size;
 
 	mtx_lock(&swhash_mtx);
 	for (cur = pindex; cur < object->size; cur = end) {
 		blk = *swp_pager_hash(object, cur);
 
 		/* end is the next swblock boundary, clipped to the size. */
 		end = rounddown2(cur + SWAP_META_PAGES, SWAP_META_PAGES);
 		if (end > object->size)
 			end = object->size;
 		if (blk == NULL)
 			continue;
 
 		/* Scan the slots of this swblock from cur up to end. */
 		slot = cur & SWAP_META_MASK;
 		for (pi = cur; pi < end; pi++, slot++) {
 			if (blk->swb_pages[slot] != SWAPBLK_NONE) {
 				result = pi;
 				goto out;
 			}
 		}
 	}
 out:
 	mtx_unlock(&swhash_mtx);
 	return (result);
 }
 
 /*
  * System call swapon(name) enables swapping on device name,
  * which must be in the swdevsw.  Return EBUSY
  * if already swapping on this device.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct swapon_args {
 	char *name;	/* user-space path that sys_swapon() resolves with namei() */
 };
 #endif
 
 /*
  * MPSAFE
  */
 /* ARGSUSED */
 int
 sys_swapon(struct thread *td, struct swapon_args *uap)
 {
 	struct vattr attr;
 	struct vnode *vp;
 	struct nameidata nd;
 	int error;
 
 	/* The caller needs the PRIV_SWAPON privilege. */
 	error = priv_check(td, PRIV_SWAPON);
 	if (error)
 		return (error);
 
 	/* Serializes against sys_swapoff() and swapoff_all(). */
 	sx_xlock(&swdev_syscall_lock);
 
 	/*
 	 * Swap metadata may not fit in the KVM if we have physical
 	 * memory of >1GB.
 	 */
 	if (swap_zone == NULL) {
 		error = ENOMEM;
 		goto done;
 	}
 
 	/* Resolve the user-supplied path to a vnode. */
 	NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
 	    uap->name, td);
 	error = namei(&nd);
 	if (error)
 		goto done;
 
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
 
 	/*
 	 * Disks go through swapongeom().  Otherwise only a regular file on
 	 * a network filesystem is accepted.  For any other vnode, error
 	 * keeps whatever vn_isdisk() stored in it.
 	 */
 	if (vn_isdisk(vp, &error)) {
 		error = swapongeom(vp);
 	} else if (vp->v_type == VREG &&
 	    (vp->v_mount->mnt_vfc->vfc_flags & VFCF_NETWORK) != 0 &&
 	    (error = VOP_GETATTR(vp, &attr, td->td_ucred)) == 0) {
 		/*
 		 * Allow direct swapping to NFS regular files in the same
 		 * way that nfs_mountroot() sets up diskless swapping.
 		 */
 		error = swaponvp(td, vp, attr.va_size / DEV_BSIZE);
 	}
 
 	/* The vnode reference is dropped only on failure. */
 	if (error)
 		vrele(vp);
 done:
 	sx_xunlock(&swdev_syscall_lock);
 	return (error);
 }
 
 /*
  * Check that the total amount of swap currently configured does not
  * exceed half the theoretical maximum.  If it does, print a warning
  * message and return -1; otherwise, return 0.
  */
 static int
 swapon_check_swzone(unsigned long npages)
 {
 	unsigned long maxpages, recommended;
 
 	/*
 	 * Upper bound on the pages the swap zone can describe, assuming
 	 * every swblock is completely filled.
 	 */
 	maxpages = uma_zone_get_max(swap_zone) * SWAP_META_PAGES;
 
 	/* Half of that bound is the recommended ceiling. */
 	recommended = maxpages / 2;
 	if (npages <= recommended)
 		return (0);
 
 	printf("warning: total configured swap (%lu pages) "
 	    "exceeds maximum recommended amount (%lu pages).\n",
 	    npages, recommended);
 	printf("warning: increase kern.maxswzone "
 	    "or reduce amount of swap.\n");
 	return (-1);
 }
 
 /*
  * Create a swdevt for a new swap device of nblks DEV_BSIZE blocks, give it
  * a block range after all existing devices, append it to swtailq and update
  * the global swap totals.
  */
 static void
 swaponsomething(struct vnode *vp, void *id, u_long nblks,
     sw_strategy_t *strategy, sw_close_t *close, dev_t dev, int flags)
 {
 	struct swdevt *sp, *tsp;
 	swblk_t dvbase;
 	u_long mblocks;
 
 	/*
 	 * nblks is in DEV_BSIZE'd chunks, convert to PAGE_SIZE'd chunks.
 	 * First chop nblks off to page-align it, then convert.
 	 *
 	 * sw->sw_nblks is in page-sized chunks now too.
 	 */
 	nblks &= ~(ctodb(1) - 1);
 	nblks = dbtoc(nblks);
 
 	/*
 	 * If we go beyond this, we get overflows in the radix
 	 * tree bitmap code.
 	 */
 	mblocks = 0x40000000 / BLIST_META_RADIX;
 	if (nblks > mblocks) {
 		printf(
     "WARNING: reducing swap size to maximum of %luMB per unit\n",
 		    mblocks / 1024 / 1024 * PAGE_SIZE);
 		nblks = mblocks;
 	}
 
 	/* The allocation is zeroed, so several stores below are redundant. */
 	sp = malloc(sizeof *sp, M_VMPGDATA, M_WAITOK | M_ZERO);
 	sp->sw_vp = vp;
 	sp->sw_id = id;
 	sp->sw_dev = dev;
 	sp->sw_flags = 0;
 	sp->sw_nblks = nblks;
 	sp->sw_used = 0;
 	sp->sw_strategy = strategy;
 	sp->sw_close = close;
 	sp->sw_flags = flags;
 
 	sp->sw_blist = blist_create(nblks, M_WAITOK);
 	/*
 	 * Do not free the first two blocks in order to avoid overwriting
 	 * any bsd label at the front of the partition
 	 *
 	 * NOTE(review): nblks is unsigned, so "nblks - 2" wraps if a device
 	 * ends up with fewer than two pages - confirm that callers reject
 	 * such devices.
 	 */
 	blist_free(sp->sw_blist, 2, nblks - 2);
 
 	/* Find the first block address past every existing device. */
 	dvbase = 0;
 	mtx_lock(&sw_dev_mtx);
 	TAILQ_FOREACH(tsp, &swtailq, sw_list) {
 		if (tsp->sw_end >= dvbase) {
 			/*
 			 * We put one uncovered page between the devices
 			 * in order to definitively prevent any cross-device
 			 * I/O requests
 			 */
 			dvbase = tsp->sw_end + 1;
 		}
 	}
 	/* The device covers [sw_first, sw_end); its blist is 0-based. */
 	sp->sw_first = dvbase;
 	sp->sw_end = dvbase + nblks;
 	TAILQ_INSERT_TAIL(&swtailq, sp, sw_list);
 	nswapdev++;
 	swap_pager_avail += nblks;
 	swap_total += (vm_ooffset_t)nblks * PAGE_SIZE;
 	/* Only warns; the return value is deliberately not used. */
 	swapon_check_swzone(swap_total / PAGE_SIZE);
 	swp_sizecheck();
 	mtx_unlock(&sw_dev_mtx);
 	EVENTHANDLER_INVOKE(swapon, sp);
 }
 
 /*
  * SYSCALL: swapoff(devname)
  *
  * Disable swapping on the given device.
  *
  * XXX: Badly designed system call: it should use a device index
  * rather than filename as specification.  We keep sw_vp around
  * only to make this work.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct swapoff_args {
 	char *name;	/* user-space path that sys_swapoff() resolves with namei() */
 };
 #endif
 
 /*
  * MPSAFE
  */
 /* ARGSUSED */
 int
 sys_swapoff(struct thread *td, struct swapoff_args *uap)
 {
 	struct vnode *vp;
 	struct nameidata nd;
 	struct swdevt *sp;
 	int error;
 
 	/* The caller needs the PRIV_SWAPOFF privilege. */
 	error = priv_check(td, PRIV_SWAPOFF);
 	if (error)
 		return (error);
 
 	/* Serializes against sys_swapon() and swapoff_all(). */
 	sx_xlock(&swdev_syscall_lock);
 
 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, UIO_USERSPACE, uap->name,
 	    td);
 	error = namei(&nd);
 	if (error)
 		goto done;
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	/*
 	 * NOTE(review): the reference namei() returned on vp is not released
 	 * anywhere in this function - confirm whether that is intended.
 	 */
 	vp = nd.ni_vp;
 
 	/* Find the swap device that was configured on this vnode. */
 	mtx_lock(&sw_dev_mtx);
 	TAILQ_FOREACH(sp, &swtailq, sw_list) {
 		if (sp->sw_vp == vp)
 			break;
 	}
 	mtx_unlock(&sw_dev_mtx);
 	/* sp is NULL when the loop ran off the end of the list. */
 	if (sp == NULL) {
 		error = EINVAL;
 		goto done;
 	}
 	error = swapoff_one(sp, td->td_ucred);
 done:
 	sx_xunlock(&swdev_syscall_lock);
 	return (error);
 }
 
 /*
  * Turn off swapping on a single device and free its swdevt.
  *
  * Must be called with swdev_syscall_lock held exclusively (asserted).
  * When MAC is compiled in, mac_system_check_swapoff() is consulted first
  * with the device vnode locked.  The request is refused with ENOMEM when
  * free pages plus remaining swap cannot absorb the device's blocks plus
  * the nswap_lowat reserve.  Otherwise the device is flagged SW_CLOSING,
  * its block list is filled so nothing more can be allocated from it, its
  * contents are paged in by swap_pager_swapoff(), it is closed through its
  * sw_close method, unlinked from swtailq and freed.
  *
  * Returns 0 on success, ENOMEM, or the MAC check's error.  On success sp
  * has been freed and must not be used by the caller.
  */
 static int
 swapoff_one(struct swdevt *sp, struct ucred *cred)
 {
 	u_long nblks, dvbase;
 #ifdef MAC
 	int error;
 #endif
 
 	sx_assert(&swdev_syscall_lock, SA_XLOCKED);
 #ifdef MAC
 	(void) vn_lock(sp->sw_vp, LK_EXCLUSIVE | LK_RETRY);
 	error = mac_system_check_swapoff(cred, sp->sw_vp);
 	(void) VOP_UNLOCK(sp->sw_vp, 0);
 	if (error != 0)
 		return (error);
 #endif
 	nblks = sp->sw_nblks;
 
 	/*
 	 * We can turn off this swap device safely only if the
 	 * available virtual memory in the system will fit the amount
 	 * of data we will have to page back in, plus an epsilon so
 	 * the system doesn't become critically low on swap space.
 	 */
 	if (vm_cnt.v_free_count + swap_pager_avail < nblks + nswap_lowat)
 		return (ENOMEM);
 
 	/*
 	 * Prevent further allocations on this device.
 	 */
 	mtx_lock(&sw_dev_mtx);
 	sp->sw_flags |= SW_CLOSING;
 	/*
 	 * Walk the device in dmmax-sized strides; whatever blist_fill()
 	 * returns is removed from the global count of available swap
 	 * (presumably the number of blocks it newly marked allocated).
 	 */
 	for (dvbase = 0; dvbase < sp->sw_end; dvbase += dmmax) {
 		swap_pager_avail -= blist_fill(sp->sw_blist,
 		     dvbase, dmmax);
 	}
 	swap_total -= (vm_ooffset_t)nblks * PAGE_SIZE;
 	mtx_unlock(&sw_dev_mtx);
 
 	/*
 	 * Page in the contents of the device and close it.
 	 */
 	swap_pager_swapoff(sp);
 
 	sp->sw_close(curthread, sp);
 	mtx_lock(&sw_dev_mtx);
 	sp->sw_id = NULL;
 	TAILQ_REMOVE(&swtailq, sp, sw_list);
 	nswapdev--;
 	/* With no devices left, mark the swap pager as full. */
 	if (nswapdev == 0) {
 		swap_pager_full = 2;
 		swap_pager_almost_full = 1;
 	}
 	/* Do not leave swdevhd pointing at the device being freed. */
 	if (swdevhd == sp)
 		swdevhd = NULL;
 	mtx_unlock(&sw_dev_mtx);
 	blist_destroy(sp->sw_blist);
 	free(sp, M_VMPGDATA);
 	return (0);
 }
 
 /*
  * Try to remove every configured swap device, using thread0's
  * credentials.  A device that cannot be removed is reported on the
  * console and skipped; successful removals are reported only when
  * bootverbose is set.
  *
  * sw_dev_mtx is dropped around each swapoff_one() call and re-taken before
  * the iteration continues.  The _SAFE iterator is needed because
  * swapoff_one() frees sp on success.
  */
 void
 swapoff_all(void)
 {
 	struct swdevt *sp, *spt;
 	const char *devname;
 	int error;
 
 	sx_xlock(&swdev_syscall_lock);
 
 	mtx_lock(&sw_dev_mtx);
 	TAILQ_FOREACH_SAFE(sp, &swtailq, sw_list, spt) {
 		mtx_unlock(&sw_dev_mtx);
 		/* Fetch the name first; sp is gone once swapoff_one() succeeds. */
 		if (vn_isdisk(sp->sw_vp, NULL))
 			devname = devtoname(sp->sw_vp->v_rdev);
 		else
 			devname = "[file]";
 		error = swapoff_one(sp, thread0.td_ucred);
 		if (error != 0) {
 			printf("Cannot remove swap device %s (error=%d), "
 			    "skipping.\n", devname, error);
 		} else if (bootverbose) {
 			printf("Swap device %s removed.\n", devname);
 		}
 		mtx_lock(&sw_dev_mtx);
 	}
 	mtx_unlock(&sw_dev_mtx);
 
 	sx_xunlock(&swdev_syscall_lock);
 }
 
 void
 swap_pager_status(int *total, int *used)
 {
 	struct swdevt *sp;
 
 	*total = 0;
 	*used = 0;
 	mtx_lock(&sw_dev_mtx);
 	TAILQ_FOREACH(sp, &swtailq, sw_list) {
 		*total += sp->sw_nblks;
 		*used += sp->sw_used;
 	}
 	mtx_unlock(&sw_dev_mtx);
 }
 
 /*
  * Report on the name'th swap device (zero-based position in swtailq).
  *
  *	name	 index of the device to describe
  *	xs	 filled in with the version, device number, flags, size and
  *		 usage of the device
  *	devname	 optional buffer that receives the device name ("[file]"
  *		 when the backing vnode is not a disk); may be NULL
  *	len	 size of the devname buffer in bytes
  *
  * Returns 0 when the device exists and ENOENT otherwise, in which case xs
  * and devname are left untouched.  The list is walked under sw_dev_mtx.
  */
 int
 swap_dev_info(int name, struct xswdev *xs, char *devname, size_t len)
 {
 	struct swdevt *sp;
 	const char *tmp_devname;
 	int error, n;
 
 	n = 0;
 	error = ENOENT;
 	mtx_lock(&sw_dev_mtx);
 	TAILQ_FOREACH(sp, &swtailq, sw_list) {
 		if (n != name) {
 			n++;
 			continue;
 		}
 		xs->xsw_version = XSWDEV_VERSION;
 		xs->xsw_dev = sp->sw_dev;
 		xs->xsw_flags = sp->sw_flags;
 		xs->xsw_nblks = sp->sw_nblks;
 		xs->xsw_used = sp->sw_used;
 		if (devname != NULL) {
 			if (vn_isdisk(sp->sw_vp, NULL))
 				tmp_devname = devtoname(sp->sw_vp->v_rdev);
 			else
 				tmp_devname = "[file]";
 			strncpy(devname, tmp_devname, len);
 			/*
 			 * strncpy() does not terminate the destination when
 			 * the source fills it; never hand back an
 			 * unterminated name.
 			 */
 			if (len > 0)
 				devname[len - 1] = '\0';
 		}
 		error = 0;
 		break;
 	}
 	mtx_unlock(&sw_dev_mtx);
 	return (error);
 }
 
+#if defined(COMPAT_FREEBSD11)
+#define XSWDEV_VERSION_11	1	/* version reported in the compat layout */
+struct xswdev11 {	/* layout returned to callers whose buffer is exactly this size */
+	u_int	xsw_version;
+	uint32_t xsw_dev;	/* receives a truncated copy of xswdev's xsw_dev */
+	int	xsw_flags;
+	int	xsw_nblks;
+	int     xsw_used;
+};
+#endif
+
 static int
 sysctl_vm_swap_info(SYSCTL_HANDLER_ARGS)
 {
 	struct xswdev xs;
+#if defined(COMPAT_FREEBSD11)
+	struct xswdev11 xs11;
+#endif
 	int error;
 
 	if (arg2 != 1)			/* name length */
 		return (EINVAL);
 	error = swap_dev_info(*(int *)arg1, &xs, NULL, 0);
 	if (error != 0)
 		return (error);
-	error = SYSCTL_OUT(req, &xs, sizeof(xs));
+#if defined(COMPAT_FREEBSD11)
+	if (req->oldlen == sizeof(xs11)) {
+		xs11.xsw_version = XSWDEV_VERSION_11;
+		xs11.xsw_dev = xs.xsw_dev; /* truncation */
+		xs11.xsw_flags = xs.xsw_flags;
+		xs11.xsw_nblks = xs.xsw_nblks;
+		xs11.xsw_used = xs.xsw_used;
+		error = SYSCTL_OUT(req, &xs11, sizeof(xs11));
+	} else
+#endif
+		error = SYSCTL_OUT(req, &xs, sizeof(xs));
 	return (error);
 }
 
 /* vm.nswapdev: read-only view of the nswapdev counter. */
 SYSCTL_INT(_vm, OID_AUTO, nswapdev, CTLFLAG_RD, &nswapdev, 0,
     "Number of swap devices");
 /* vm.swap_info: read-only node served by sysctl_vm_swap_info(). */
 SYSCTL_NODE(_vm, OID_AUTO, swap_info, CTLFLAG_RD | CTLFLAG_MPSAFE,
     sysctl_vm_swap_info,
     "Swap statistics by device");
 
 /*
  * vmspace_swap_count() - estimate, in pages, the swap used by a vmspace.
  *
  *	The map must be locked.
  *
  *	Every map entry that is not a submap and is backed by an OBJT_SWAP
  *	object with a non-zero swap block count contributes a share of that
  *	object's swap proportional to the entry's size relative to the
  *	object's size.  One extra page is added per such entry to make up
  *	for fractional losses.
  */
 long
 vmspace_swap_count(struct vmspace *vmspace)
 {
 	vm_map_t map;
 	vm_map_entry_t entry;
 	vm_object_t obj;
 	long npages, total;
 
 	total = 0;
 	map = &vmspace->vm_map;
 
 	for (entry = map->header.next; entry != &map->header;
 	    entry = entry->next) {
 		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
 			continue;
 		obj = entry->object.vm_object;
 		if (obj == NULL)
 			continue;
 
 		VM_OBJECT_WLOCK(obj);
 		if (obj->type == OBJT_SWAP &&
 		    obj->un_pager.swp.swp_bcount != 0) {
 			npages = (entry->end - entry->start) / PAGE_SIZE;
 			total += obj->un_pager.swp.swp_bcount *
 			    SWAP_META_PAGES * npages / obj->size + 1;
 		}
 		VM_OBJECT_WUNLOCK(obj);
 	}
 	return (total);
 }
 
 /*
  * GEOM backend
  *
  * Swapping onto disk devices.
  *
  */
 
 /* Forward declaration; installed as the class's orphan method below. */
 static g_orphan_t swapgeom_orphan;
 
 /* GEOM class under which the swap consumers are created. */
 static struct g_class g_swap_class = {
 	.name = "SWAP",
 	.version = G_VERSION,
 	.orphan = swapgeom_orphan,
 };
 
 DECLARE_GEOM_CLASS(g_swap_class, g_class);
 
 
 /*
  * Tear down a swap consumer: give back the read and write access counts
  * acquired in swapongeom_locked(), detach the consumer and destroy it.
  * Used as a GEOM event callback and called directly from
  * swapgeom_orphan().  "flags" is not examined.
  */
 static void
 swapgeom_close_ev(void *arg, int flags)
 {
 	struct g_consumer *consumer = arg;
 
 	g_access(consumer, -1, -1, 0);
 	g_detach(consumer);
 	g_destroy_consumer(consumer);
 }
 
 /*
  * Take one more reference on the consumer on behalf of an I/O that is
  * about to be issued.  The count lives in cp->index and is protected by
  * sw_dev_mtx, which the caller must hold.
  */
 static void
 swapgeom_acquire(struct g_consumer *cp)
 {
 
 	mtx_assert(&sw_dev_mtx, MA_OWNED);
 	cp->index += 1;
 }
 
 /*
  * Drop one reference on the consumer; sw_dev_mtx must be held.  When the
  * last reference disappears, the actual close is handed to
  * swapgeom_close_ev() via g_post_event(), since this function may run in
  * the biodone context.  sp->sw_id is cleared only if the event was posted
  * successfully.
  */
 static void
 swapgeom_release(struct g_consumer *cp, struct swdevt *sp)
 {
 
 	mtx_assert(&sw_dev_mtx, MA_OWNED);
 	cp->index -= 1;
 	if (cp->index != 0)
 		return;
 	if (g_post_event(swapgeom_close_ev, cp, M_NOWAIT, NULL) == 0)
 		sp->sw_id = NULL;
 }
 
 /*
  * Completion callback for bios issued by swapgeom_strategy() (installed
  * there as bio_done).  Copies the bio's flags, error and residual into the
  * originating buf (stored in bio_caller2) and completes it with bufdone().
  * It then drops the consumer reference taken for the I/O and destroys the
  * bio.
  */
 static void
 swapgeom_done(struct bio *bp2)
 {
 	struct swdevt *sp;
 	struct buf *bp;
 	struct g_consumer *cp;
 
 	bp = bp2->bio_caller2;
 	cp = bp2->bio_from;
 	bp->b_ioflags = bp2->bio_flags;
 	if (bp2->bio_error)
 		bp->b_ioflags |= BIO_ERROR;
 	/* Residual is whatever part of the request the bio did not complete. */
 	bp->b_resid = bp->b_bcount - bp2->bio_completed;
 	bp->b_error = bp2->bio_error;
 	bufdone(bp);
 	sp = bp2->bio_caller1;
 	mtx_lock(&sw_dev_mtx);
 	swapgeom_release(cp, sp);
 	mtx_unlock(&sw_dev_mtx);
 	g_destroy_bio(bp2);
 }
 
 /*
  * Strategy routine for GEOM-backed swap devices: translate a buf into a
  * bio and send it to the device's consumer.
  *
  * The buf is failed immediately through bufdone() with ENXIO when the
  * device no longer has a consumer (sw_id == NULL), and with ENOMEM when no
  * bio could be allocated.  Otherwise a consumer reference is held for the
  * duration of the I/O and released in swapgeom_done().
  */
 static void
 swapgeom_strategy(struct buf *bp, struct swdevt *sp)
 {
 	struct bio *bio;
 	struct g_consumer *cp;
 
 	mtx_lock(&sw_dev_mtx);
 	cp = sp->sw_id;
 	if (cp == NULL) {
 		mtx_unlock(&sw_dev_mtx);
 		bp->b_error = ENXIO;
 		bp->b_ioflags |= BIO_ERROR;
 		bufdone(bp);
 		return;
 	}
 	swapgeom_acquire(cp);
 	mtx_unlock(&sw_dev_mtx);
 	/*
 	 * Writes and reads use different bio allocators.
 	 * NOTE(review): presumably g_new_bio() may fail without sleeping
 	 * while g_alloc_bio() waits -- confirm against the GEOM API.
 	 */
 	if (bp->b_iocmd == BIO_WRITE)
 		bio = g_new_bio();
 	else
 		bio = g_alloc_bio();
 	if (bio == NULL) {
 		/* Undo the reference taken above before failing the buf. */
 		mtx_lock(&sw_dev_mtx);
 		swapgeom_release(cp, sp);
 		mtx_unlock(&sw_dev_mtx);
 		bp->b_error = ENOMEM;
 		bp->b_ioflags |= BIO_ERROR;
 		bufdone(bp);
 		return;
 	}
 
 	/* caller1/caller2 carry the device and buf to swapgeom_done(). */
 	bio->bio_caller1 = sp;
 	bio->bio_caller2 = bp;
 	bio->bio_cmd = bp->b_iocmd;
 	/* b_blkno is in pages; rebase it to the start of this device. */
 	bio->bio_offset = (bp->b_blkno - sp->sw_first) * PAGE_SIZE;
 	bio->bio_length = bp->b_bcount;
 	bio->bio_done = swapgeom_done;
 	if (!buf_mapped(bp)) {
 		/* Unmapped buf: pass the page array instead of a KVA. */
 		bio->bio_ma = bp->b_pages;
 		bio->bio_data = unmapped_buf;
 		bio->bio_ma_offset = (vm_offset_t)bp->b_offset & PAGE_MASK;
 		bio->bio_ma_n = bp->b_npages;
 		bio->bio_flags |= BIO_UNMAPPED;
 	} else {
 		bio->bio_data = bp->b_data;
 		bio->bio_ma = NULL;
 	}
 	g_io_request(bio, cp);
 	return;
 }
 
 /*
  * Orphan method of g_swap_class.  Marks the swap device that uses this
  * consumer (if any) as SW_CLOSING and drops the consumer's initial
  * reference.  If that was the last reference and a device was found, the
  * device's sw_id is cleared and the consumer is closed synchronously.
  */
 static void
 swapgeom_orphan(struct g_consumer *cp)
 {
 	struct swdevt *sp;
 	int destroy;
 
 	mtx_lock(&sw_dev_mtx);
 	/* sp is NULL after the loop when no device references cp. */
 	TAILQ_FOREACH(sp, &swtailq, sw_list) {
 		if (sp->sw_id == cp) {
 			sp->sw_flags |= SW_CLOSING;
 			break;
 		}
 	}
 	/*
 	 * Drop reference we were created with. Do directly since we're in a
 	 * special context where we don't have to queue the call to
 	 * swapgeom_close_ev().
 	 */
 	cp->index--;
 	destroy = ((sp != NULL) && (cp->index == 0));
 	if (destroy)
 		sp->sw_id = NULL;
 	mtx_unlock(&sw_dev_mtx);
 	if (destroy)
 		swapgeom_close_ev(cp, 0);
 }
 
 /*
  * sw_close method for GEOM-backed swap devices (installed by
  * swapongeom_locked()).  Detaches the consumer from the swdevt under
  * sw_dev_mtx, then waits for the events thread to run swapgeom_close_ev()
  * on it.  Does nothing further if the device had no consumer.  "td" is
  * unused.
  */
 static void
 swapgeom_close(struct thread *td, struct swdevt *sw)
 {
 	struct g_consumer *cp;
 
 	mtx_lock(&sw_dev_mtx);
 	cp = sw->sw_id;
 	sw->sw_id = NULL;
 	mtx_unlock(&sw_dev_mtx);
 
 	/*
 	 * swapgeom_close() may be called from the biodone context,
 	 * where we cannot perform topology changes.  Delegate the
 	 * work to the events thread.
 	 */
 	if (cp != NULL)
 		g_waitfor_event(swapgeom_close_ev, cp, M_WAITOK, NULL);
 }
 
 /*
  * Attach a swap consumer to the GEOM provider behind "dev" and register
  * it as a swap device.  swapongeom() calls this with the vnode and the
  * GEOM topology locked.
  *
  * Returns ENODEV when the cdev has no provider, EBUSY when an existing
  * swap device already uses that provider, the g_access() error when the
  * provider cannot be opened, and 0 on success.
  */
 static int
 swapongeom_locked(struct cdev *dev, struct vnode *vp)
 {
 	struct g_provider *pp;
 	struct g_consumer *cp;
 	static struct g_geom *gp;	/* single "swap" geom, created on first use */
 	struct swdevt *sp;
 	u_long nblks;
 	int error;
 
 	pp = g_dev_getprovider(dev);
 	if (pp == NULL)
 		return (ENODEV);
 	/* Refuse a provider that is already in use as swap. */
 	mtx_lock(&sw_dev_mtx);
 	TAILQ_FOREACH(sp, &swtailq, sw_list) {
 		cp = sp->sw_id;
 		if (cp != NULL && cp->provider == pp) {
 			mtx_unlock(&sw_dev_mtx);
 			return (EBUSY);
 		}
 	}
 	mtx_unlock(&sw_dev_mtx);
 	if (gp == NULL)
 		gp = g_new_geomf(&g_swap_class, "swap");
 	cp = g_new_consumer(gp);
 	cp->index = 1;	/* Number of active I/Os, plus one for being active. */
 	cp->flags |=  G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	g_attach(cp, pp);
 	/*
 	 * XXX: Every time you think you can improve the margin for
 	 * footshooting, somebody depends on the ability to do so:
 	 * savecore(8) wants to write to our swapdev so we cannot
 	 * set an exclusive count :-(
 	 */
 	error = g_access(cp, 1, 1, 0);
 	if (error != 0) {
 		g_detach(cp);
 		g_destroy_consumer(cp);
 		return (error);
 	}
 	/* Size handed to swaponsomething() is in DEV_BSIZE units. */
 	nblks = pp->mediasize / DEV_BSIZE;
 	swaponsomething(vp, cp, nblks, swapgeom_strategy,
 	    swapgeom_close, dev2udev(dev),
 	    (pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 ? SW_UNMAPPED : 0);
 	return (0);
 }
 
 /*
  * Enable swapping on the character device behind vp.  The vnode is locked
  * exclusively for the duration; if it is still a live VCHR vnode, the
  * work is done by swapongeom_locked() under the GEOM topology lock.
  *
  * Returns ENOENT when vp is not a character device or has been doomed,
  * otherwise whatever swapongeom_locked() returns.
  */
 static int
 swapongeom(struct vnode *vp)
 {
 	int error;
 
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	if (vp->v_type == VCHR && (vp->v_iflag & VI_DOOMED) == 0) {
 		g_topology_lock();
 		error = swapongeom_locked(vp->v_rdev, vp);
 		g_topology_unlock();
 	} else
 		error = ENOENT;
 	VOP_UNLOCK(vp, 0);
 	return (error);
 }
 
 /*
  * VNODE backend
  *
  * This is used mainly for network filesystem (read: probably only tested
  * with NFS) swapfiles.
  *
  */
 
 /*
  * Strategy routine for vnode-backed swap devices (installed by
  * swaponvp()).  Rebases the block number to the start of this device,
  * retargets the buf at the backing vnode and its bufobj, and issues the
  * I/O with bstrategy().
  */
 static void
 swapdev_strategy(struct buf *bp, struct swdevt *sp)
 {
 	struct vnode *vp2;
 
 	/* Convert the device-relative page number with ctodb(). */
 	bp->b_blkno = ctodb(bp->b_blkno - sp->sw_first);
 
 	vp2 = sp->sw_id;
 	vhold(vp2);
 	if (bp->b_iocmd == BIO_WRITE) {
 		/* Move the write accounting from the old bufobj to vp2's. */
 		if (bp->b_bufobj)
 			bufobj_wdrop(bp->b_bufobj);
 		bufobj_wref(&vp2->v_bufobj);
 	}
 	if (bp->b_bufobj != &vp2->v_bufobj)
 		bp->b_bufobj = &vp2->v_bufobj;
 	bp->b_vp = vp2;
 	bp->b_iooffset = dbtob(bp->b_blkno);
 	bstrategy(bp);
 	return;
 }
 
 /*
  * sw_close method for vnode-backed swap devices (installed by swaponvp()):
  * close the backing vnode, which was opened FREAD | FWRITE, with the
  * calling thread's credentials, then release the reference on it.
  */
 static void
 swapdev_close(struct thread *td, struct swdevt *sp)
 {
 	struct vnode *backing;
 
 	backing = sp->sw_vp;
 	VOP_CLOSE(backing, FREAD | FWRITE, td->td_ucred, td);
 	vrele(backing);
 }
 
 
 /*
  * Enable swapping on a vnode using the vnode backend.
  *
  *	td	calling thread; its credentials are used for the MAC check
  *		and for VOP_OPEN()
  *	vp	vnode to swap on
  *	nblks	size passed through to swaponsomething()
  *
  * Returns ENXIO when nblks is zero, EBUSY when vp is already a swap
  * device, the error from the MAC check (when MAC is compiled in) or from
  * VOP_OPEN(), and 0 on success.
  */
 static int
 swaponvp(struct thread *td, struct vnode *vp, u_long nblks)
 {
 	struct swdevt *sp;
 	int error;
 
 	if (nblks == 0)
 		return (ENXIO);
 	mtx_lock(&sw_dev_mtx);
 	TAILQ_FOREACH(sp, &swtailq, sw_list) {
 		if (sp->sw_id == vp) {
 			mtx_unlock(&sw_dev_mtx);
 			return (EBUSY);
 		}
 	}
 	mtx_unlock(&sw_dev_mtx);
 
 	(void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	/* With MAC, the open is attempted only if the MAC check passes. */
 #ifdef MAC
 	error = mac_system_check_swapon(td->td_ucred, vp);
 	if (error == 0)
 #endif
 		error = VOP_OPEN(vp, FREAD | FWRITE, td->td_ucred, td, NULL);
 	(void) VOP_UNLOCK(vp, 0);
 	if (error)
 		return (error);
 
 	swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close,
 	    NODEV, 0);
 	return (0);
 }
 
 /*
  * Sysctl handler that reads or changes nsw_wcount_async_max.
  *
  * A new value must lie in [1, nswbuf / 2], otherwise EINVAL is returned.
  * The change is applied under pbuf_mtx by adjusting both the limit and
  * the current count (nsw_wcount_async) by the same amount.  When lowering
  * the limit would drive the current count negative, the handler takes
  * what is available and sleeps on &nsw_wcount_async until it is woken,
  * repeating until the target is reached.
  */
 static int
 sysctl_swap_async_max(SYSCTL_HANDLER_ARGS)
 {
 	int error, new, n;
 
 	new = nsw_wcount_async_max;
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	/* Plain reads (no new value supplied) stop here. */
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	if (new > nswbuf / 2 || new < 1)
 		return (EINVAL);
 
 	mtx_lock(&pbuf_mtx);
 	while (nsw_wcount_async_max != new) {
 		/*
 		 * Adjust difference.  If the current async count is too low,
 		 * we will need to squeeze our update slowly in.  Sleep with a
 		 * higher priority than getpbuf() to finish faster.
 		 */
 		n = new - nsw_wcount_async_max;
 		if (nsw_wcount_async + n >= 0) {
 			nsw_wcount_async += n;
 			nsw_wcount_async_max += n;
 			wakeup(&nsw_wcount_async);
 		} else {
 			nsw_wcount_async_max -= nsw_wcount_async;
 			nsw_wcount_async = 0;
 			msleep(&nsw_wcount_async, &pbuf_mtx, PSWP,
 			    "swpsysctl", 0);
 		}
 	}
 	mtx_unlock(&pbuf_mtx);
 
 	return (0);
 }
Index: head/sys/vm/vm_object.c
===================================================================
--- head/sys/vm/vm_object.c	(revision 318735)
+++ head/sys/vm/vm_object.c	(revision 318736)
@@ -1,2644 +1,2647 @@
 /*-
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 /*
  *	Virtual memory object module.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>		/* for curproc, pageproc */
 #include <sys/socket.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/user.h>
 #include <sys/vnode.h>
 #include <sys/vmmeter.h>
 #include <sys/sx.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/swap_pager.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_radix.h>
 #include <vm/vm_reserv.h>
 #include <vm/uma.h>
 
 /* Backing variable for the read-write vm.old_msync sysctl. */
 static int old_msync;
 SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0,
     "Use old (insecure) msync behavior");
 
 /* Forward declarations of file-local helpers. */
 static int	vm_object_page_collect_flush(vm_object_t object, vm_page_t p,
 		    int pagerflags, int flags, boolean_t *clearobjflags,
 		    boolean_t *eio);
 static boolean_t vm_object_page_remove_write(vm_page_t p, int flags,
 		    boolean_t *clearobjflags);
 static void	vm_object_qcollapse(vm_object_t object);
 static void	vm_object_vndeallocate(vm_object_t object);
 
 /*
  *	Virtual memory objects maintain the actual data
  *	associated with allocated virtual memory.  A given
  *	page of memory exists within exactly one object.
  *
  *	An object is only deallocated when all "references"
  *	are given up.  Only one "reference" to a given
  *	region of an object should be writeable.
  *
  *	Associated with each object is a list of all resident
  *	memory pages belonging to that object; this list is
  *	maintained by the "vm_page" module, and locked by the object's
  *	lock.
  *
  *	Each object also records a "pager" routine which is
  *	used to retrieve (and store) pages to the proper backing
  *	storage.  In addition, objects may be backed by other
  *	objects from which they were virtual-copied.
  *
  *	The only items within the object structure which are
  *	modified after time of creation are:
  *		reference count		locked by object's lock
  *		pager routine		locked by object's lock
  *
  */
 
 /* Global list of VM objects; vm_object_zinit() appends each new zone item. */
 struct object_q vm_object_list;
 struct mtx vm_object_list_mtx;	/* lock for object list and count */
 
 /* Statically allocated objects, initialized in vm_object_init(). */
 struct vm_object kernel_object_store;
 struct vm_object kmem_object_store;
 
 /* vm.stats.object: parent node for the counters below. */
 static SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0,
     "VM object stats");
 
 static long object_collapses;
 SYSCTL_LONG(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD,
     &object_collapses, 0, "VM object collapses");
 
 static long object_bypasses;
 SYSCTL_LONG(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD,
     &object_bypasses, 0, "VM object bypasses");
 
 /* UMA zone from which vm_object_allocate() obtains objects. */
 static uma_zone_t obj_zone;
 
 static int vm_object_zinit(void *mem, int size, int flags);
 
 #ifdef INVARIANTS
 static void vm_object_zdtor(void *mem, int size, void *arg);
 
 /*
  * Consistency check compiled only under INVARIANTS and passed to
  * uma_zcreate() in vm_object_init().  Asserts that the object in "mem" has
  * been torn down completely: no references, no resident pages in either
  * the memq list or the radix trie, no reservations (when
  * VM_NRESERVLEVEL > 0), no paging in progress, no shadows, and type
  * OBJT_DEAD.  "size" and "arg" are unused.
  */
 static void
 vm_object_zdtor(void *mem, int size, void *arg)
 {
 	vm_object_t object;
 
 	object = (vm_object_t)mem;
 	KASSERT(object->ref_count == 0,
 	    ("object %p ref_count = %d", object, object->ref_count));
 	KASSERT(TAILQ_EMPTY(&object->memq),
 	    ("object %p has resident pages in its memq", object));
 	KASSERT(vm_radix_is_empty(&object->rtree),
 	    ("object %p has resident pages in its trie", object));
 #if VM_NRESERVLEVEL > 0
 	KASSERT(LIST_EMPTY(&object->rvq),
 	    ("object %p has reservations",
 	    object));
 #endif
 	KASSERT(object->paging_in_progress == 0,
 	    ("object %p paging_in_progress = %d",
 	    object, object->paging_in_progress));
 	KASSERT(object->resident_page_count == 0,
 	    ("object %p resident_page_count = %d",
 	    object, object->resident_page_count));
 	KASSERT(object->shadow_count == 0,
 	    ("object %p shadow_count = %d",
 	    object, object->shadow_count));
 	KASSERT(object->type == OBJT_DEAD,
 	    ("object %p has non-dead type %d",
 	    object, object->type));
 }
 #endif
 
 static int
 vm_object_zinit(void *mem, int size, int flags)
 {
 	vm_object_t object;
 
 	object = (vm_object_t)mem;
 	rw_init_flags(&object->lock, "vm object", RW_DUPOK | RW_NEW);
 
 	/* These are true for any object that has been freed */
 	object->type = OBJT_DEAD;
 	object->ref_count = 0;
 	object->rtree.rt_root = 0;
 	object->paging_in_progress = 0;
 	object->resident_page_count = 0;
 	object->shadow_count = 0;
 
 	mtx_lock(&vm_object_list_mtx);
 	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
 	mtx_unlock(&vm_object_list_mtx);
 	return (0);
 }
 
 /*
  * Initialize caller-provided storage as a new object of the given type
  * and size (used for both zone-allocated and statically allocated
  * objects).  The initial flags depend on the type; the object starts with
  * one reference, generation 1, the default memory attribute, and no
  * credential, charge, handle or backing object.  Panics on OBJT_DEAD or
  * an unknown type.
  */
 static void
 _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
 {
 
 	TAILQ_INIT(&object->memq);
 	LIST_INIT(&object->shadow_head);
 
 	object->type = type;
 	/* Pick the initial flags for this object type. */
 	switch (type) {
 	case OBJT_DEAD:
 		panic("_vm_object_allocate: can't create OBJT_DEAD");
 	case OBJT_DEFAULT:
 	case OBJT_SWAP:
 		object->flags = OBJ_ONEMAPPING;
 		break;
 	case OBJT_DEVICE:
 	case OBJT_SG:
 		object->flags = OBJ_FICTITIOUS | OBJ_UNMANAGED;
 		break;
 	case OBJT_MGTDEVICE:
 		object->flags = OBJ_FICTITIOUS;
 		break;
 	case OBJT_PHYS:
 		object->flags = OBJ_UNMANAGED;
 		break;
 	case OBJT_VNODE:
 		object->flags = 0;
 		break;
 	default:
 		panic("_vm_object_allocate: type %d is undefined", type);
 	}
 	object->size = size;
 	object->generation = 1;
 	object->ref_count = 1;
 	object->memattr = VM_MEMATTR_DEFAULT;
 	object->cred = NULL;
 	object->charge = 0;
 	object->handle = NULL;
 	object->backing_object = NULL;
 	object->backing_object_offset = (vm_ooffset_t) 0;
 #if VM_NRESERVLEVEL > 0
 	LIST_INIT(&object->rvq);
 #endif
 	umtx_shm_object_init(object);
 }
 
 /*
  *	vm_object_init:
  *
  *	Initialize the VM objects module: set up the global object list and
  *	its mutex, initialize kernel_object and kmem_object as OBJT_PHYS
  *	objects spanning the kernel address range, create the UMA zone used
  *	for all other objects, and initialize the radix trie subsystem.
  */
 void
 vm_object_init(void)
 {
 	TAILQ_INIT(&vm_object_list);
 	mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF);
 	
 	rw_init(&kernel_object->lock, "kernel vm object");
 	_vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS -
 	    VM_MIN_KERNEL_ADDRESS), kernel_object);
 #if VM_NRESERVLEVEL > 0
 	kernel_object->flags |= OBJ_COLORED;
 	kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
 #endif
 
 	rw_init(&kmem_object->lock, "kmem vm object");
 	_vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS -
 	    VM_MIN_KERNEL_ADDRESS), kmem_object);
 #if VM_NRESERVLEVEL > 0
 	kmem_object->flags |= OBJ_COLORED;
 	kmem_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
 #endif
 
 	/*
 	 * The lock portion of struct vm_object must be type stable due
 	 * to vm_pageout_fallback_object_lock locking a vm object
 	 * without holding any references to it.
 	 */
 	/* The destructor is installed only when INVARIANTS is enabled. */
 	obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
 #ifdef INVARIANTS
 	    vm_object_zdtor,
 #else
 	    NULL,
 #endif
 	    vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 
 	vm_radix_init();
 }
 
 /*
  * Clear the given flag bits in the object.  The object must be
  * write-locked.
  */
 void
 vm_object_clear_flag(vm_object_t object, u_short bits)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	object->flags = object->flags & ~bits;
 }
 
 /*
  *	Set the object's default memory attribute, which pages allocated to
  *	the object receive by default.
  *
  *	At present this must happen before any page is allocated to the
  *	object; that restriction may be relaxed for "default" and "swap"
  *	objects in the future.
  *
  *	The object must be write-locked.  Returns KERN_INVALID_ARGUMENT for
  *	a dead object, KERN_FAILURE if the object already has pages, and
  *	KERN_SUCCESS otherwise.  Panics on an unknown object type.
  */
 int
 vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
 	/* Validate the object type first. */
 	switch (object->type) {
 	case OBJT_DEAD:
 		return (KERN_INVALID_ARGUMENT);
 	case OBJT_DEFAULT:
 	case OBJT_DEVICE:
 	case OBJT_MGTDEVICE:
 	case OBJT_PHYS:
 	case OBJT_SG:
 	case OBJT_SWAP:
 	case OBJT_VNODE:
 		break;
 	default:
 		panic("vm_object_set_memattr: object %p is of undefined type",
 		    object);
 	}
 
 	/* Too late once pages exist. */
 	if (!TAILQ_EMPTY(&object->memq))
 		return (KERN_FAILURE);
 
 	object->memattr = memattr;
 	return (KERN_SUCCESS);
 }
 
 /*
  * Raise the object's paging-in-progress count by i.  The object must be
  * write-locked.
  */
 void
 vm_object_pip_add(vm_object_t object, short i)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	object->paging_in_progress = object->paging_in_progress + i;
 }
 
 /*
  * Lower the object's paging-in-progress count by i without waking any
  * waiter.  The object must be write-locked.
  */
 void
 vm_object_pip_subtract(vm_object_t object, short i)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	object->paging_in_progress = object->paging_in_progress - i;
 }
 
 /*
  * Drop one paging-in-progress reference.  If the count reaches zero while
  * OBJ_PIPWNT is set, clear the flag and wake the threads sleeping on the
  * object.  The object must be write-locked.
  */
 void
 vm_object_pip_wakeup(vm_object_t object)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	object->paging_in_progress -= 1;
 	if ((object->flags & OBJ_PIPWNT) == 0 ||
 	    object->paging_in_progress != 0)
 		return;
 	vm_object_clear_flag(object, OBJ_PIPWNT);
 	wakeup(object);
 }
 
 /*
  * Drop i paging-in-progress references (i may be zero).  If the count is
  * zero afterwards while OBJ_PIPWNT is set, clear the flag and wake the
  * threads sleeping on the object.  The object must be write-locked.
  */
 void
 vm_object_pip_wakeupn(vm_object_t object, short i)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	if (i != 0)
 		object->paging_in_progress -= i;
 	if ((object->flags & OBJ_PIPWNT) == 0 ||
 	    object->paging_in_progress != 0)
 		return;
 	vm_object_clear_flag(object, OBJ_PIPWNT);
 	wakeup(object);
 }
 
 /*
  * Sleep until the object has no paging in progress.  Before each sleep
  * OBJ_PIPWNT is set so that the pip_wakeup routines know to wake us.
  * "waitid" is the wait message passed to the sleep.  The object must be
  * write-locked.
  */
 void
 vm_object_pip_wait(vm_object_t object, char *waitid)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	for (;;) {
 		if (object->paging_in_progress == 0)
 			break;
 		object->flags |= OBJ_PIPWNT;
 		VM_OBJECT_SLEEP(object, object, PVM, waitid, 0);
 	}
 }
 
 /*
  *	vm_object_allocate:
  *
  *	Allocate an object from obj_zone (M_WAITOK), initialize it with the
  *	requested type and size, and return it.
  */
 vm_object_t
 vm_object_allocate(objtype_t type, vm_pindex_t size)
 {
 	vm_object_t newobj;
 
 	newobj = uma_zalloc(obj_zone, M_WAITOK);
 	_vm_object_allocate(type, size, newobj);
 	return (newobj);
 }
 
 
 /*
  *	vm_object_reference:
  *
  *	Take an additional reference on the object, acquiring and releasing
  *	its write lock around the update.  A NULL object is ignored.
  *	Note: OBJ_DEAD objects can be referenced during final cleaning.
  */
 void
 vm_object_reference(vm_object_t object)
 {
 	if (object != NULL) {
 		VM_OBJECT_WLOCK(object);
 		vm_object_reference_locked(object);
 		VM_OBJECT_WUNLOCK(object);
 	}
 }
 
 /*
  *	vm_object_reference_locked:
  *
  *	Take an additional reference on the object.  For a vnode-backed
  *	object the vnode in object->handle is referenced as well.
  *
  *	The object must be locked.
  */
 void
 vm_object_reference_locked(vm_object_t object)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	object->ref_count += 1;
 	if (object->type != OBJT_VNODE)
 		return;
 	vref((struct vnode *)object->handle);
 }
 
 /*
  * Handle deallocating an object of type OBJT_VNODE.
  *
  * Called with the object write-locked; the lock is released on every path
  * before returning.  One object reference is dropped together with the
  * matching vnode reference.  When this may be the last reference to an
  * object whose vnode has VV_TEXT set, the vnode lock is acquired first so
  * that VOP_UNSET_TEXT() can be called once the count reaches zero.
  */
 static void
 vm_object_vndeallocate(vm_object_t object)
 {
 	struct vnode *vp = (struct vnode *) object->handle;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(object->type == OBJT_VNODE,
 	    ("vm_object_vndeallocate: not a vnode object"));
 	KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
 #ifdef INVARIANTS
 	if (object->ref_count == 0) {
 		vn_printf(vp, "vm_object_vndeallocate ");
 		panic("vm_object_vndeallocate: bad object reference count");
 	}
 #endif
 
 	if (!umtx_shm_vnobj_persistent && object->ref_count == 1)
 		umtx_shm_object_terminated(object);
 
 	/*
 	 * The test for text of vp vnode does not need a bypass to
 	 * reach right VV_TEXT there, since it is obtained from
 	 * object->handle.
 	 */
 	if (object->ref_count > 1 || (vp->v_vflag & VV_TEXT) == 0) {
 		object->ref_count--;
 		VM_OBJECT_WUNLOCK(object);
 		/* vrele may need the vnode lock. */
 		vrele(vp);
 	} else {
 		/*
 		 * Hold the vnode across the window in which the object
 		 * lock is dropped to take the vnode lock.
 		 */
 		vhold(vp);
 		VM_OBJECT_WUNLOCK(object);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		vdrop(vp);
 		VM_OBJECT_WLOCK(object);
 		object->ref_count--;
 		if (object->type == OBJT_DEAD) {
 			/* The object died while unlocked; only unlock vp. */
 			VM_OBJECT_WUNLOCK(object);
 			VOP_UNLOCK(vp, 0);
 		} else {
 			if (object->ref_count == 0)
 				VOP_UNSET_TEXT(vp);
 			VM_OBJECT_WUNLOCK(object);
 			vput(vp);
 		}
 	}
 }
 
 /*
  *	vm_object_deallocate:
  *
  *	Release a reference to the specified object,
  *	gained either through a vm_object_allocate
  *	or a vm_object_reference call.  When all references
  *	are gone, storage associated with this object
  *	may be relinquished.
  *
  *	No object may be locked.
  *
  *	Vnode objects are handed to vm_object_vndeallocate().  For other
  *	objects the function loops: once an object's last reference is gone
  *	it is terminated and the loop continues with its backing object,
  *	whose reference from the terminated object must also be released.
  *	When exactly one reference remains and it belongs to a single
  *	shadow object, an attempt is made to collapse the object into that
  *	shadow.  A NULL object is ignored.
  */
 void
 vm_object_deallocate(vm_object_t object)
 {
 	vm_object_t temp;
 	struct vnode *vp;
 
 	while (object != NULL) {
 		VM_OBJECT_WLOCK(object);
 		if (object->type == OBJT_VNODE) {
 			/* vm_object_vndeallocate() drops the object lock. */
 			vm_object_vndeallocate(object);
 			return;
 		}
 
 		KASSERT(object->ref_count != 0,
 			("vm_object_deallocate: object deallocated too many times: %d", object->type));
 
 		/*
 		 * If the reference count goes to 0 we start calling
 		 * vm_object_terminate() on the object chain.
 		 * A ref count of 1 may be a special case depending on the
 		 * shadow count being 0 or 1.
 		 */
 		object->ref_count--;
 		if (object->ref_count > 1) {
 			VM_OBJECT_WUNLOCK(object);
 			return;
 		} else if (object->ref_count == 1) {
 			if (object->type == OBJT_SWAP &&
 			    (object->flags & OBJ_TMPFS) != 0) {
 				/*
 				 * tmpfs-backed swap object: take the vnode
 				 * lock (dropping the object lock to do so)
 				 * and re-check the object's state before
 				 * clearing the text flag.
 				 */
 				vp = object->un_pager.swp.swp_tmpfs;
 				vhold(vp);
 				VM_OBJECT_WUNLOCK(object);
 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 				VM_OBJECT_WLOCK(object);
 				if (object->type == OBJT_DEAD ||
 				    object->ref_count != 1) {
 					VM_OBJECT_WUNLOCK(object);
 					VOP_UNLOCK(vp, 0);
 					vdrop(vp);
 					return;
 				}
 				if ((object->flags & OBJ_TMPFS) != 0)
 					VOP_UNSET_TEXT(vp);
 				VOP_UNLOCK(vp, 0);
 				vdrop(vp);
 			}
 			if (object->shadow_count == 0 &&
 			    object->handle == NULL &&
 			    (object->type == OBJT_DEFAULT ||
 			    (object->type == OBJT_SWAP &&
 			    (object->flags & OBJ_TMPFS_NODE) == 0))) {
 				vm_object_set_flag(object, OBJ_ONEMAPPING);
 			} else if ((object->shadow_count == 1) &&
 			    (object->handle == NULL) &&
 			    (object->type == OBJT_DEFAULT ||
 			     object->type == OBJT_SWAP)) {
 				vm_object_t robject;
 
 				/* robject is the single object shadowing us. */
 				robject = LIST_FIRST(&object->shadow_head);
 				KASSERT(robject != NULL,
 				    ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
 					 object->ref_count,
 					 object->shadow_count));
 				KASSERT((robject->flags & OBJ_TMPFS_NODE) == 0,
 				    ("shadowed tmpfs v_object %p", object));
 				if (!VM_OBJECT_TRYWLOCK(robject)) {
 					/*
 					 * Avoid a potential deadlock.
 					 */
 					/*
 					 * Restore the reference dropped above
 					 * so the next iteration starts over.
 					 */
 					object->ref_count++;
 					VM_OBJECT_WUNLOCK(object);
 					/*
 					 * More likely than not the thread
 					 * holding robject's lock has lower
 					 * priority than the current thread.
 					 * Let the lower priority thread run.
 					 */
 					pause("vmo_de", 1);
 					continue;
 				}
 				/*
 				 * Collapse object into its shadow unless its
 				 * shadow is dead.  In that case, object will
 				 * be deallocated by the thread that is
 				 * deallocating its shadow.
 				 */
 				if ((robject->flags & OBJ_DEAD) == 0 &&
 				    (robject->handle == NULL) &&
 				    (robject->type == OBJT_DEFAULT ||
 				     robject->type == OBJT_SWAP)) {
 
 					robject->ref_count++;
 retry:
 					/*
 					 * Wait for paging activity on either
 					 * object to drain.  Each wait drops
 					 * one of the locks, so re-check that
 					 * object still backs robject before
 					 * retrying.
 					 */
 					if (robject->paging_in_progress) {
 						VM_OBJECT_WUNLOCK(object);
 						vm_object_pip_wait(robject,
 						    "objde1");
 						temp = robject->backing_object;
 						if (object == temp) {
 							VM_OBJECT_WLOCK(object);
 							goto retry;
 						}
 					} else if (object->paging_in_progress) {
 						VM_OBJECT_WUNLOCK(robject);
 						object->flags |= OBJ_PIPWNT;
 						VM_OBJECT_SLEEP(object, object,
 						    PDROP | PVM, "objde2", 0);
 						VM_OBJECT_WLOCK(robject);
 						temp = robject->backing_object;
 						if (object == temp) {
 							VM_OBJECT_WLOCK(object);
 							goto retry;
 						}
 					} else
 						VM_OBJECT_WUNLOCK(object);
 
 					/*
 					 * If the reference taken above is the
 					 * only one left, robject itself is
 					 * terminated.
 					 */
 					if (robject->ref_count == 1) {
 						robject->ref_count--;
 						object = robject;
 						goto doterm;
 					}
 					object = robject;
 					vm_object_collapse(object);
 					VM_OBJECT_WUNLOCK(object);
 					continue;
 				}
 				VM_OBJECT_WUNLOCK(robject);
 			}
 			VM_OBJECT_WUNLOCK(object);
 			return;
 		}
 doterm:
 		umtx_shm_object_terminated(object);
 		/* Unlink from the backing object's shadow list, if any. */
 		temp = object->backing_object;
 		if (temp != NULL) {
 			KASSERT((object->flags & OBJ_TMPFS_NODE) == 0,
 			    ("shadowed tmpfs v_object 2 %p", object));
 			VM_OBJECT_WLOCK(temp);
 			LIST_REMOVE(object, shadow_list);
 			temp->shadow_count--;
 			VM_OBJECT_WUNLOCK(temp);
 			object->backing_object = NULL;
 		}
 		/*
 		 * Don't double-terminate, we could be in a termination
 		 * recursion due to the terminate having to sync data
 		 * to disk.
 		 */
 		if ((object->flags & OBJ_DEAD) == 0)
 			vm_object_terminate(object);
 		else
 			VM_OBJECT_WUNLOCK(object);
 		/* Continue with the former backing object (may be NULL). */
 		object = temp;
 	}
 }
 
 /*
  *	vm_object_destroy releases the swap charge accounted to the object's
  *	credential, if it has one, and returns the object's storage to
  *	obj_zone.
  */
 void
 vm_object_destroy(vm_object_t object)
 {
 	struct ucred *owner;
 
 	/*
 	 * Give back the allocation charge and drop the credential.
 	 */
 	owner = object->cred;
 	if (owner != NULL) {
 		swap_release_by_cred(object->charge, owner);
 		object->charge = 0;
 		crfree(owner);
 		object->cred = NULL;
 	}
 
 	/*
 	 * Hand the object back to its zone.
 	 */
 	uma_zfree(obj_zone, object);
 }
 
 /*
  *	vm_object_terminate actually destroys the specified object, freeing
  *	up all previously used resources.
  *
  *	The object is flagged OBJ_DEAD, outstanding paging activity is
  *	waited for, resident pages are released, the pager is told to
  *	deallocate, and the object is finally passed to
  *	vm_object_destroy().
  *
  *	The object must be write-locked on entry.  It is unlocked and
  *	destroyed before this function returns, so the caller must not
  *	touch it afterwards.
  *	This routine may block.
  */
 void
 vm_object_terminate(vm_object_t object)
 {
 	vm_page_t p, p_next;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
 	/*
 	 * Make sure no one uses us.
 	 */
 	vm_object_set_flag(object, OBJ_DEAD);
 
 	/*
 	 * wait for the pageout daemon to be done with the object
 	 */
 	vm_object_pip_wait(object, "objtrm");
 
 	KASSERT(!object->paging_in_progress,
 		("vm_object_terminate: pageout in progress"));
 
 	/*
 	 * Clean and free the pages, as appropriate. All references to the
 	 * object are gone, so we don't need to lock it.
 	 */
 	if (object->type == OBJT_VNODE) {
 		struct vnode *vp = (struct vnode *)object->handle;
 
 		/*
 		 * Clean pages and flush buffers.  The whole object is
 		 * written synchronously (start == end == 0, OBJPC_SYNC).
 		 */
 		vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
 		/*
 		 * The object lock is not held across vinvalbuf() or while
 		 * the buffer object lock is taken below.
 		 */
 		VM_OBJECT_WUNLOCK(object);
 
 		vinvalbuf(vp, V_SAVE, 0, 0);
 
 		/* Mark the vnode's buffer object as dead. */
 		BO_LOCK(&vp->v_bufobj);
 		vp->v_bufobj.bo_flag |= BO_DEAD;
 		BO_UNLOCK(&vp->v_bufobj);
 
 		VM_OBJECT_WLOCK(object);
 	}
 
 	KASSERT(object->ref_count == 0, 
 		("vm_object_terminate: object with references, ref_count=%d",
 		object->ref_count));
 
 	/*
 	 * Free any remaining pageable pages.  This also removes them from the
 	 * paging queues.  However, don't free wired pages, just remove them
 	 * from the object.  Rather than incrementally removing each page from
 	 * the object, the page and object are reset to any empty state.
 	 */
 	TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
 		vm_page_assert_unbusied(p);
 		vm_page_lock(p);
 		/*
 		 * Optimize the page's removal from the object by resetting
 		 * its "object" field.  Specifically, if the page is not
 		 * wired, then the effect of this assignment is that
 		 * vm_page_free()'s call to vm_page_remove() will return
 		 * immediately without modifying the page or the object.
 		 */
 		p->object = NULL;
 		if (p->wire_count == 0) {
 			vm_page_free(p);
 			VM_CNT_INC(v_pfree);
 		}
 		vm_page_unlock(p);
 	}
 	/*
 	 * If the object contained any pages, then reset it to an empty state.
 	 * None of the object's fields, including "resident_page_count", were
 	 * modified by the preceding loop.
 	 */
 	if (object->resident_page_count != 0) {
 		vm_radix_reclaim_allnodes(&object->rtree);
 		TAILQ_INIT(&object->memq);
 		object->resident_page_count = 0;
 		/*
 		 * NOTE(review): presumably this balances a vnode hold taken
 		 * when the object first gained a resident page -- confirm
 		 * against the page insertion path.
 		 */
 		if (object->type == OBJT_VNODE)
 			vdrop(object->handle);
 	}
 
 #if VM_NRESERVLEVEL > 0
 	/* Break up any reservations still attached to the object. */
 	if (__predict_false(!LIST_EMPTY(&object->rvq)))
 		vm_reserv_break_all(object);
 #endif
 
 	/* Only OBJT_DEFAULT and OBJT_SWAP objects may carry a credential. */
 	KASSERT(object->cred == NULL || object->type == OBJT_DEFAULT ||
 	    object->type == OBJT_SWAP,
 	    ("%s: non-swap obj %p has cred", __func__, object));
 
 	/*
 	 * Let the pager know object is dead.
 	 */
 	vm_pager_deallocate(object);
 	VM_OBJECT_WUNLOCK(object);
 
 	/* Release the charge, if any, and free the object structure. */
 	vm_object_destroy(object);
 }
 
 /*
  * Prepare a page for cleaning.  Unless the caller asked to skip nosync
  * pages (OBJPC_NOSYNC) and this page is one (VPO_NOSYNC), write access is
  * revoked from all of the page's mappings and the function reports whether
  * the page is dirty and therefore needs to be flushed.  For a skipped
  * nosync page the page is left untouched, *clearobjflags is forced to
  * FALSE because the object is likely to stay dirty, and FALSE is returned.
  */
 static boolean_t
 vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *clearobjflags)
 {
 
 	if ((flags & OBJPC_NOSYNC) == 0 || (p->oflags & VPO_NOSYNC) == 0) {
 		/* Ordinary case: write-protect, then test the dirty bits. */
 		pmap_remove_write(p);
 		return (p->dirty != 0);
 	}
 
 	/*
 	 * Skipped nosync page.  The object flags were never cleared for
 	 * it, so there is nothing to set here; just prevent the caller
 	 * from clearing them.
 	 */
 	*clearobjflags = FALSE;
 	return (FALSE);
 }
 
 /*
  *	vm_object_page_clean
  *
  *	Clean all dirty pages in the specified range of object.  Leaves page
  *	on whatever queue it is currently on.   If NOSYNC is set then do not
  *	write out pages with VPO_NOSYNC set (originally comes from MAP_NOSYNC),
  *	leaving the object dirty.
  *
  *	When stuffing pages asynchronously, allow clustering.  XXX we need a
  *	synchronous clustering mode implementation.
  *
  *	Odd semantics: an "end" of 0 means "through the end of the object",
  *	so start == end == 0 cleans everything.
  *
  *	"start" and "end" are byte offsets into the object; "flags" is a
  *	mask of OBJPC_* values.
  *
  *	The object must be write-locked.
  *
  *	Returns FALSE if some page from the range was not written, as
  *	reported by the pager, and TRUE otherwise.
  */
 boolean_t
 vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end,
     int flags)
 {
 	vm_page_t np, p;
 	vm_pindex_t pi, tend, tstart;
 	int curgeneration, n, pagerflags;
 	boolean_t clearobjflags, eio, res;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
 	/*
 	 * The OBJ_MIGHTBEDIRTY flag is only set for OBJT_VNODE
 	 * objects.  The check below prevents the function from
 	 * operating on non-vnode objects.
 	 */
 	if ((object->flags & OBJ_MIGHTBEDIRTY) == 0 ||
 	    object->resident_page_count == 0)
 		return (TRUE);
 
 	/*
 	 * Synchronous or invalidating requests are issued as synchronous
 	 * puts; everything else may be clustered.
 	 */
 	pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) != 0 ?
 	    VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
 	pagerflags |= (flags & OBJPC_INVAL) != 0 ? VM_PAGER_PUT_INVAL : 0;
 
 	/*
 	 * Convert the byte range into page indices.  OBJ_MIGHTBEDIRTY may
 	 * only be cleared at the end if the whole object was covered.
 	 */
 	tstart = OFF_TO_IDX(start);
 	tend = (end == 0) ? object->size : OFF_TO_IDX(end + PAGE_MASK);
 	clearobjflags = tstart == 0 && tend >= object->size;
 	res = TRUE;
 
 rescan:
 	/*
 	 * Remember the generation so that a change made while this thread
 	 * sleeps or flushes can be detected below.
 	 */
 	curgeneration = object->generation;
 
 	for (p = vm_page_find_least(object, tstart); p != NULL; p = np) {
 		pi = p->pindex;
 		if (pi >= tend)
 			break;
 		np = TAILQ_NEXT(p, listq);
 		if (p->valid == 0)
 			continue;
 		if (vm_page_sleep_if_busy(p, "vpcwai")) {
 			/*
 			 * We slept.  If the object changed meanwhile, a
 			 * synchronous request starts over; otherwise the
 			 * object can no longer be declared clean.
 			 */
 			if (object->generation != curgeneration) {
 				if ((flags & OBJPC_SYNC) != 0)
 					goto rescan;
 				else
 					clearobjflags = FALSE;
 			}
 			/* Look the same index up again. */
 			np = vm_page_find_least(object, pi);
 			continue;
 		}
 		if (!vm_object_page_remove_write(p, flags, &clearobjflags))
 			continue;
 
 		/* Flush the page together with its dirty neighbours. */
 		n = vm_object_page_collect_flush(object, p, pagerflags,
 		    flags, &clearobjflags, &eio);
 		if (eio) {
 			res = FALSE;
 			clearobjflags = FALSE;
 		}
 		if (object->generation != curgeneration) {
 			if ((flags & OBJPC_SYNC) != 0)
 				goto rescan;
 			else
 				clearobjflags = FALSE;
 		}
 
 		/*
 		 * If the VOP_PUTPAGES() did a truncated write, so
 		 * that even the first page of the run is not fully
 		 * written, vm_pageout_flush() returns 0 as the run
 		 * length.  Since the condition that caused truncated
 		 * write may be permanent, e.g. exhausted free space,
 		 * accepting n == 0 would cause an infinite loop.
 		 *
 		 * Forwarding the iterator leaves the unwritten page
 		 * behind, but there is not much we can do there if
 		 * filesystem refuses to write it.
 		 */
 		if (n == 0) {
 			n = 1;
 			clearobjflags = FALSE;
 		}
 		/* Continue after the run that was just handled. */
 		np = vm_page_find_least(object, pi + n);
 	}
 #if 0
 	VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0);
 #endif
 
 	if (clearobjflags)
 		vm_object_clear_flag(object, OBJ_MIGHTBEDIRTY);
 	return (res);
 }
 
 /*
  * Build a run of contiguous pages around "p" and hand it to
  * vm_pageout_flush().  The run is first grown forward from "p" and then
  * backward, in each direction stopping at a missing or busy page or at a
  * page for which vm_object_page_remove_write() reports that no flush is
  * needed, and never exceeding vm_pageout_page_count pages in total.
  *
  * The object must be write-locked and the lock of "p" must not be held.
  * Returns the run length reported by vm_pageout_flush(); *eio is filled in
  * by that call as well.
  */
 static int
 vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags,
     int flags, boolean_t *clearobjflags, boolean_t *eio)
 {
 	vm_page_t ma[vm_pageout_page_count], cur, head;
 	int count, i, mreq, written;
 
 	vm_page_lock_assert(p, MA_NOTOWNED);
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
 	/* The run starts out as just "p", which is also the requested page. */
 	count = 1;
 	mreq = 0;
 
 	/* Extend the run toward higher page indices. */
 	cur = p;
 	while (count < vm_pageout_page_count) {
 		cur = vm_page_next(cur);
 		if (cur == NULL || vm_page_busied(cur) ||
 		    !vm_object_page_remove_write(cur, flags, clearobjflags))
 			break;
 		count++;
 	}
 
 	/*
 	 * Extend the run toward lower page indices.  Every page prepended
 	 * shifts the position of "p" within the run by one.
 	 */
 	head = p;
 	while (count < vm_pageout_page_count) {
 		cur = vm_page_prev(head);
 		if (cur == NULL || vm_page_busied(cur) ||
 		    !vm_object_page_remove_write(cur, flags, clearobjflags))
 			break;
 		head = cur;
 		mreq++;
 		count++;
 	}
 
 	/* Gather the run, lowest index first, into the array. */
 	cur = head;
 	for (i = 0; i < count; i++) {
 		ma[i] = cur;
 		cur = TAILQ_NEXT(cur, listq);
 	}
 
 	vm_pageout_flush(ma, count, pagerflags, mreq, &written, eio);
 	return (written);
 }
 
 /*
  * Note that there is absolutely no sense in writing out
  * anonymous objects, so we track down the vnode object
  * to write out.
  * We invalidate (remove) all pages from the address space
  * for semantic correctness.
  *
  * If the backing object is a device object with unmanaged pages, then any
  * mappings to the specified range of pages must be removed before this
  * function is called.
  *
  * Note: certain anonymous maps, such as MAP_NOSYNC maps,
  * may start out with a NULL object.
  *
  * "offset" and "size" describe the byte range relative to "object"; the
  * offset is adjusted while walking down the shadow chain.  "syncio"
  * requests synchronous writes and "invalidate" requests that the pages be
  * removed afterwards.
  *
  * Returns TRUE for a NULL object.  Otherwise returns FALSE if
  * vm_object_page_clean() reported a failure or VOP_FSYNC() returned an
  * error, and TRUE in all other cases.
  */
 boolean_t
 vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
     boolean_t syncio, boolean_t invalidate)
 {
 	vm_object_t backing_object;
 	struct vnode *vp;
 	struct mount *mp;
 	int error, flags, fsync_after;
 	boolean_t res;
 
 	if (object == NULL)
 		return (TRUE);
 	res = TRUE;
 	error = 0;
 	VM_OBJECT_WLOCK(object);
 	/*
 	 * Descend to the bottom of the shadow chain, locking each backing
 	 * object before releasing the object above it, translating the
 	 * offset and clipping the size to the object reached.
 	 */
 	while ((backing_object = object->backing_object) != NULL) {
 		VM_OBJECT_WLOCK(backing_object);
 		offset += object->backing_object_offset;
 		VM_OBJECT_WUNLOCK(object);
 		object = backing_object;
 		if (object->size < OFF_TO_IDX(offset + size))
 			size = IDX_TO_OFF(object->size) - offset;
 	}
 	/*
 	 * Flush pages if writing is allowed, invalidate them
 	 * if invalidation requested.  Pages undergoing I/O
 	 * will be ignored by vm_object_page_remove().
 	 *
 	 * We cannot lock the vnode and then wait for paging
 	 * to complete without deadlocking against vm_fault.
 	 * Instead we simply call vm_object_page_remove() and
 	 * allow it to block internally on a page-by-page
 	 * basis when it encounters pages undergoing async
 	 * I/O.
 	 */
 	if (object->type == OBJT_VNODE &&
 	    (object->flags & OBJ_MIGHTBEDIRTY) != 0) {
 		vp = object->handle;
 		/*
 		 * The object lock is released before the vnode is locked
 		 * and is only retaken for the page cleaning itself.
 		 */
 		VM_OBJECT_WUNLOCK(object);
 		(void) vn_start_write(vp, &mp, V_WAIT);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		if (syncio && !invalidate && offset == 0 &&
 		    atop(size) == object->size) {
 			/*
 			 * If syncing the whole mapping of the file,
 			 * it is faster to schedule all the writes in
 			 * async mode, also allowing the clustering,
 			 * and then wait for i/o to complete.
 			 */
 			flags = 0;
 			fsync_after = TRUE;
 		} else {
 			flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
 			flags |= invalidate ? (OBJPC_SYNC | OBJPC_INVAL) : 0;
 			fsync_after = FALSE;
 		}
 		VM_OBJECT_WLOCK(object);
 		res = vm_object_page_clean(object, offset, offset + size,
 		    flags);
 		VM_OBJECT_WUNLOCK(object);
 		if (fsync_after)
 			error = VOP_FSYNC(vp, MNT_WAIT, curthread);
 		VOP_UNLOCK(vp, 0);
 		vn_finished_write(mp);
 		/* A failed fsync makes the whole operation a failure. */
 		if (error != 0)
 			res = FALSE;
 		VM_OBJECT_WLOCK(object);
 	}
 	if ((object->type == OBJT_VNODE ||
 	     object->type == OBJT_DEVICE) && invalidate) {
 		if (object->type == OBJT_DEVICE)
 			/*
 			 * The option OBJPR_NOTMAPPED must be passed here
 			 * because vm_object_page_remove() cannot remove
 			 * unmanaged mappings.
 			 */
 			flags = OBJPR_NOTMAPPED;
 		else if (old_msync)
 			flags = 0;
 		else
 			flags = OBJPR_CLEANONLY;
 		vm_object_page_remove(object, OFF_TO_IDX(offset),
 		    OFF_TO_IDX(offset + size + PAGE_MASK), flags);
 	}
 	VM_OBJECT_WUNLOCK(object);
 	return (res);
 }
 
 /*
  * Decide whether madvise-style advice may be applied to pages of the given
  * object.  Unmanaged objects never qualify, since their pages do not live
  * on the page queues.  MADV_FREE discards data, so it is additionally
  * restricted to anonymous (OBJT_DEFAULT or OBJT_SWAP) objects that have
  * OBJ_ONEMAPPING set; every other kind of advice is accepted for any
  * managed object.
  */
 static bool
 vm_object_advice_applies(vm_object_t object, int advice)
 {
 	bool anonymous;
 
 	if ((object->flags & OBJ_UNMANAGED) != 0)
 		return (false);
 	if (advice == MADV_FREE) {
 		anonymous = object->type == OBJT_DEFAULT ||
 		    object->type == OBJT_SWAP;
 		return (anonymous && (object->flags & OBJ_ONEMAPPING) != 0);
 	}
 	return (true);
 }
 
 /*
  * When applying MADV_FREE to a swap object, release the swap space that
  * backs "size" pages starting at "pindex".  For any other advice or object
  * type this is a no-op.
  */
 static void
 vm_object_madvise_freespace(vm_object_t object, int advice, vm_pindex_t pindex,
     vm_size_t size)
 {
 
 	if (advice != MADV_FREE || object->type != OBJT_SWAP)
 		return;
 	swap_pager_freespace(object, pindex, size);
 }
 
 /*
  *	vm_object_madvise:
  *
  *	Implements the madvise function at the object/page level.
  *
  *	MADV_WILLNEED	(any object)
  *
  *	    Activate the specified pages if they are resident.
  *
  *	MADV_DONTNEED	(any object)
  *
  *	    Deactivate the specified pages if they are resident.
  *
  *	MADV_FREE	(OBJT_DEFAULT/OBJT_SWAP objects,
  *			 OBJ_ONEMAPPING only)
  *
  *	    Deactivate and clean the specified pages if they are
  *	    resident.  This permits the process to reuse the pages
  *	    without faulting or the kernel to reclaim the pages
  *	    without I/O.
  *
  *	The advice is applied to the page indices in [pindex, end).  A
  *	NULL object is ignored.  The object is write-locked and unlocked
  *	internally; backing objects are locked as the shadow chain is
  *	searched.
  */
 void
 vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end,
     int advice)
 {
 	vm_pindex_t tpindex;
 	vm_object_t backing_object, tobject;
 	vm_page_t m, tm;
 
 	if (object == NULL)
 		return;
 
 	/*
 	 * Re-entered after sleeping on a busy page.  "pindex" still names
 	 * the page that was busy, so the scan resumes there.
 	 */
 relookup:
 	VM_OBJECT_WLOCK(object);
 	if (!vm_object_advice_applies(object, advice)) {
 		VM_OBJECT_WUNLOCK(object);
 		return;
 	}
 	/*
 	 * "m" tracks the next resident page of the top-level object at or
 	 * after "pindex"; "tm" is the page the advice is applied to and
 	 * "tobject" is the object that owns it.
 	 */
 	for (m = vm_page_find_least(object, pindex); pindex < end; pindex++) {
 		tobject = object;
 
 		/*
 		 * If the next page isn't resident in the top-level object, we
 		 * need to search the shadow chain.  When applying MADV_FREE, we
 		 * take care to release any swap space used to store
 		 * non-resident pages.
 		 */
 		if (m == NULL || pindex < m->pindex) {
 			/*
 			 * Optimize a common case: if the top-level object has
 			 * no backing object, we can skip over the non-resident
 			 * range in constant time.
 			 */
 			if (object->backing_object == NULL) {
 				tpindex = (m != NULL && m->pindex < end) ?
 				    m->pindex : end;
 				vm_object_madvise_freespace(object, advice,
 				    pindex, tpindex - pindex);
 				if ((pindex = tpindex) == end)
 					break;
 				goto next_page;
 			}
 
 			tpindex = pindex;
 			do {
 				vm_object_madvise_freespace(tobject, advice,
 				    tpindex, 1);
 				/*
 				 * Prepare to search the next object in the
 				 * chain.
 				 */
 				backing_object = tobject->backing_object;
 				if (backing_object == NULL)
 					goto next_pindex;
 				VM_OBJECT_WLOCK(backing_object);
 				tpindex +=
 				    OFF_TO_IDX(tobject->backing_object_offset);
 				/*
 				 * The top-level object stays locked for the
 				 * whole scan; only intermediate objects are
 				 * released while descending.
 				 */
 				if (tobject != object)
 					VM_OBJECT_WUNLOCK(tobject);
 				tobject = backing_object;
 				if (!vm_object_advice_applies(tobject, advice))
 					goto next_pindex;
 			} while ((tm = vm_page_lookup(tobject, tpindex)) ==
 			    NULL);
 		} else {
 next_page:
 			/* The page is resident in the top-level object. */
 			tm = m;
 			m = TAILQ_NEXT(m, listq);
 		}
 
 		/*
 		 * If the page is not in a normal state, skip it.
 		 */
 		if (tm->valid != VM_PAGE_BITS_ALL)
 			goto next_pindex;
 		vm_page_lock(tm);
 		if (tm->hold_count != 0 || tm->wire_count != 0) {
 			vm_page_unlock(tm);
 			goto next_pindex;
 		}
 		KASSERT((tm->flags & PG_FICTITIOUS) == 0,
 		    ("vm_object_madvise: page %p is fictitious", tm));
 		KASSERT((tm->oflags & VPO_UNMANAGED) == 0,
 		    ("vm_object_madvise: page %p is not managed", tm));
 		if (vm_page_busied(tm)) {
 			/*
 			 * Release every object lock held, sleep until the
 			 * page is no longer busy, and restart the scan.
 			 */
 			if (object != tobject)
 				VM_OBJECT_WUNLOCK(tobject);
 			VM_OBJECT_WUNLOCK(object);
 			if (advice == MADV_WILLNEED) {
 				/*
 				 * Reference the page before unlocking and
 				 * sleeping so that the page daemon is less
 				 * likely to reclaim it.
 				 */
 				vm_page_aflag_set(tm, PGA_REFERENCED);
 			}
 			vm_page_busy_sleep(tm, "madvpo", false);
   			goto relookup;
 		}
 		vm_page_advise(tm, advice);
 		vm_page_unlock(tm);
 		vm_object_madvise_freespace(tobject, advice, tm->pindex, 1);
 next_pindex:
 		/* Drop the lock of a backing object reached by the search. */
 		if (tobject != object)
 			VM_OBJECT_WUNLOCK(tobject);
 	}
 	VM_OBJECT_WUNLOCK(object);
 }
 
 /*
  *	vm_object_shadow:
  *
  *	Interpose a fresh OBJT_DEFAULT object of "length" bytes in front of
  *	the object named by *object, so that the new object is backed by
  *	the old one starting at *offset.
  *
  *	On return *object names the new object and *offset is zero.  The
  *	exception is a source that is anonymous (OBJT_DEFAULT or OBJT_SWAP
  *	with no handle) and has a single reference: such an object is not
  *	shared, so no shadow is created and both arguments are left as
  *	they were.
  *
  *	The source's reference count is not modified here: the reference
  *	the caller held through *object becomes the new object's reference
  *	to its backing object.
  */
 void
 vm_object_shadow(vm_object_t *object, vm_ooffset_t *offset, vm_size_t length)
 {
 	vm_object_t backing, shadow;
 	bool unshared;
 
 	backing = *object;
 
 	/*
 	 * Nothing to do when the existing object is private to the caller.
 	 */
 	if (backing != NULL) {
 		VM_OBJECT_WLOCK(backing);
 		unshared = backing->ref_count == 1 &&
 		    backing->handle == NULL &&
 		    (backing->type == OBJT_DEFAULT ||
 		    backing->type == OBJT_SWAP);
 		VM_OBJECT_WUNLOCK(backing);
 		if (unshared)
 			return;
 	}
 
 	/*
 	 * Create the shadow and point it at the source.  The offset that
 	 * the caller used into the source becomes the shadow's offset
 	 * into its backing object.
 	 */
 	shadow = vm_object_allocate(OBJT_DEFAULT, atop(length));
 	shadow->backing_object = backing;
 	shadow->backing_object_offset = *offset;
 
 	/*
 	 * Register the shadow on the source's shadow list.  Where
 	 * reservations are in use, also inherit the source's coloring so
 	 * that page colors stay consistent across the combined object.
 	 */
 	if (backing != NULL) {
 		VM_OBJECT_WLOCK(backing);
 		LIST_INSERT_HEAD(&backing->shadow_head, shadow, shadow_list);
 		backing->shadow_count++;
 #if VM_NRESERVLEVEL > 0
 		shadow->flags |= backing->flags & OBJ_COLORED;
 		shadow->pg_color = (backing->pg_color + OFF_TO_IDX(*offset)) &
 		    ((1 << (VM_NFREEORDER - 1)) - 1);
 #endif
 		VM_OBJECT_WUNLOCK(backing);
 	}
 
 	/*
 	 * Hand the new object and its zero offset back to the caller.
 	 */
 	*offset = 0;
 	*object = shadow;
 }
 
 /*
  *	vm_object_split:
  *
  * Split the pages in a map entry into a new object.  This affords
  * easier removal of unused pages, and keeps object inheritance from
  * being a negative impact on memory usage.
  *
  * The entry's object must be write-locked by the caller.  Nothing is done,
  * and that lock is still held on return, if the object is neither
  * OBJT_DEFAULT nor OBJT_SWAP, if it has at most one reference, or if its
  * backing object is dead.  Otherwise the entry is switched to the new
  * object with offset 0, one reference to the original object is dropped,
  * and the new object is returned write-locked.
  */
 void
 vm_object_split(vm_map_entry_t entry)
 {
 	vm_page_t m, m_next;
 	vm_object_t orig_object, new_object, source;
 	vm_pindex_t idx, offidxstart;
 	vm_size_t size;
 
 	orig_object = entry->object.vm_object;
 	if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
 		return;
 	if (orig_object->ref_count <= 1)
 		return;
 	/* The lock is not held across the allocation below. */
 	VM_OBJECT_WUNLOCK(orig_object);
 
 	/* First page index and page count covered by the entry. */
 	offidxstart = OFF_TO_IDX(entry->offset);
 	size = atop(entry->end - entry->start);
 
 	/*
 	 * If swap_pager_copy() is later called, it will convert new_object
 	 * into a swap object.
 	 */
 	new_object = vm_object_allocate(OBJT_DEFAULT, size);
 
 	/*
 	 * At this point, the new object is still private, so the order in
 	 * which the original and new objects are locked does not matter.
 	 */
 	VM_OBJECT_WLOCK(new_object);
 	VM_OBJECT_WLOCK(orig_object);
 	source = orig_object->backing_object;
 	if (source != NULL) {
 		VM_OBJECT_WLOCK(source);
 		if ((source->flags & OBJ_DEAD) != 0) {
 			/*
 			 * Give up: discard the new object and return with
 			 * only the original object locked, as on entry.
 			 */
 			VM_OBJECT_WUNLOCK(source);
 			VM_OBJECT_WUNLOCK(orig_object);
 			VM_OBJECT_WUNLOCK(new_object);
 			vm_object_deallocate(new_object);
 			VM_OBJECT_WLOCK(orig_object);
 			return;
 		}
 		/* The new object shadows the same backing object. */
 		LIST_INSERT_HEAD(&source->shadow_head,
 				  new_object, shadow_list);
 		source->shadow_count++;
 		vm_object_reference_locked(source);	/* for new_object */
 		vm_object_clear_flag(source, OBJ_ONEMAPPING);
 		VM_OBJECT_WUNLOCK(source);
 		new_object->backing_object_offset = 
 			orig_object->backing_object_offset + entry->offset;
 		new_object->backing_object = source;
 	}
 	/*
 	 * Move the portion of the swap charge that corresponds to the
 	 * entry's size from the original object to the new one.
 	 */
 	if (orig_object->cred != NULL) {
 		new_object->cred = orig_object->cred;
 		crhold(orig_object->cred);
 		new_object->charge = ptoa(size);
 		KASSERT(orig_object->charge >= ptoa(size),
 		    ("orig_object->charge < 0"));
 		orig_object->charge -= ptoa(size);
 	}
 retry:
 	/*
 	 * Transfer every resident page in the entry's range.  The scan is
 	 * restarted from the beginning whenever the locks had to be
 	 * dropped.
 	 */
 	m = vm_page_find_least(orig_object, offidxstart);
 	for (; m != NULL && (idx = m->pindex - offidxstart) < size;
 	    m = m_next) {
 		m_next = TAILQ_NEXT(m, listq);
 
 		/*
 		 * We must wait for pending I/O to complete before we can
 		 * rename the page.
 		 *
 		 * We do not have to VM_PROT_NONE the page as mappings should
 		 * not be changed by this operation.
 		 */
 		if (vm_page_busied(m)) {
 			VM_OBJECT_WUNLOCK(new_object);
 			vm_page_lock(m);
 			VM_OBJECT_WUNLOCK(orig_object);
 			vm_page_busy_sleep(m, "spltwt", false);
 			VM_OBJECT_WLOCK(orig_object);
 			VM_OBJECT_WLOCK(new_object);
 			goto retry;
 		}
 
 		/* vm_page_rename() will dirty the page. */
 		if (vm_page_rename(m, new_object, idx)) {
 			/*
 			 * The rename failed; wait with both objects
 			 * unlocked and then start over.
 			 */
 			VM_OBJECT_WUNLOCK(new_object);
 			VM_OBJECT_WUNLOCK(orig_object);
 			VM_WAIT;
 			VM_OBJECT_WLOCK(orig_object);
 			VM_OBJECT_WLOCK(new_object);
 			goto retry;
 		}
 #if VM_NRESERVLEVEL > 0
 		/*
 		 * If some of the reservation's allocated pages remain with
 		 * the original object, then transferring the reservation to
 		 * the new object is neither particularly beneficial nor
 		 * particularly harmful as compared to leaving the reservation
 		 * with the original object.  If, however, all of the
 		 * reservation's allocated pages are transferred to the new
 		 * object, then transferring the reservation is typically
 		 * beneficial.  Determining which of these two cases applies
 		 * would be more costly than unconditionally renaming the
 		 * reservation.
 		 */
 		vm_reserv_rename(m, new_object, orig_object, offidxstart);
 #endif
 		/*
 		 * Keep moved pages exclusively busied until the
 		 * swap_pager_copy() below, which may sleep, has finished.
 		 */
 		if (orig_object->type == OBJT_SWAP)
 			vm_page_xbusy(m);
 	}
 	if (orig_object->type == OBJT_SWAP) {
 		/*
 		 * swap_pager_copy() can sleep, in which case the orig_object's
 		 * and new_object's locks are released and reacquired.
 		 */
 		swap_pager_copy(orig_object, new_object, offidxstart, 0);
 		TAILQ_FOREACH(m, &new_object->memq, listq)
 			vm_page_xunbusy(m);
 	}
 	VM_OBJECT_WUNLOCK(orig_object);
 	VM_OBJECT_WUNLOCK(new_object);
 	/* Switch the entry over and drop its reference to the original. */
 	entry->object.vm_object = new_object;
 	entry->offset = 0LL;
 	vm_object_deallocate(orig_object);
 	/* Return with the entry's (new) object locked. */
 	VM_OBJECT_WLOCK(new_object);
 }
 
 /*
  * Operation flags for vm_object_collapse_scan().  With OBSC_COLLAPSE_NOWAIT
  * the scan skips a page it cannot process instead of sleeping; with
  * OBSC_COLLAPSE_WAIT the backing object is marked OBJ_DEAD and the scan
  * sleeps and restarts when it cannot make progress.
  */
 #define	OBSC_COLLAPSE_NOWAIT	0x0002
 #define	OBSC_COLLAPSE_WAIT	0x0004
 
 /*
  * Called by the collapse scan when it cannot process the current page:
  * either "p" is busy, or "p" is NULL and a page rename failed.
  *
  * With OBSC_COLLAPSE_NOWAIT the page is simply skipped and "next" is
  * returned.  Otherwise both object locks are dropped, the thread sleeps
  * (on the busy page, or in VM_WAIT when "p" is NULL), the locks are
  * retaken, and the first page of the backing object is returned so that
  * the scan starts over.
  *
  * Both the object and its backing object must be write-locked.
  */
 static vm_page_t
 vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p, vm_page_t next,
     int op)
 {
 	vm_object_t backing;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	backing = object->backing_object;
 	VM_OBJECT_ASSERT_WLOCKED(backing);
 
 	KASSERT(p == NULL || vm_page_busied(p), ("unbusy page %p", p));
 	KASSERT(p == NULL || p->object == object || p->object == backing,
 	    ("invalid ownership %p %p %p", p, object, backing));
 
 	/* Non-blocking scans just move on to the next page. */
 	if ((op & OBSC_COLLAPSE_NOWAIT) != 0)
 		return (next);
 
 	if (p == NULL) {
 		VM_OBJECT_WUNLOCK(object);
 		VM_OBJECT_WUNLOCK(backing);
 		VM_WAIT;
 	} else {
 		/* Take the page lock before giving up the object locks. */
 		vm_page_lock(p);
 		VM_OBJECT_WUNLOCK(object);
 		VM_OBJECT_WUNLOCK(backing);
 		vm_page_busy_sleep(p, "vmocol", false);
 	}
 
 	VM_OBJECT_WLOCK(object);
 	VM_OBJECT_WLOCK(backing);
 	return (TAILQ_FIRST(&backing->memq));
 }
 
 /*
  * Determine whether "object" completely shadows its backing object, i.e.
  * whether every page the backing object could supply within the parent's
  * range already has a counterpart in the parent.
  *
  * Returns false if the backing object is neither OBJT_DEFAULT nor
  * OBJT_SWAP, or as soon as a backing page index is found for which the
  * parent has neither a valid resident page nor a page in its pager.
  * Returns true otherwise.
  *
  * Both the object and its backing object must be write-locked.
  */
 static bool
 vm_object_scan_all_shadowed(vm_object_t object)
 {
 	vm_object_t backing_object;
 	vm_page_t p, pp;
 	vm_pindex_t backing_offset_index, new_pindex, pi, ps;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	VM_OBJECT_ASSERT_WLOCKED(object->backing_object);
 
 	backing_object = object->backing_object;
 
 	if (backing_object->type != OBJT_DEFAULT &&
 	    backing_object->type != OBJT_SWAP)
 		return (false);
 
 	/*
 	 * "p" is the next resident page of the backing object at or after
 	 * "pi".  NOTE(review): "ps" is presumably the next index at or
 	 * after "pi" that has swap space in the backing object -- confirm
 	 * against swap_pager_find_least().
 	 */
 	pi = backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
 	p = vm_page_find_least(backing_object, pi);
 	ps = swap_pager_find_least(backing_object, pi);
 
 	/*
 	 * Only check pages inside the parent object's range and
 	 * inside the parent object's mapping of the backing object.
 	 */
 	for (;; pi++) {
 		/* Advance whichever cursor has fallen behind "pi". */
 		if (p != NULL && p->pindex < pi)
 			p = TAILQ_NEXT(p, listq);
 		if (ps < pi)
 			ps = swap_pager_find_least(backing_object, pi);
 		/* Jump to the nearest index found by either cursor. */
 		if (p == NULL && ps >= backing_object->size)
 			break;
 		else if (p == NULL)
 			pi = ps;
 		else
 			pi = MIN(p->pindex, ps);
 
 		new_pindex = pi - backing_offset_index;
 		if (new_pindex >= object->size)
 			break;
 
 		/*
 		 * See if the parent has the page or if the parent's object
 		 * pager has the page.  If the parent has the page but the page
 		 * is not valid, the parent's object pager must have the page.
 		 *
 		 * If this fails, the parent does not completely shadow the
 		 * object and we might as well give up now.
 		 */
 		pp = vm_page_lookup(object, new_pindex);
 		if ((pp == NULL || pp->valid == 0) &&
 		    !vm_pager_has_page(object, new_pindex, NULL, NULL))
 			return (false);
 	}
 	return (true);
 }
 
 /*
  * Walk the resident pages of the backing object and merge them into
  * "object".  Pages that fall outside the parent's range, or that the
  * parent already has (resident or in its pager), are freed, or merely
  * removed from the backing object if wired.  All other pages are renamed
  * into the parent.
  *
  * "op" is OBSC_COLLAPSE_WAIT or OBSC_COLLAPSE_NOWAIT and selects whether
  * the scan sleeps or skips when it meets a busy page or a failed rename.
  * With OBSC_COLLAPSE_WAIT the backing object is also marked OBJ_DEAD.
  *
  * Both the object and its backing object must be write-locked; when
  * waiting, the locks may be dropped and reacquired.  Always returns true.
  */
 static bool
 vm_object_collapse_scan(vm_object_t object, int op)
 {
 	vm_object_t backing_object;
 	vm_page_t next, p, pp;
 	vm_pindex_t backing_offset_index, new_pindex;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	VM_OBJECT_ASSERT_WLOCKED(object->backing_object);
 
 	backing_object = object->backing_object;
 	backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
 
 	/*
 	 * Initial conditions
 	 */
 	if ((op & OBSC_COLLAPSE_WAIT) != 0)
 		vm_object_set_flag(backing_object, OBJ_DEAD);
 
 	/*
 	 * Our scan
 	 */
 	for (p = TAILQ_FIRST(&backing_object->memq); p != NULL; p = next) {
 		next = TAILQ_NEXT(p, listq);
 		/* Index the page would have in the parent object. */
 		new_pindex = p->pindex - backing_offset_index;
 
 		/*
 		 * Check for busy page
 		 */
 		if (vm_page_busied(p)) {
 			next = vm_object_collapse_scan_wait(object, p, next, op);
 			continue;
 		}
 
 		KASSERT(p->object == backing_object,
 		    ("vm_object_collapse_scan: object mismatch"));
 
 		if (p->pindex < backing_offset_index ||
 		    new_pindex >= object->size) {
 			if (backing_object->type == OBJT_SWAP)
 				swap_pager_freespace(backing_object, p->pindex,
 				    1);
 
 			/*
 			 * Page is out of the parent object's range, we can
 			 * simply destroy it.
 			 */
 			vm_page_lock(p);
 			KASSERT(!pmap_page_is_mapped(p),
 			    ("freeing mapped page %p", p));
 			if (p->wire_count == 0)
 				vm_page_free(p);
 			else
 				vm_page_remove(p);
 			vm_page_unlock(p);
 			continue;
 		}
 
 		pp = vm_page_lookup(object, new_pindex);
 		if (pp != NULL && vm_page_busied(pp)) {
 			/*
 			 * The page in the parent is busy and possibly not
 			 * (yet) valid.  Until its state is finalized by the
 			 * busy bit owner, we can't tell whether it shadows the
 			 * original page.  Therefore, we must either skip it
 			 * and the original (backing_object) page or wait for
 			 * its state to be finalized.
 			 *
 			 * This is due to a race with vm_fault() where we must
 			 * unbusy the original (backing_obj) page before we can
 			 * (re)lock the parent.  Hence we can get here.
 			 */
 			next = vm_object_collapse_scan_wait(object, pp, next,
 			    op);
 			continue;
 		}
 
 		KASSERT(pp == NULL || pp->valid != 0,
 		    ("unbusy invalid page %p", pp));
 
 		if (pp != NULL || vm_pager_has_page(object, new_pindex, NULL,
 			NULL)) {
 			/*
 			 * The page already exists in the parent OR swap exists
 			 * for this location in the parent.  Leave the parent's
 			 * page alone.  Destroy the original page from the
 			 * backing object.
 			 */
 			if (backing_object->type == OBJT_SWAP)
 				swap_pager_freespace(backing_object, p->pindex,
 				    1);
 			vm_page_lock(p);
 			KASSERT(!pmap_page_is_mapped(p),
 			    ("freeing mapped page %p", p));
 			if (p->wire_count == 0)
 				vm_page_free(p);
 			else
 				vm_page_remove(p);
 			vm_page_unlock(p);
 			continue;
 		}
 
 		/*
 		 * Page does not exist in parent, rename the page from the
 		 * backing object to the main object.
 		 *
 		 * If the page was mapped to a process, it can remain mapped
 		 * through the rename.  vm_page_rename() will dirty the page.
 		 */
 		if (vm_page_rename(p, object, new_pindex)) {
 			/* The rename failed; wait (or skip) and carry on. */
 			next = vm_object_collapse_scan_wait(object, NULL, next,
 			    op);
 			continue;
 		}
 
 		/* Use the old pindex to free the right page. */
 		if (backing_object->type == OBJT_SWAP)
 			swap_pager_freespace(backing_object,
 			    new_pindex + backing_offset_index, 1);
 
 #if VM_NRESERVLEVEL > 0
 		/*
 		 * Rename the reservation.
 		 */
 		vm_reserv_rename(p, object, backing_object,
 		    backing_offset_index);
 #endif
 	}
 	return (true);
 }
 
 
 /*
  * Quick, partial collapse.  This variant may be used while
  * paging_in_progress is non-zero on either object.  It runs the collapse
  * scan in its non-sleeping mode, and only when the parent holds the sole
  * reference to the backing object, so pages that cannot be handled right
  * away are simply left behind.  It is not a complete collapse, but should
  * plug 99.9% of the remaining leaks.
  *
  * Both the object and its backing object must be write-locked.
  */
 static void
 vm_object_qcollapse(vm_object_t object)
 {
 	vm_object_t backing;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	backing = object->backing_object;
 	VM_OBJECT_ASSERT_WLOCKED(backing);
 
 	if (backing->ref_count == 1)
 		vm_object_collapse_scan(object, OBSC_COLLAPSE_NOWAIT);
 }
 
 /*
  *	vm_object_collapse:
  *
  *	Collapse an object with the object backing it.
  *	Pages in the backing object are moved into the
  *	parent, and the backing object is deallocated.
  *
  *	If the backing object has other references it cannot be
  *	collapsed, but it is bypassed when the parent shadows all of
  *	its pages.  The loop repeats with each new backing object until
  *	neither operation is possible.
  *
  *	The object must be write-locked; it is still locked on return.
  */
 void
 vm_object_collapse(vm_object_t object)
 {
 	vm_object_t backing_object, new_backing_object;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
 	while (TRUE) {
 		/*
 		 * Verify that the conditions are right for collapse:
 		 *
 		 * The object exists and the backing object exists.
 		 */
 		if ((backing_object = object->backing_object) == NULL)
 			break;
 
 		/*
 		 * we check the backing object first, because it is most likely
 		 * not collapsable.
 		 */
 		VM_OBJECT_WLOCK(backing_object);
 		if (backing_object->handle != NULL ||
 		    (backing_object->type != OBJT_DEFAULT &&
 		     backing_object->type != OBJT_SWAP) ||
 		    (backing_object->flags & OBJ_DEAD) ||
 		    object->handle != NULL ||
 		    (object->type != OBJT_DEFAULT &&
 		     object->type != OBJT_SWAP) ||
 		    (object->flags & OBJ_DEAD)) {
 			VM_OBJECT_WUNLOCK(backing_object);
 			break;
 		}
 
 		/*
 		 * With paging in progress on either object only the
 		 * partial, non-sleeping collapse is attempted.
 		 */
 		if (object->paging_in_progress != 0 ||
 		    backing_object->paging_in_progress != 0) {
 			vm_object_qcollapse(object);
 			VM_OBJECT_WUNLOCK(backing_object);
 			break;
 		}
 
 		/*
 		 * We know that we can either collapse the backing object (if
 		 * the parent is the only reference to it) or (perhaps) have
 		 * the parent bypass the object if the parent happens to shadow
 		 * all the resident pages in the entire backing object.
 		 *
 		 * This is ignoring pager-backed pages such as swap pages.
 		 * vm_object_collapse_scan fails the shadowing test in this
 		 * case.
 		 */
 		if (backing_object->ref_count == 1) {
 			/*
 			 * Raise paging_in_progress on both objects for the
 			 * duration of the collapse; the matching wakeups
 			 * are below.
 			 */
 			vm_object_pip_add(object, 1);
 			vm_object_pip_add(backing_object, 1);
 
 			/*
 			 * If there is exactly one reference to the backing
 			 * object, we can collapse it into the parent.
 			 */
 			vm_object_collapse_scan(object, OBSC_COLLAPSE_WAIT);
 
 #if VM_NRESERVLEVEL > 0
 			/*
 			 * Break any reservations from backing_object.
 			 */
 			if (__predict_false(!LIST_EMPTY(&backing_object->rvq)))
 				vm_reserv_break_all(backing_object);
 #endif
 
 			/*
 			 * Move the pager from backing_object to object.
 			 */
 			if (backing_object->type == OBJT_SWAP) {
 				/*
 				 * swap_pager_copy() can sleep, in which case
 				 * the backing_object's and object's locks are
 				 * released and reacquired.
 				 * Since swap_pager_copy() is being asked to
 				 * destroy the source, it will change the
 				 * backing_object's type to OBJT_DEFAULT.
 				 */
 				swap_pager_copy(
 				    backing_object,
 				    object,
 				    OFF_TO_IDX(object->backing_object_offset), TRUE);
 			}
 			/*
 			 * Object now shadows whatever backing_object did.
 			 * Note that the reference to
 			 * backing_object->backing_object moves from within
 			 * backing_object to within object.
 			 */
 			LIST_REMOVE(object, shadow_list);
 			backing_object->shadow_count--;
 			if (backing_object->backing_object) {
 				VM_OBJECT_WLOCK(backing_object->backing_object);
 				LIST_REMOVE(backing_object, shadow_list);
 				LIST_INSERT_HEAD(
 				    &backing_object->backing_object->shadow_head,
 				    object, shadow_list);
 				/*
 				 * The shadow_count has not changed.
 				 */
 				VM_OBJECT_WUNLOCK(backing_object->backing_object);
 			}
 			object->backing_object = backing_object->backing_object;
 			object->backing_object_offset +=
 			    backing_object->backing_object_offset;
 
 			/*
 			 * Discard backing_object.
 			 *
 			 * Since the backing object has no pages, no pager left,
 			 * and no object references within it, all that is
 			 * necessary is to dispose of it.
 			 */
 			KASSERT(backing_object->ref_count == 1, (
 "backing_object %p was somehow re-referenced during collapse!",
 			    backing_object));
 			vm_object_pip_wakeup(backing_object);
 			backing_object->type = OBJT_DEAD;
 			backing_object->ref_count = 0;
 			VM_OBJECT_WUNLOCK(backing_object);
 			vm_object_destroy(backing_object);
 
 			vm_object_pip_wakeup(object);
 			object_collapses++;
 		} else {
 			/*
 			 * If we do not entirely shadow the backing object,
 			 * there is nothing we can do so we give up.
 			 */
 			if (object->resident_page_count != object->size &&
 			    !vm_object_scan_all_shadowed(object)) {
 				VM_OBJECT_WUNLOCK(backing_object);
 				break;
 			}
 
 			/*
 			 * Make the parent shadow the next object in the
 			 * chain.  Deallocating backing_object will not remove
 			 * it, since its reference count is at least 2.
 			 */
 			LIST_REMOVE(object, shadow_list);
 			backing_object->shadow_count--;
 
 			new_backing_object = backing_object->backing_object;
 			if ((object->backing_object = new_backing_object) != NULL) {
 				VM_OBJECT_WLOCK(new_backing_object);
 				LIST_INSERT_HEAD(
 				    &new_backing_object->shadow_head,
 				    object,
 				    shadow_list
 				);
 				new_backing_object->shadow_count++;
 				/* The parent now holds its own reference. */
 				vm_object_reference_locked(new_backing_object);
 				VM_OBJECT_WUNLOCK(new_backing_object);
 				object->backing_object_offset +=
 					backing_object->backing_object_offset;
 			}
 
 			/*
 			 * Drop the reference count on backing_object. Since
 			 * its ref_count was at least 2, it will not vanish.
 			 */
 			backing_object->ref_count--;
 			VM_OBJECT_WUNLOCK(backing_object);
 			object_bypasses++;
 		}
 
 		/*
 		 * Try again with this object's new backing object.
 		 */
 	}
 }
 
 /*
  *	vm_object_page_remove:
  *
  *	For the given object, either frees or invalidates each of the
  *	specified pages.  In general, a page is freed.  However, if a page is
  *	wired for any reason other than the existence of a managed, wired
  *	mapping, then it may be invalidated but not removed from the object.
  *	Pages are specified by the given range ["start", "end") and the option
  *	OBJPR_CLEANONLY.  As a special case, if "end" is zero, then the range
  *	extends from "start" to the end of the object.  If the option
  *	OBJPR_CLEANONLY is specified, then only the non-dirty pages within the
  *	specified range are affected.  If the option OBJPR_NOTMAPPED is
  *	specified, then the pages within the specified range must have no
  *	mappings.  Otherwise, if this option is not specified, any mappings to
  *	the specified pages are removed before the pages are freed or
  *	invalidated.
  *
  *	In general, this operation should only be performed on objects that
  *	contain managed pages.  There are, however, two exceptions.  First, it
  *	is performed on the kernel and kmem objects by vm_map_entry_delete().
  *	Second, it is used by msync(..., MS_INVALIDATE) to invalidate device-
  *	backed pages.  In both of these cases, the option OBJPR_CLEANONLY must
  *	not be specified and the option OBJPR_NOTMAPPED must be specified.
  *
  *	The object must be locked.
  */
 void
 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
     int options)
 {
 	vm_page_t p, next;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	/*
 	 * For an unmanaged object the only accepted option combination is
 	 * OBJPR_NOTMAPPED without OBJPR_CLEANONLY.
 	 */
 	KASSERT((object->flags & OBJ_UNMANAGED) == 0 ||
 	    (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED,
 	    ("vm_object_page_remove: illegal options for object %p", object));
 	if (object->resident_page_count == 0)
 		return;
 	/* Paired with the vm_object_pip_wakeup() at the end of the scan. */
 	vm_object_pip_add(object, 1);
 again:
 	p = vm_page_find_least(object, start);
 
 	/*
 	 * Here, the variable "p" is either (1) the page with the least pindex
 	 * greater than or equal to the parameter "start" or (2) NULL.
 	 */
 	for (; p != NULL && (p->pindex < end || end == 0); p = next) {
 		/* Fetch the successor first; "p" may be freed below. */
 		next = TAILQ_NEXT(p, listq);
 
 		/*
 		 * If the page is wired for any reason besides the existence
 		 * of managed, wired mappings, then it cannot be freed.  For
 		 * example, fictitious pages, which represent device memory,
 		 * are inherently wired and cannot be freed.  They can,
 		 * however, be invalidated if the option OBJPR_CLEANONLY is
 		 * not specified.
 		 */
 		vm_page_lock(p);
 		if (vm_page_xbusied(p)) {
 			/*
 			 * The object lock is dropped while sleeping, so the
 			 * scan is restarted from "start" afterwards.
 			 */
 			VM_OBJECT_WUNLOCK(object);
 			vm_page_busy_sleep(p, "vmopax", true);
 			VM_OBJECT_WLOCK(object);
 			goto again;
 		}
 		if (p->wire_count != 0) {
 			/* Wired: never freed, at most unmapped and invalidated. */
 			if ((options & OBJPR_NOTMAPPED) == 0)
 				pmap_remove_all(p);
 			if ((options & OBJPR_CLEANONLY) == 0) {
 				p->valid = 0;
 				vm_page_undirty(p);
 			}
 			goto next;
 		}
 		if (vm_page_busied(p)) {
 			/* Same retry protocol as the exclusive-busy case above. */
 			VM_OBJECT_WUNLOCK(object);
 			vm_page_busy_sleep(p, "vmopar", false);
 			VM_OBJECT_WLOCK(object);
 			goto again;
 		}
 		KASSERT((p->flags & PG_FICTITIOUS) == 0,
 		    ("vm_object_page_remove: page %p is fictitious", p));
 		if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) {
 			/*
 			 * With OBJPR_CLEANONLY, pmap_remove_write() is applied
 			 * first (unless the caller promised there are no
 			 * mappings) and a page that is dirty is then skipped.
 			 */
 			if ((options & OBJPR_NOTMAPPED) == 0)
 				pmap_remove_write(p);
 			if (p->dirty)
 				goto next;
 		}
 		if ((options & OBJPR_NOTMAPPED) == 0)
 			pmap_remove_all(p);
 		vm_page_free(p);
 next:
 		/* Common per-iteration exit: release the page lock taken above. */
 		vm_page_unlock(p);
 	}
 	vm_object_pip_wakeup(object);
 }
 
 /*
  *	vm_object_page_noreuse:
  *
  *	For the given object, attempt to move the specified pages to
  *	the head of the inactive queue.  This bypasses regular LRU
  *	operation and allows the pages to be reused quickly under memory
  *	pressure.  If a page is wired for any reason, then it will not
  *	be queued.  Pages are specified by the range ["start", "end").
  *	As a special case, if "end" is zero, then the range extends from
  *	"start" to the end of the object.
  *
  *	This operation should only be performed on objects that
  *	contain non-fictitious, managed pages.
  *
  *	The object must be locked.
  */
 void
 vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
 {
 	struct mtx *held, *wanted;
 	vm_page_t p, next;
 
 	VM_OBJECT_ASSERT_LOCKED(object);
 	KASSERT((object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0,
 	    ("vm_object_page_noreuse: illegal object %p", object));
 	if (object->resident_page_count == 0)
 		return;
 
 	/*
 	 * Start at the resident page with the smallest pindex that is not
 	 * below "start" (if any) and walk forward until "end" is reached;
 	 * an "end" of zero means there is no upper bound.
 	 */
 	held = NULL;
 	for (p = vm_page_find_least(object, start);
 	    p != NULL && (end == 0 || p->pindex < end); p = next) {
 		next = TAILQ_NEXT(p, listq);
 
 		/*
 		 * Only switch page locks when this page is covered by a
 		 * different lock than the one currently held.
 		 */
 		wanted = vm_page_lockptr(p);
 		if (wanted != held) {
 			if (held != NULL)
 				mtx_unlock(held);
 			mtx_lock(wanted);
 			held = wanted;
 		}
 		vm_page_deactivate_noreuse(p);
 	}
 
 	/* Drop whichever page lock is still held after the last page. */
 	if (held != NULL)
 		mtx_unlock(held);
 }
 
 /*
  *	Populate the specified range of the object with valid pages.  Returns
  *	TRUE if the range is successfully populated and FALSE otherwise.
  *
  *	Note: This function should be optimized to pass a larger array of
  *	pages to vm_pager_get_pages() before it is applied to a non-
  *	OBJT_DEVICE object.
  *
  *	The object must be locked.
  */
 boolean_t
 vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
 {
 	vm_page_t m;
 	vm_pindex_t pindex;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
 	/*
 	 * Phase 1: grab each page in turn and page it in if it is not fully
 	 * valid.  Every grabbed page is left busy, because a later
 	 * iteration may unlock the object.
 	 */
 	pindex = start;
 	while (pindex < end) {
 		m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL);
 		if (m->valid != VM_PAGE_BITS_ALL &&
 		    vm_pager_get_pages(object, &m, 1, NULL, NULL) !=
 		    VM_PAGER_OK) {
 			/* The pager failed: discard this page and give up. */
 			vm_page_lock(m);
 			vm_page_free(m);
 			vm_page_unlock(m);
 			break;
 		}
 		pindex++;
 	}
 
 	/*
 	 * Phase 2: release the busy state of every page that was
 	 * successfully populated, i.e. those in [start, pindex).
 	 */
 	if (pindex > start) {
 		for (m = vm_page_lookup(object, start);
 		    m != NULL && m->pindex < pindex;
 		    m = TAILQ_NEXT(m, listq))
 			vm_page_xunbusy(m);
 	}
 
 	/* Success only if the loop ran all the way to "end". */
 	return (pindex == end);
 }
 
 /*
  *	Routine:	vm_object_coalesce
  *	Function:	Coalesces two objects backing up adjoining
  *			regions of memory into a single object.
  *
  *	returns TRUE if objects were combined.
  *
  *	NOTE:	Only works at the moment if the second object is NULL -
  *		if it's not, which object do we lock first?
  *
  *	Parameters:
  *		prev_object	First object to coalesce
  *		prev_offset	Offset into prev_object
  *		prev_size	Size of reference to prev_object
  *		next_size	Size of reference to the second object
  *		reserved	Indicator that extension region has
  *				swap accounted for
  *
  *	Conditions:
  *	The object must *not* be locked.
  */
 boolean_t
 vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
     vm_size_t prev_size, vm_size_t next_size, boolean_t reserved)
 {
 	vm_pindex_t next_pindex;
 
 	/* With no previous object there is nothing to extend; report success. */
 	if (prev_object == NULL)
 		return (TRUE);
 	VM_OBJECT_WLOCK(prev_object);
 	/* Only default/swap objects that are not tmpfs nodes are eligible. */
 	if ((prev_object->type != OBJT_DEFAULT &&
 	    prev_object->type != OBJT_SWAP) ||
 	    (prev_object->flags & OBJ_TMPFS_NODE) != 0) {
 		VM_OBJECT_WUNLOCK(prev_object);
 		return (FALSE);
 	}
 
 	/*
 	 * Try to collapse the object first
 	 */
 	vm_object_collapse(prev_object);
 
 	/*
 	 * Can't coalesce if the object:
 	 *   - has more than one reference,
 	 *   - is paged out,
 	 *   - shadows another object, or
 	 *   - has a copy elsewhere
 	 * (any of which mean that the pages not mapped to prev_entry may be
 	 * in use anyway).
 	 */
 	if (prev_object->backing_object != NULL) {
 		VM_OBJECT_WUNLOCK(prev_object);
 		return (FALSE);
 	}
 
 	/* From here on, prev_size and next_size are counted in pages. */
 	prev_size >>= PAGE_SHIFT;
 	next_size >>= PAGE_SHIFT;
 	/* First page index of the region that would be appended. */
 	next_pindex = OFF_TO_IDX(prev_offset) + prev_size;
 
 	/*
 	 * A multiply-referenced object is only extended when the new region
 	 * starts exactly at its current end.
 	 */
 	if ((prev_object->ref_count > 1) &&
 	    (prev_object->size != next_pindex)) {
 		VM_OBJECT_WUNLOCK(prev_object);
 		return (FALSE);
 	}
 
 	/*
 	 * Account for the charge.
 	 */
 	if (prev_object->cred != NULL) {
 
 		/*
 		 * If prev_object was charged, then this mapping,
 		 * although not charged now, may become writable
 		 * later. Non-NULL cred in the object would prevent
 		 * swap reservation during enabling of the write
 		 * access, so reserve swap now. Failed reservation
 		 * cause allocation of the separate object for the map
 		 * entry, and swap reservation for this entry is
 		 * managed in appropriate time.
 		 */
 		if (!reserved && !swap_reserve_by_cred(ptoa(next_size),
 		    prev_object->cred)) {
 			VM_OBJECT_WUNLOCK(prev_object);
 			return (FALSE);
 		}
 		prev_object->charge += ptoa(next_size);
 	}
 
 	/*
 	 * Remove any pages that may still be in the object from a previous
 	 * deallocation.
 	 */
 	if (next_pindex < prev_object->size) {
 		vm_object_page_remove(prev_object, next_pindex, next_pindex +
 		    next_size, 0);
 		if (prev_object->type == OBJT_SWAP)
 			swap_pager_freespace(prev_object,
 					     next_pindex, next_size);
 		/* The charge adjustment below is compiled out. */
 #if 0
 		if (prev_object->cred != NULL) {
 			KASSERT(prev_object->charge >=
 			    ptoa(prev_object->size - next_pindex),
 			    ("object %p overcharged 1 %jx %jx", prev_object,
 				(uintmax_t)next_pindex, (uintmax_t)next_size));
 			prev_object->charge -= ptoa(prev_object->size -
 			    next_pindex);
 		}
 #endif
 	}
 
 	/*
 	 * Extend the object if necessary.
 	 */
 	if (next_pindex + next_size > prev_object->size)
 		prev_object->size = next_pindex + next_size;
 
 	VM_OBJECT_WUNLOCK(prev_object);
 	return (TRUE);
 }
 
 void
 vm_object_set_writeable_dirty(vm_object_t object)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
 	/*
 	 * Vnode-backed objects: bump the generation count and make sure
 	 * OBJ_MIGHTBEDIRTY is set.
 	 */
 	if (object->type == OBJT_VNODE) {
 		object->generation++;
 		if ((object->flags & OBJ_MIGHTBEDIRTY) == 0)
 			vm_object_set_flag(object, OBJ_MIGHTBEDIRTY);
 		return;
 	}
 
 	/*
 	 * Everything else is ignored, except that an object flagged as a
 	 * tmpfs node (which must be swap-backed) is marked OBJ_TMPFS_DIRTY.
 	 */
 	if ((object->flags & OBJ_TMPFS_NODE) != 0) {
 		KASSERT(object->type == OBJT_SWAP, ("non-swap tmpfs"));
 		vm_object_set_flag(object, OBJ_TMPFS_DIRTY);
 	}
 }
 
 /*
  *	vm_object_unwire:
  *
  *	For each page offset within the specified range of the given object,
  *	find the highest-level page in the shadow chain and unwire it.  A page
  *	must exist at every page offset, and the highest-level page must be
  *	wired.
  */
 void
 vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length,
     uint8_t queue)
 {
 	vm_object_t tobject;
 	vm_page_t m, tm;
 	vm_pindex_t end_pindex, pindex, tpindex;
 	int depth, locked_depth;
 
 	KASSERT((offset & PAGE_MASK) == 0,
 	    ("vm_object_unwire: offset is not page aligned"));
 	KASSERT((length & PAGE_MASK) == 0,
 	    ("vm_object_unwire: length is not a multiple of PAGE_SIZE"));
 	/* The wired count of a fictitious page never changes. */
 	if ((object->flags & OBJ_FICTITIOUS) != 0)
 		return;
 	pindex = OFF_TO_IDX(offset);
 	end_pindex = pindex + atop(length);
 	/*
 	 * "locked_depth" counts how many objects of the shadow chain,
 	 * starting with "object" itself, are currently read-locked.
 	 */
 	locked_depth = 1;
 	VM_OBJECT_RLOCK(object);
 	/* "m" tracks the next resident page of "object" at or after "pindex". */
 	m = vm_page_find_least(object, pindex);
 	while (pindex < end_pindex) {
 		if (m == NULL || pindex < m->pindex) {
 			/*
 			 * The first object in the shadow chain doesn't
 			 * contain a page at the current index.  Therefore,
 			 * the page must exist in a backing object.
 			 */
 			tobject = object;
 			tpindex = pindex;
 			depth = 0;
 			do {
 				/* Translate the index into the backing object. */
 				tpindex +=
 				    OFF_TO_IDX(tobject->backing_object_offset);
 				tobject = tobject->backing_object;
 				KASSERT(tobject != NULL,
 				    ("vm_object_unwire: missing page"));
 				/* Fictitious backing object: nothing to unwire. */
 				if ((tobject->flags & OBJ_FICTITIOUS) != 0)
 					goto next_page;
 				depth++;
 				/*
 				 * Lock this level only the first time it is
 				 * reached; the lock is kept until the end.
 				 */
 				if (depth == locked_depth) {
 					locked_depth++;
 					VM_OBJECT_RLOCK(tobject);
 				}
 			} while ((tm = vm_page_lookup(tobject, tpindex)) ==
 			    NULL);
 		} else {
 			/* The top-level object holds the page at this index. */
 			tm = m;
 			m = TAILQ_NEXT(m, listq);
 		}
 		vm_page_lock(tm);
 		vm_page_unwire(tm, queue);
 		vm_page_unlock(tm);
 next_page:
 		pindex++;
 	}
 	/* Release the accumulated object locks. */
 	for (depth = 0; depth < locked_depth; depth++) {
 		/* Read the next link before dropping this object's lock. */
 		tobject = object->backing_object;
 		VM_OBJECT_RUNLOCK(object);
 		object = tobject;
 	}
 }
 
 struct vnode *
 vm_object_vnode(vm_object_t object)
 {
 
 	VM_OBJECT_ASSERT_LOCKED(object);
 
 	/*
 	 * A vnode object yields its handle; a swap object flagged OBJ_TMPFS
 	 * yields the tmpfs vnode recorded in its pager data.  Any other
 	 * object yields NULL.
 	 */
 	switch (object->type) {
 	case OBJT_VNODE:
 		return (object->handle);
 	case OBJT_SWAP:
 		if ((object->flags & OBJ_TMPFS) != 0)
 			return (object->un_pager.swp.swp_tmpfs);
 		break;
 	default:
 		break;
 	}
 	return (NULL);
 }
 
 /*
  * Sysctl handler that emits one variable-length struct kinfo_vmobject
  * record for every VM object on vm_object_list whose type is not
  * OBJT_DEAD.
  *
  * When no output buffer is supplied (req->oldptr == NULL), only a size
  * estimate is returned: the space for the current number of live objects
  * plus 10% slack.
  *
  * Returns 0 on success or the first error reported by SYSCTL_OUT().
  */
 static int
 sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
 {
 	struct kinfo_vmobject kvo;
 	char *fullpath, *freepath;
 	struct vnode *vp;
 	struct vattr va;
 	vm_object_t obj;
 	vm_page_t m;
 	int count, error;
 
 	if (req->oldptr == NULL) {
 		/*
 		 * If an old buffer has not been provided, generate an
 		 * estimate of the space needed for a subsequent call.
 		 */
 		mtx_lock(&vm_object_list_mtx);
 		count = 0;
 		TAILQ_FOREACH(obj, &vm_object_list, object_list) {
 			if (obj->type == OBJT_DEAD)
 				continue;
 			count++;
 		}
 		mtx_unlock(&vm_object_list_mtx);
 		return (SYSCTL_OUT(req, NULL, sizeof(struct kinfo_vmobject) *
 		    count * 11 / 10));
 	}
 
 	error = 0;
 
 	/*
 	 * VM objects are type stable and are never removed from the
 	 * list once added.  This allows us to safely read obj->object_list
 	 * after reacquiring the VM object lock.
 	 */
 	mtx_lock(&vm_object_list_mtx);
 	TAILQ_FOREACH(obj, &vm_object_list, object_list) {
 		/* Cheap unlocked check first, repeated under the object lock. */
 		if (obj->type == OBJT_DEAD)
 			continue;
 		VM_OBJECT_RLOCK(obj);
 		if (obj->type == OBJT_DEAD) {
 			VM_OBJECT_RUNLOCK(obj);
 			continue;
 		}
 		mtx_unlock(&vm_object_list_mtx);
 
 		/*
 		 * Start every record from all-zero bytes.  "kvo" lives on
 		 * the kernel stack and the record length computed below is
 		 * rounded up past the path's terminating NUL, so without
 		 * this any byte that is not explicitly assigned (the
 		 * round-up tail, and any member this handler never writes)
 		 * would be copied out to userland uninitialized.
 		 */
 		bzero(&kvo, sizeof(kvo));
 		kvo.kvo_size = ptoa(obj->size);
 		kvo.kvo_resident = obj->resident_page_count;
 		kvo.kvo_ref_count = obj->ref_count;
 		kvo.kvo_shadow_count = obj->shadow_count;
 		kvo.kvo_memattr = obj->memattr;
 		kvo.kvo_active = 0;
 		kvo.kvo_inactive = 0;
 		TAILQ_FOREACH(m, &obj->memq, listq) {
 			/*
 			 * A page may belong to the object but be
 			 * dequeued and set to PQ_NONE while the
 			 * object lock is not held.  This makes the
 			 * reads of m->queue below racy, and we do not
 			 * count pages set to PQ_NONE.  However, this
 			 * sysctl is only meant to give an
 			 * approximation of the system anyway.
 			 */
 			if (vm_page_active(m))
 				kvo.kvo_active++;
 			else if (vm_page_inactive(m))
 				kvo.kvo_inactive++;
 		}
 
 		kvo.kvo_vn_fileid = 0;
 		kvo.kvo_vn_fsid = 0;
+		kvo.kvo_vn_fsid_freebsd11 = 0;
 		freepath = NULL;
 		fullpath = "";
 		vp = NULL;
 		switch (obj->type) {
 		case OBJT_DEFAULT:
 			kvo.kvo_type = KVME_TYPE_DEFAULT;
 			break;
 		case OBJT_VNODE:
 			kvo.kvo_type = KVME_TYPE_VNODE;
 			/*
 			 * Take a vnode reference while the object is still
 			 * locked; it is released by vput() below.
 			 */
 			vp = obj->handle;
 			vref(vp);
 			break;
 		case OBJT_SWAP:
 			kvo.kvo_type = KVME_TYPE_SWAP;
 			break;
 		case OBJT_DEVICE:
 			kvo.kvo_type = KVME_TYPE_DEVICE;
 			break;
 		case OBJT_PHYS:
 			kvo.kvo_type = KVME_TYPE_PHYS;
 			break;
 		case OBJT_DEAD:
 			kvo.kvo_type = KVME_TYPE_DEAD;
 			break;
 		case OBJT_SG:
 			kvo.kvo_type = KVME_TYPE_SG;
 			break;
 		case OBJT_MGTDEVICE:
 			kvo.kvo_type = KVME_TYPE_MGTDEVICE;
 			break;
 		default:
 			kvo.kvo_type = KVME_TYPE_UNKNOWN;
 			break;
 		}
 		VM_OBJECT_RUNLOCK(obj);
 		if (vp != NULL) {
 			/* Resolve the path and file identity with no VM locks held. */
 			vn_fullpath(curthread, vp, &fullpath, &freepath);
 			vn_lock(vp, LK_SHARED | LK_RETRY);
 			if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) {
 				kvo.kvo_vn_fileid = va.va_fileid;
 				kvo.kvo_vn_fsid = va.va_fsid;
+				kvo.kvo_vn_fsid_freebsd11 = va.va_fsid;
+								/* truncate */
 			}
 			vput(vp);
 		}
 
 		strlcpy(kvo.kvo_path, fullpath, sizeof(kvo.kvo_path));
 		if (freepath != NULL)
 			free(freepath, M_TEMP);
 
 		/* Pack record size down */
 		kvo.kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path) +
 		    strlen(kvo.kvo_path) + 1;
 		kvo.kvo_structsize = roundup(kvo.kvo_structsize,
 		    sizeof(uint64_t));
 		error = SYSCTL_OUT(req, &kvo, kvo.kvo_structsize);
 		/* Retake the list lock before advancing (or leaving the loop). */
 		mtx_lock(&vm_object_list_mtx);
 		if (error)
 			break;
 	}
 	mtx_unlock(&vm_object_list_mtx);
 	return (error);
 }
 SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP |
     CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject",
     "List of VM objects");
 
 #include "opt_ddb.h"
 #ifdef DDB
 #include <sys/kernel.h>
 
 #include <sys/cons.h>
 
 #include <ddb/ddb.h>
 
 /*
  * Report (1 or 0) whether "object" is reachable from "map".  With a NULL
  * "entry" every entry of the map is examined; otherwise only "entry" is,
  * descending into it when it is a submap, or else walking the backing
  * chain of the VM object it references.
  */
 static int
 _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
 {
 	vm_map_t submap;
 	vm_map_entry_t cur;
 	vm_object_t chain;
 	int remaining;
 
 	if (map == NULL)
 		return (0);
 
 	if (entry == NULL) {
 		/* Whole-map query: recurse once per entry. */
 		remaining = map->nentries;
 		for (cur = map->header.next;
 		    remaining-- && cur != &map->header; cur = cur->next) {
 			if (_vm_object_in_map(map, object, cur))
 				return (1);
 		}
 		return (0);
 	}
 
 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 		/* Submap entry: recurse once per entry of the submap. */
 		submap = entry->object.sub_map;
 		remaining = submap->nentries;
 		for (cur = submap->header.next;
 		    remaining-- && cur != &submap->header; cur = cur->next) {
 			if (_vm_object_in_map(submap, object, cur))
 				return (1);
 		}
 		return (0);
 	}
 
 	/* Ordinary entry: look for "object" along the shadow chain. */
 	for (chain = entry->object.vm_object; chain != NULL;
 	    chain = chain->backing_object) {
 		if (chain == object)
 			return (1);
 	}
 	return (0);
 }
 
 /*
  * Report (1 or 0) whether "object" is referenced from the address space
  * of any process or from kernel_map.
  *
  * NOTE: the process list is walked without acquiring allproc_lock.
  */
 static int
 vm_object_in_map(vm_object_t object)
 {
 	struct proc *proc;
 
 	FOREACH_PROC_IN_SYSTEM(proc) {
 		/* Processes without a vmspace have no map to search. */
 		if (proc->p_vmspace != NULL &&
 		    _vm_object_in_map(&proc->p_vmspace->vm_map, object, NULL))
 			return (1);
 	}
 	return (_vm_object_in_map(kernel_map, object, NULL) ? 1 : 0);
 }
 
 DB_SHOW_COMMAND(vmochk, vm_object_check)
 {
 	vm_object_t obj;
 
 	/*
 	 * Sanity-check every internal object (no handle, default or swap
 	 * type): it should have a non-zero reference count and should be
 	 * reachable from some map.
 	 */
 	TAILQ_FOREACH(obj, &vm_object_list, object_list) {
 		if (obj->handle != NULL)
 			continue;
 		if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP)
 			continue;
 
 		if (obj->ref_count == 0)
 			db_printf("vmochk: internal obj has zero ref count: %ld\n",
 			    (long)obj->size);
 
 		if (vm_object_in_map(obj))
 			continue;
 		db_printf(
 		    "vmochk: internal obj is not in a map: "
 		    "ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
 		    obj->ref_count, (u_long)obj->size,
 		    (u_long)obj->size,
 		    (void *)obj->backing_object);
 	}
 }
 
 /*
  *	vm_object_print:	[ debug ]
  */
 DB_SHOW_COMMAND(object, vm_object_print_static)
 {
 	/* XXX convert args. */
 	/* The command's address argument is interpreted as the object. */
 	vm_object_t object = (vm_object_t)addr;
 	/* The per-page listing below is printed only when have_addr is set. */
 	boolean_t full = have_addr;
 
 	vm_page_t p;
 
 	/* XXX count is an (unused) arg.  Avoid shadowing it. */
 #define	count	was_count
 
 	/* Due to the #define above, this local is really named was_count. */
 	int count;
 
 	if (object == NULL)
 		return;
 
 	/* Summary, first line: identity, size, counts, flags and charge. */
 	db_iprintf(
 	    "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x ruid %d charge %jx\n",
 	    object, (int)object->type, (uintmax_t)object->size,
 	    object->resident_page_count, object->ref_count, object->flags,
 	    object->cred ? object->cred->cr_ruid : -1, (uintmax_t)object->charge);
 	/* Summary, second line: shadow count and backing object details. */
 	db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
 	    object->shadow_count, 
 	    object->backing_object ? object->backing_object->ref_count : 0,
 	    object->backing_object, (uintmax_t)object->backing_object_offset);
 
 	if (!full)
 		return;
 
 	/* List the resident pages, six per output line. */
 	db_indent += 2;
 	count = 0;
 	TAILQ_FOREACH(p, &object->memq, listq) {
 		if (count == 0)
 			db_iprintf("memory:=");
 		else if (count == 6) {
 			/* Line is full: start a continuation line. */
 			db_printf("\n");
 			db_iprintf(" ...");
 			count = 0;
 		} else
 			db_printf(",");
 		count++;
 
 		db_printf("(off=0x%jx,page=0x%jx)",
 		    (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));
 	}
 	/* Terminate the last, partially filled line. */
 	if (count != 0)
 		db_printf("\n");
 	db_indent -= 2;
 }
 
 /* XXX. */
 #undef count
 
 /* XXX need this non-static entry for calling from vm_map_print. */
 /*
  * Externally visible wrapper that forwards all of its arguments,
  * unchanged, to the static "show object" implementation.
  */
 void
 vm_object_print(/* db_expr_t */ long addr, boolean_t have_addr,
     /* db_expr_t */ long count, char *modif)
 {
 
 	vm_object_print_static(addr, have_addr, count, modif);
 }
 
 DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
 {
 	vm_object_t object;
 	vm_pindex_t fidx;
 	vm_paddr_t pa;
 	vm_page_t m, prev_m;
 	int rcount, nl, c;
 
 	nl = 0;
 	TAILQ_FOREACH(object, &vm_object_list, object_list) {
 		db_printf("new object: %p\n", (void *)object);
 		if (nl > 18) {
 			c = cngetc();
 			if (c != ' ')
 				return;
 			nl = 0;
 		}
 		nl++;
 		rcount = 0;
 		fidx = 0;
 		pa = -1;
 		TAILQ_FOREACH(m, &object->memq, listq) {
 			if (m->pindex > 128)
 				break;
 			if ((prev_m = TAILQ_PREV(m, pglist, listq)) != NULL &&
 			    prev_m->pindex + 1 != m->pindex) {
 				if (rcount) {
 					db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
 						(long)fidx, rcount, (long)pa);
 					if (nl > 18) {
 						c = cngetc();
 						if (c != ' ')
 							return;
 						nl = 0;
 					}
 					nl++;
 					rcount = 0;
 				}
 			}				
 			if (rcount &&
 				(VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
 				++rcount;
 				continue;
 			}
 			if (rcount) {
 				db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
 					(long)fidx, rcount, (long)pa);
 				if (nl > 18) {
 					c = cngetc();
 					if (c != ' ')
 						return;
 					nl = 0;
 				}
 				nl++;
 			}
 			fidx = m->pindex;
 			pa = VM_PAGE_TO_PHYS(m);
 			rcount = 1;
 		}
 		if (rcount) {
 			db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
 				(long)fidx, rcount, (long)pa);
 			if (nl > 18) {
 				c = cngetc();
 				if (c != ' ')
 					return;
 				nl = 0;
 			}
 			nl++;
 		}
 	}
 }
 #endif /* DDB */
Index: head/sys/vm/vm_param.h
===================================================================
--- head/sys/vm/vm_param.h	(revision 318735)
+++ head/sys/vm/vm_param.h	(revision 318736)
@@ -1,134 +1,134 @@
 /*-
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_param.h	8.1 (Berkeley) 6/11/93
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  *
  * $FreeBSD$
  */
 
 /*
  *	Machine independent virtual memory parameters.
  */
 
 #ifndef	_VM_PARAM_
 #define	_VM_PARAM_
 
 #include <machine/vmparam.h>
 
 /*
  * CTL_VM identifiers
  */
 #define	VM_TOTAL		1	/* struct vmtotal */
 #define	VM_METER                VM_TOTAL/* deprecated, use VM_TOTAL */
 #define	VM_LOADAVG	 	2	/* struct loadavg */
 #define VM_V_FREE_MIN		3	/* vm_cnt.v_free_min */
 #define VM_V_FREE_TARGET	4	/* vm_cnt.v_free_target */
 #define VM_V_FREE_RESERVED	5	/* vm_cnt.v_free_reserved */
 #define VM_V_INACTIVE_TARGET	6	/* vm_cnt.v_inactive_target */
 #define	VM_OBSOLETE_7		7	/* unused, formerly v_cache_min */
 #define	VM_OBSOLETE_8		8	/* unused, formerly v_cache_max */
 #define VM_V_PAGEOUT_FREE_MIN	9	/* vm_cnt.v_pageout_free_min */
 #define	VM_OBSOLETE_10		10	/* pageout algorithm */
 #define VM_SWAPPING_ENABLED	11	/* swapping enabled */
 #define	VM_MAXID		12	/* number of valid vm ids */
 
 /*
  * Structure for swap device statistics
  */
-#define XSWDEV_VERSION	1
+#define XSWDEV_VERSION	2
 /*
  * NOTE(review): the field notes below are inferred from the field names
  * and from the "swap device statistics" heading above -- confirm against
  * the code that fills in this structure.
  */
 struct xswdev {
 	u_int	xsw_version;	/* presumably XSWDEV_VERSION of the producer */
 	dev_t	xsw_dev;	/* presumably the swap device */
 	int	xsw_flags;	/* presumably swap device flags */
 	int	xsw_nblks;	/* presumably total size, in blocks */
 	int     xsw_used;	/* presumably amount in use, in blocks */
 };
 
 /*
  *	Return values from the VM routines.
  */
 #define	KERN_SUCCESS		0
 #define	KERN_INVALID_ADDRESS	1
 #define	KERN_PROTECTION_FAILURE	2
 #define	KERN_NO_SPACE		3
 #define	KERN_INVALID_ARGUMENT	4
 #define	KERN_FAILURE		5
 #define	KERN_RESOURCE_SHORTAGE	6
 #define	KERN_NOT_RECEIVER	7
 #define	KERN_NO_ACCESS		8
 
 #ifndef PA_LOCK_COUNT
 #ifdef SMP
 #define	PA_LOCK_COUNT	32
 #else
 #define PA_LOCK_COUNT	1
 #endif	/* !SMP */
 #endif	/* !PA_LOCK_COUNT */
 
 #ifndef ASSEMBLER
 #ifdef _KERNEL
 #define num_pages(x) \
 	((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) >> PAGE_SHIFT))
 extern	unsigned long maxtsiz;
 extern	unsigned long dfldsiz;
 extern	unsigned long maxdsiz;
 extern	unsigned long dflssiz;
 extern	unsigned long maxssiz;
 extern	unsigned long sgrowsiz;
 #endif				/* _KERNEL */
 #endif				/* ASSEMBLER */
 #endif				/* _VM_PARAM_ */
Index: head/usr.bin/kdump/kdump.c
===================================================================
--- head/usr.bin/kdump/kdump.c	(revision 318735)
+++ head/usr.bin/kdump/kdump.c	(revision 318736)
@@ -1,2026 +1,2025 @@
 /*-
  * Copyright (c) 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 static const char copyright[] =
 "@(#) Copyright (c) 1988, 1993\n\
 	The Regents of the University of California.  All rights reserved.\n";
 #endif /* not lint */
 
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)kdump.c	8.1 (Berkeley) 6/6/93";
 #endif
 #endif /* not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #define _WANT_KERNEL_ERRNO
 #include <sys/param.h>
 #include <sys/capsicum.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/uio.h>
 #include <sys/ktrace.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/sysent.h>
 #include <sys/umtx.h>
 #include <sys/un.h>
 #include <sys/queue.h>
 #include <sys/wait.h>
 #ifdef HAVE_LIBCASPER
 #include <sys/nv.h>
 #endif
 #include <arpa/inet.h>
 #include <netinet/in.h>
 #include <ctype.h>
 #include <capsicum_helpers.h>
 #include <err.h>
 #include <grp.h>
 #include <inttypes.h>
 #include <locale.h>
 #include <netdb.h>
 #include <nl_types.h>
 #include <pwd.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sysdecode.h>
 #include <time.h>
 #include <unistd.h>
 #include <vis.h>
 #include "ktrace.h"
 
 #ifdef HAVE_LIBCASPER
 #include <libcasper.h>
 
 #include <casper/cap_grp.h>
 #include <casper/cap_pwd.h>
 #endif
 
 /* Function prototypes. */
 u_int abidump(struct ktr_header *);
 int fetchprocinfo(struct ktr_header *, u_int *);
 int fread_tail(void *, int, int);
 void dumpheader(struct ktr_header *);
 void ktrsyscall(struct ktr_syscall *, u_int);
 void ktrsysret(struct ktr_sysret *, u_int);
 void ktrnamei(char *, int);
 void hexdump(char *, int, int);
 void visdump(char *, int, int);
 void ktrgenio(struct ktr_genio *, int);
 void ktrpsig(struct ktr_psig *);
 void ktrcsw(struct ktr_csw *);
 void ktrcsw_old(struct ktr_csw_old *);
 void ktruser(int, void *);
 void ktrcaprights(cap_rights_t *);
 void ktritimerval(struct itimerval *it);
 void ktrsockaddr(struct sockaddr *);
 void ktrstat(struct stat *);
 void ktrstruct(char *, size_t);
 void ktrcapfail(struct ktr_cap_fail *);
 void ktrfault(struct ktr_fault *);
 void ktrfaultend(struct ktr_faultend *);
 void usage(void);
 
 /*
  * Bits of the `timestamp' option word.  main() starts from TIMESTAMP_NONE
  * and ORs in ABSOLUTE for -T, ELAPSED for -E and RELATIVE for -R, so
  * several timestamp styles can be active at once.
  */
 #define	TIMESTAMP_NONE		0x0
 #define	TIMESTAMP_ABSOLUTE	0x1
 #define	TIMESTAMP_ELAPSED	0x2
 #define	TIMESTAMP_RELATIVE	0x4
 
 /*
  * Option state filled in by main(): timestamp (-E/-R/-T), decimal (-d),
  * fancy (cleared by -n), suppressdata (-s), tail (-l), threads (-H),
  * maxdata (-m), resolv (-r), abiflag (-A), syscallno (-S).
  */
 static int timestamp, decimal, fancy = 1, suppressdata, tail, threads, maxdata,
     resolv = 0, abiflag = 0, syscallno = 0;
 /* Trace file to read; replaced by the argument of -f. */
 static const char *tracefile = DEF_TRACEFILE;
 /* Header of the record currently being processed by main(). */
 static struct ktr_header ktr_header;
 
 #define TIME_FORMAT	"%b %e %T %Y"
 /* Non-zero when the two strings compare equal. */
 #define eqs(s1, s2)	(strcmp((s1), (s2)) == 0)
 
 /*
  * Print one 64-bit syscall argument and advance the argument cursor.
  *
  *   first  pointer to the first argument slot (used for the alignment test)
  *   i      argument cursor; advanced past the slots consumed
  *   n      remaining slot count; decremented by the slots consumed
  *   c      separator character printed before the value; set to ','
  *
  * The expansion site must have `quad_align', `quad_slots' and `decimal'
  * in scope.  When quad_align is set and the cursor sits at an odd slot
  * offset from `first', one slot is skipped first.  With quad_slots == 2
  * the value is assembled from two slots, (i)[0] supplying the low 32 bits
  * and (i)[1] the high 32 bits; otherwise a single slot is used.  The value
  * is printed as signed decimal when `decimal' is set, else as hex.
  */
 #define	print_number64(first,i,n,c) do {				\
 	uint64_t __v;							\
 									\
 	if (quad_align && (((ptrdiff_t)((i) - (first))) & 1) == 1) {	\
 		(i)++;							\
 		(n)--;							\
 	}								\
 	if (quad_slots == 2)						\
 		__v = (uint64_t)(uint32_t)(i)[0] |			\
 		    ((uint64_t)(uint32_t)(i)[1]) << 32;			\
 	else								\
 		__v = (uint64_t)*(i);					\
 	if (decimal)							\
 		printf("%c%jd", (c), (intmax_t)__v);			\
 	else								\
 		printf("%c%#jx", (c), (uintmax_t)__v);			\
 	(i) += quad_slots;						\
 	(n) -= quad_slots;						\
 	(c) = ',';							\
 } while (0)
 
 /*
  * Print the argument slot at cursor `i' preceded by separator `c', then
  * advance `i', decrement the remaining count `n' and set `c' to ','.
  * Output is signed decimal when `decimal' is set, else hex.  The arguments
  * are not parenthesized in the expansion, so pass plain identifiers only.
  */
 #define print_number(i,n,c) do {					\
 	if (decimal)							\
 		printf("%c%jd", c, (intmax_t)*i);			\
 	else								\
 		printf("%c%#jx", c, (uintmax_t)(u_register_t)*i);	\
 	i++;								\
 	n--;								\
 	c = ',';							\
 } while (0)
 
 /* One traced process: its pid and the sv_flags recorded for it. */
 struct proc_info
 {
 	TAILQ_ENTRY(proc_info)	info;		/* linkage on trace_procs */
 	u_int			sv_flags;	/* copied from a KTR_PROCCTOR payload */
 	pid_t			pid;		/* lookup key, compared with ktr_pid */
 };
 
 /* Processes currently known; maintained by fetchprocinfo(). */
 static TAILQ_HEAD(trace_procs, proc_info) trace_procs;
 
 #ifdef HAVE_LIBCASPER
 /* Casper channels filled in by cappwdgrp_setup() when -r is given. */
 static cap_channel_t *cappwd, *capgrp;
 #endif
 
 static void
 strerror_init(void)
 {
 	nl_catd libc_catalog;
 
 	/*
 	 * Open the libc message catalog up front so the NLS data is cached
 	 * before capability mode is entered.  The descriptor itself is not
 	 * needed afterwards.
 	 * XXXPJD: libc should really provide strerror_init() and
 	 * strsignal_init() for this.
 	 */
 	libc_catalog = catopen("libc", NL_CAT_LOCALE);
 	(void)libc_catalog;
 }
 
 static void
 localtime_init(void)
 {
 	time_t now;
 
 	/*
 	 * Run localtime(3) once on the current time so that it can cache
 	 * the contents of /etc/localtime before capability mode is entered.
 	 * XXXPJD: libc should really provide a localtime_init() for this.
 	 */
 	now = time(NULL);
 	(void)localtime(&now);
 }
 
 #ifdef HAVE_LIBCASPER
 /*
  * Open restricted Casper channels for passwd and group lookups.
  *
  * On return *cappwdp holds a system.pwd channel limited to the getpwuid
  * command and the pw_name field, and *capgrpp holds a system.grp channel
  * limited to the getgrgid command and the gr_name field.
  *
  * Every failure is fatal: a diagnostic is printed and the process exits
  * with status 1, so the function only ever returns 0.
  */
 static int
 cappwdgrp_setup(cap_channel_t **cappwdp, cap_channel_t **capgrpp)
 {
 	cap_channel_t *capcas, *cappwdloc, *capgrploc;
 	const char *cmds[1], *fields[1];
 
 	capcas = cap_init();
 	/* err(3) does not return, so no explicit exit is needed after it. */
 	if (capcas == NULL)
 		err(1, "unable to create casper process");
 	cappwdloc = cap_service_open(capcas, "system.pwd");
 	capgrploc = cap_service_open(capcas, "system.grp");
 	/* Casper capability no longer needed. */
 	cap_close(capcas);
 	if (cappwdloc == NULL || capgrploc == NULL) {
 		/* Report each missing service before giving up. */
 		if (cappwdloc == NULL)
 			warn("unable to open system.pwd service");
 		if (capgrploc == NULL)
 			warn("unable to open system.grp service");
 		exit(1);
 	}
 	/* Limit system.pwd to only getpwuid() function and pw_name field. */
 	cmds[0] = "getpwuid";
 	if (cap_pwd_limit_cmds(cappwdloc, cmds, 1) < 0)
 		err(1, "unable to limit system.pwd service");
 	fields[0] = "pw_name";
 	if (cap_pwd_limit_fields(cappwdloc, fields, 1) < 0)
 		err(1, "unable to limit system.pwd service");
 	/* Limit system.grp to only getgrgid() function and gr_name field. */
 	cmds[0] = "getgrgid";
 	if (cap_grp_limit_cmds(capgrploc, cmds, 1) < 0)
 		err(1, "unable to limit system.grp service");
 	fields[0] = "gr_name";
 	if (cap_grp_limit_fields(capgrploc, fields, 1) < 0)
 		err(1, "unable to limit system.grp service");
 
 	*cappwdp = cappwdloc;
 	*capgrpp = capgrploc;
 	return (0);
 }
 #endif	/* HAVE_LIBCASPER */
 
 /*
  * Print `value' symbolically using `decoder'.  When the decoder returns
  * NULL the value is printed as <invalid=N>, in decimal or hex depending
  * on the global `decimal' flag.
  */
 static void
 print_integer_arg(const char *(*decoder)(int), int value)
 {
 	const char *name = decoder(value);
 
 	if (name != NULL) {
 		printf("%s", name);
 		return;
 	}
 	if (decimal)
 		printf("<invalid=%d>", value);
 	else
 		printf("<invalid=%#x>", value);
 }
 
 /* Like print_integer_arg but unknown values are treated as valid. */
 /*
  * Variant of print_integer_arg() for arguments where a value the decoder
  * does not know is still acceptable: it is printed as a bare number
  * (decimal or hex per the global `decimal' flag) without an <invalid> tag.
  */
 static void
 print_integer_arg_valid(const char *(*decoder)(int), int value)
 {
 	const char *name = decoder(value);
 
 	if (name != NULL) {
 		printf("%s", name);
 		return;
 	}
 	if (decimal)
 		printf("%d", value);
 	else
 		printf("%#x", value);
 }
 
 /*
  * Print a flag word as "0xVALUE<names>", letting `decoder' write the
  * names to stdout.  If the decoder reports failure, "<invalid>" and the
  * value it stored through its third argument (presumably the bits it
  * could not decode) are appended.
  */
 static void
 print_mask_arg(bool (*decoder)(FILE *, int, int *), int value)
 {
 	int leftover;
 	bool decoded;
 
 	printf("%#x<", value);
 	decoded = decoder(stdout, value, &leftover);
 	printf(">");
 	if (decoded)
 		return;
 	printf("<invalid>%u", leftover);
 }
 
 /*
  * Same output format as print_mask_arg(), except that a zero flag word
  * is printed as a plain "0" without calling the decoder.
  */
 static void
 print_mask_arg0(bool (*decoder)(FILE *, int, int *), int value)
 {
 	int leftover;
 	bool decoded;
 
 	if (value != 0) {
 		printf("%#x<", value);
 		decoded = decoder(stdout, value, &leftover);
 		printf(">");
 		if (!decoded)
 			printf("<invalid>%u", leftover);
 	} else
 		printf("0");
 }
 
 /*
  * Print file flags as "0xVALUE<names>" via sysdecode_fileflags(), or a
  * plain "0" when no flag is set.  On decoder failure "<invalid>" and the
  * value left in its output argument are appended.
  */
 static void
 decode_fileflags(fflags_t value)
 {
 	fflags_t leftover;
 	bool decoded;
 
 	if (value != 0) {
 		printf("%#x<", value);
 		decoded = sysdecode_fileflags(stdout, value, &leftover);
 		printf(">");
 		if (!decoded)
 			printf("<invalid>%u", leftover);
 	} else
 		printf("0");
 }
 
 /*
  * Print a file mode as octal followed by "<names>" produced by
  * sysdecode_filemode(), or a plain "0" for a zero mode.  On decoder
  * failure "<invalid>" and the value left in its output argument are
  * appended.
  */
 static void
 decode_filemode(int value)
 {
 	int leftover;
 	bool decoded;
 
 	if (value != 0) {
 		printf("%#o<", value);
 		decoded = sysdecode_filemode(stdout, value, &leftover);
 		printf(">");
 		if (!decoded)
 			printf("<invalid>%u", leftover);
 	} else
 		printf("0");
 }
 
 /*
  * 32-bit unsigned counterpart of print_mask_arg(): prints
  * "0xVALUE<names>" and, on decoder failure, "<invalid>" followed by the
  * value the decoder stored through its third argument.
  */
 static void
 print_mask_arg32(bool (*decoder)(FILE *, uint32_t, uint32_t *), uint32_t value)
 {
 	uint32_t leftover;
 	bool decoded;
 
 	printf("%#x<", value);
 	decoded = decoder(stdout, value, &leftover);
 	printf(">");
 	if (decoded)
 		return;
 	printf("<invalid>%u", leftover);
 }
 
 /*
  * u_long counterpart of print_mask_arg0(): a zero word prints as "0",
  * anything else as "0xVALUE<names>", with "<invalid>" and the decoder's
  * output value appended when the decoder reports failure.
  */
 static void
 print_mask_argul(bool (*decoder)(FILE *, u_long, u_long *), u_long value)
 {
 	u_long leftover;
 	bool decoded;
 
 	if (value != 0) {
 		printf("%#lx<", value);
 		decoded = decoder(stdout, value, &leftover);
 		printf(">");
 		if (!decoded)
 			printf("<invalid>%lu", leftover);
 	} else
 		printf("0");
 }
 
 /*
  * Program entry point.
  *
  * Parses the command-line options into the file-scope option variables,
  * redirects stdin to the trace file (unless it is "-"), pre-loads locale
  * and timezone data, enters capability mode where possible, and then
  * reads trace records from stdin one at a time: a fixed-size header
  * followed by ktr_len bytes of payload, which is handed to the ktr*()
  * routine matching the record type.
  *
  * Returns 0 when the input is exhausted; fatal conditions exit through
  * err()/errx()/usage().
  */
 int
 main(int argc, char *argv[])
 {
 	int ch, ktrlen, size;
 	void *m;
 	int trpoints = ALL_POINTS;
 	int drop_logged;
 	pid_t pid = 0;
 	u_int sv_flags;
 
 	setlocale(LC_CTYPE, "");
 
 	timestamp = TIMESTAMP_NONE;
 
 	while ((ch = getopt(argc,argv,"f:dElm:np:AHRrSsTt:")) != -1)
 		switch (ch) {
 		case 'A':
 			abiflag = 1;
 			break;
 		case 'f':
 			tracefile = optarg;
 			break;
 		case 'd':
 			decimal = 1;
 			break;
 		case 'l':
 			tail = 1;
 			break;
 		case 'm':
 			/* NOTE(review): atoi() gives no error indication */
 			maxdata = atoi(optarg);
 			break;
 		case 'n':
 			fancy = 0;
 			break;
 		case 'p':
 			/* NOTE(review): atoi() gives no error indication */
 			pid = atoi(optarg);
 			break;
 		case 'r':
 			resolv = 1;
 			break;
 		case 'S':
 			syscallno = 1;
 			break;
 		case 's':
 			suppressdata = 1;
 			break;
 		case 'E':
 			timestamp |= TIMESTAMP_ELAPSED;
 			break;
 		case 'H':
 			threads = 1;
 			break;
 		case 'R':
 			timestamp |= TIMESTAMP_RELATIVE;
 			break;
 		case 'T':
 			timestamp |= TIMESTAMP_ABSOLUTE;
 			break;
 		case 't':
 			trpoints = getpoints(optarg);
 			if (trpoints < 0)
 				errx(1, "unknown trace point in %s", optarg);
 			break;
 		default:
 			usage();
 		}
 
 	/* No positional arguments are accepted. */
 	if (argc > optind)
 		usage();
 
 	/* Initial payload buffer; grown on demand in the record loop. */
 	m = malloc(size = 1025);
 	if (m == NULL)
 		errx(1, "%s", strerror(ENOMEM));
 	/* A trace file named "-" means: read from the existing stdin. */
 	if (strcmp(tracefile, "-") != 0)
 		if (!freopen(tracefile, "r", stdin))
 			err(1, "%s", tracefile);
 
 	strerror_init();
 	localtime_init();
 #ifdef HAVE_LIBCASPER
 	if (resolv != 0) {
 		if (cappwdgrp_setup(&cappwd, &capgrp) < 0) {
 			cappwd = NULL;
 			capgrp = NULL;
 		}
 	}
 	/*
 	 * Capability mode is entered unless name resolution was requested
 	 * and the Casper channels are unavailable.  ENOSYS from cap_enter()
 	 * is tolerated.
 	 */
 	if (resolv == 0 || (cappwd != NULL && capgrp != NULL)) {
 		if (cap_enter() < 0 && errno != ENOSYS)
 			err(1, "unable to enter capability mode");
 	}
 #else
 	/* Without Casper, capability mode is only entered when -r is off. */
 	if (resolv == 0) {
 		if (cap_enter() < 0 && errno != ENOSYS)
 			err(1, "unable to enter capability mode");
 	}
 #endif
 	if (caph_limit_stdio() == -1)
 		err(1, "unable to limit stdio");
 
 	TAILQ_INIT(&trace_procs);
 	drop_logged = 0;
 	while (fread_tail(&ktr_header, sizeof(struct ktr_header), 1)) {
 		/*
 		 * KTR_DROP is a flag carried in the type field; strip it and
 		 * report the drop once until the next record is printed
 		 * (drop_logged is cleared further down).
 		 */
 		if (ktr_header.ktr_type & KTR_DROP) {
 			ktr_header.ktr_type &= ~KTR_DROP;
 			if (!drop_logged && threads) {
 				printf(
 				    "%6jd %6jd %-8.*s Events dropped.\n",
 				    (intmax_t)ktr_header.ktr_pid,
 				    ktr_header.ktr_tid > 0 ?
 				    (intmax_t)ktr_header.ktr_tid : 0,
 				    MAXCOMLEN, ktr_header.ktr_comm);
 				drop_logged = 1;
 			} else if (!drop_logged) {
 				printf("%6jd %-8.*s Events dropped.\n",
 				    (intmax_t)ktr_header.ktr_pid, MAXCOMLEN,
 				    ktr_header.ktr_comm);
 				drop_logged = 1;
 			}
 		}
 		/*
 		 * The header line is printed here, before the payload is
 		 * read, when the record passes the trace-point and pid/tid
 		 * filters.
 		 * NOTE(review): ktr_type comes from the input file and is
 		 * not range-checked before being used as a shift count.
 		 */
 		if (trpoints & (1<<ktr_header.ktr_type))
 			if (pid == 0 || ktr_header.ktr_pid == pid ||
 			    ktr_header.ktr_tid == pid)
 				dumpheader(&ktr_header);
 		if ((ktrlen = ktr_header.ktr_len) < 0)
 			errx(1, "bogus length 0x%x", ktrlen);
 		/*
 		 * Grow the payload buffer.  The allocation is ktrlen+1 bytes
 		 * while `size' records ktrlen.
 		 * NOTE(review): the initial buffer is exactly `size' bytes,
 		 * without the extra byte -- confirm no consumer relies on it.
 		 */
 		if (ktrlen > size) {
 			m = realloc(m, ktrlen+1);
 			if (m == NULL)
 				errx(1, "%s", strerror(ENOMEM));
 			size = ktrlen;
 		}
 		if (ktrlen && fread_tail(m, ktrlen, 1) == 0)
 			errx(1, "data too short");
 		/* Process create/destroy records are bookkeeping only. */
 		if (fetchprocinfo(&ktr_header, (u_int *)m) != 0)
 			continue;
 		sv_flags = abidump(&ktr_header);
 		/* Same filters as above, now deciding whether to decode. */
 		if (pid && ktr_header.ktr_pid != pid &&
 		    ktr_header.ktr_tid != pid)
 			continue;
 		if ((trpoints & (1<<ktr_header.ktr_type)) == 0)
 			continue;
 		drop_logged = 0;
 		switch (ktr_header.ktr_type) {
 		case KTR_SYSCALL:
 			ktrsyscall((struct ktr_syscall *)m, sv_flags);
 			break;
 		case KTR_SYSRET:
 			ktrsysret((struct ktr_sysret *)m, sv_flags);
 			break;
 		case KTR_NAMEI:
 		case KTR_SYSCTL:
 			ktrnamei(m, ktrlen);
 			break;
 		case KTR_GENIO:
 			ktrgenio((struct ktr_genio *)m, ktrlen);
 			break;
 		case KTR_PSIG:
 			ktrpsig((struct ktr_psig *)m);
 			break;
 		case KTR_CSW:
 			/* The payload length selects old vs. new layout. */
 			if (ktrlen == sizeof(struct ktr_csw_old))
 				ktrcsw_old((struct ktr_csw_old *)m);
 			else
 				ktrcsw((struct ktr_csw *)m);
 			break;
 		case KTR_USER:
 			ktruser(ktrlen, m);
 			break;
 		case KTR_STRUCT:
 			ktrstruct(m, ktrlen);
 			break;
 		case KTR_CAPFAIL:
 			ktrcapfail((struct ktr_cap_fail *)m);
 			break;
 		case KTR_FAULT:
 			ktrfault((struct ktr_fault *)m);
 			break;
 		case KTR_FAULTEND:
 			ktrfaultend((struct ktr_faultend *)m);
 			break;
 		default:
 			/* Unknown type: just terminate the header line. */
 			printf("\n");
 			break;
 		}
 		/* In follow mode (-l) push each record out immediately. */
 		if (tail)
 			fflush(stdout);
 	}
 	return 0;
 }
 
 /*
  * fread(3) from stdin into `buf'.  In follow mode (global `tail' set) a
  * read that yields no items is retried once a second, clearing the
  * stream's error/EOF state in between; otherwise the fread() result is
  * returned as is.
  */
 int
 fread_tail(void *buf, int size, int num)
 {
 	int nitems;
 
 	for (;;) {
 		nitems = fread(buf, size, num, stdin);
 		if (nitems != 0 || !tail)
 			break;
 		sleep(1);
 		clearerr(stdin);
 	}
 	return (nitems);
 }
 
 /*
  * Maintain the trace_procs list from process create/destroy records.
  *
  * For KTR_PROCCTOR any existing entry for the pid is discarded and a new
  * one is appended carrying the flags word read through `flags'.  For
  * KTR_PROCDTOR the entry for the pid, if any, is removed and freed.
  *
  * Returns 1 when the record was one of those two types (the caller then
  * skips further processing of it), 0 for every other record type.
  * Allocation failure is fatal.
  */
 int
 fetchprocinfo(struct ktr_header *kth, u_int *flags)
 {
 	struct proc_info *pi;
 
 	switch (kth->ktr_type) {
 	case KTR_PROCCTOR:
 		TAILQ_FOREACH(pi, &trace_procs, info) {
 			if (pi->pid == kth->ktr_pid) {
 				TAILQ_REMOVE(&trace_procs, pi, info);
 				/* The stale entry is replaced below; release it. */
 				free(pi);
 				break;
 			}
 		}
 		pi = malloc(sizeof(struct proc_info));
 		if (pi == NULL)
 			errx(1, "%s", strerror(ENOMEM));
 		pi->sv_flags = *flags;
 		pi->pid = kth->ktr_pid;
 		TAILQ_INSERT_TAIL(&trace_procs, pi, info);
 		return (1);
 
 	case KTR_PROCDTOR:
 		TAILQ_FOREACH(pi, &trace_procs, info) {
 			if (pi->pid == kth->ktr_pid) {
 				TAILQ_REMOVE(&trace_procs, pi, info);
 				free(pi);
 				break;
 			}
 		}
 		return (1);
 	}
 
 	return (0);
 }
 
 /*
  * Look up the sv_flags recorded for the record's pid (0 if the pid is
  * not on trace_procs) and return them.  When the global `abiflag' is
  * set, also print a tag made of an ABI letter (L, F, C, or U for
  * anything else) and a width ("64", "32", or "00" when neither width
  * flag is set), followed by two spaces.
  */
 u_int
 abidump(struct ktr_header *kth)
 {
 	struct proc_info *entry;
 	const char *abiname, *width;
 	u_int svflags;
 
 	svflags = 0;
 	TAILQ_FOREACH(entry, &trace_procs, info) {
 		if (entry->pid != kth->ktr_pid)
 			continue;
 		svflags = entry->sv_flags;
 		break;
 	}
 
 	if (abiflag != 0) {
 		switch (svflags & SV_ABI_MASK) {
 		case SV_ABI_LINUX:
 			abiname = "L";
 			break;
 		case SV_ABI_FREEBSD:
 			abiname = "F";
 			break;
 		case SV_ABI_CLOUDABI:
 			abiname = "C";
 			break;
 		default:
 			abiname = "U";
 			break;
 		}
 
 		/* SV_LP64 takes precedence when both width flags are set. */
 		if ((svflags & SV_LP64) != 0)
 			width = "64";
 		else if ((svflags & SV_ILP32) != 0)
 			width = "32";
 		else
 			width = "00";
 
 		printf("%s%s  ", abiname, width);
 	}
 
 	return (svflags);
 }
 
 /*
  * Print the leading columns of one trace line: pid, optionally the
  * thread id (when `threads' is set), the command name, any timestamps
  * selected in `timestamp', and a four-character record type tag followed
  * by two spaces.  Nothing is printed for KTR_PROCCTOR and KTR_PROCDTOR
  * records.
  *
  * The timestamp baselines are kept in function-local statics, so the
  * output depends on the order in which records are passed in.
  */
 void
 dumpheader(struct ktr_header *kth)
 {
 	static char unknown[64];
 	static struct timeval prevtime, prevtime_e;
 	struct timeval temp;
 	const char *type;
 	const char *sign;
 
 	switch (kth->ktr_type) {
 	case KTR_SYSCALL:
 		type = "CALL";
 		break;
 	case KTR_SYSRET:
 		type = "RET ";
 		break;
 	case KTR_NAMEI:
 		type = "NAMI";
 		break;
 	case KTR_GENIO:
 		type = "GIO ";
 		break;
 	case KTR_PSIG:
 		type = "PSIG";
 		break;
 	case KTR_CSW:
 		type = "CSW ";
 		break;
 	case KTR_USER:
 		type = "USER";
 		break;
 	case KTR_STRUCT:
 		type = "STRU";
 		break;
 	case KTR_SYSCTL:
 		type = "SCTL";
 		break;
 	case KTR_PROCCTOR:
 		/* FALLTHROUGH */
 	case KTR_PROCDTOR:
 		return;
 	case KTR_CAPFAIL:
 		type = "CAP ";
 		break;
 	case KTR_FAULT:
 		type = "PFLT";
 		break;
 	case KTR_FAULTEND:
 		type = "PRET";
 		break;
 	default:
 		/* Formatted into a static buffer that `type' then points at. */
 		sprintf(unknown, "UNKNOWN(%d)", kth->ktr_type);
 		type = unknown;
 	}
 
 	/*
 	 * The ktr_tid field was previously the ktr_buffer field, which held
 	 * the kernel pointer value for the buffer associated with data
 	 * following the record header.  It now holds a threadid, but only
 	 * for trace files after the change.  Older trace files still contain
 	 * kernel pointers.  Detect this and suppress the results by printing
 	 * negative tid's as 0.
 	 */
 	if (threads)
 		printf("%6jd %6jd %-8.*s ", (intmax_t)kth->ktr_pid,
 		    kth->ktr_tid > 0 ? (intmax_t)kth->ktr_tid : 0,
 		    MAXCOMLEN, kth->ktr_comm);
 	else
 		printf("%6jd %-8.*s ", (intmax_t)kth->ktr_pid, MAXCOMLEN,
 		    kth->ktr_comm);
         if (timestamp) {
 		if (timestamp & TIMESTAMP_ABSOLUTE) {
 			printf("%jd.%06ld ", (intmax_t)kth->ktr_time.tv_sec,
 			    kth->ktr_time.tv_usec);
 		}
 		if (timestamp & TIMESTAMP_ELAPSED) {
 			/*
 			 * The baseline is latched from the first record seen
 			 * (while its tv_sec is still 0) and never updated.
 			 */
 			if (prevtime_e.tv_sec == 0)
 				prevtime_e = kth->ktr_time;
 			timersub(&kth->ktr_time, &prevtime_e, &temp);
 			printf("%jd.%06ld ", (intmax_t)temp.tv_sec,
 			    temp.tv_usec);
 		}
 		if (timestamp & TIMESTAMP_RELATIVE) {
 			/*
 			 * Difference to the previous record; a record older
 			 * than its predecessor is shown as a magnitude with
 			 * a leading "-".
 			 */
 			if (prevtime.tv_sec == 0)
 				prevtime = kth->ktr_time;
 			if (timercmp(&kth->ktr_time, &prevtime, <)) {
 				timersub(&prevtime, &kth->ktr_time, &temp);
 				sign = "-";
 			} else {
 				timersub(&kth->ktr_time, &prevtime, &temp);
 				sign = "";
 			}
 			prevtime = kth->ktr_time;
 			printf("%s%jd.%06ld ", sign, (intmax_t)temp.tv_sec,
 			    temp.tv_usec);
 		}
 	}
 	printf("%s  ", type);
 }
 
 #include <sys/syscall.h>
 
 /*
  * Print an ioctl request by name when sysdecode_ioctlname() knows it,
  * otherwise as a number: decimal if the global `decimal' flag is set,
  * hex if not.
  */
 static void
 ioctlname(unsigned long val)
 {
 	const char *name = sysdecode_ioctlname(val);
 
 	if (name != NULL) {
 		printf("%s", name);
 		return;
 	}
 	if (decimal)
 		printf("%lu", val);
 	else
 		printf("%#lx", val);
 }
 
 /*
  * Map a process's sv_flags to the sysdecode ABI identifier used for
  * syscall name lookup.  A zero flags word is treated as the FreeBSD ABI.
  * The Linux and CloudABI mappings are only compiled in on the
  * architectures selected by the preprocessor conditions below; anything
  * unmatched yields SYSDECODE_ABI_UNKNOWN.
  */
 static enum sysdecode_abi
 syscallabi(u_int sv_flags)
 {
 	u_int abi;
 
 	if (sv_flags == 0)
 		return (SYSDECODE_ABI_FREEBSD);
 
 	abi = sv_flags & SV_ABI_MASK;
 	if (abi == SV_ABI_FREEBSD)
 		return (SYSDECODE_ABI_FREEBSD);
 #if defined(__amd64__) || defined(__i386__)
 	if (abi == SV_ABI_LINUX) {
 #ifdef __amd64__
 		/* On amd64 an ILP32 Linux process uses the 32-bit table. */
 		if ((sv_flags & SV_ILP32) != 0)
 			return (SYSDECODE_ABI_LINUX32);
 #endif
 		return (SYSDECODE_ABI_LINUX);
 	}
 #endif
 #if defined(__aarch64__) || defined(__amd64__)
 	if (abi == SV_ABI_CLOUDABI)
 		return (SYSDECODE_ABI_CLOUDABI64);
 #endif
 	return (SYSDECODE_ABI_UNKNOWN);
 }
 
 /*
  * Print the name of syscall `code' under the ABI derived from
  * `sv_flags'.  The bracketed number is printed instead of the name when
  * the name is unknown, and after the name when the global `syscallno'
  * flag is set.
  */
 static void
 syscallname(u_int code, u_int sv_flags)
 {
 	const char *label;
 
 	label = sysdecode_syscallname(syscallabi(sv_flags), code);
 	if (label != NULL)
 		printf("%s", label);
 	if (label == NULL || syscallno)
 		printf("[%d]", code);
 }
 
 /*
  * Print a signal by the name sysdecode_signal() gives it, or as
  * "SIG <number>" when it has none.
  */
 static void
 print_signal(int signo)
 {
 	const char *name = sysdecode_signal(signo);
 
 	if (name == NULL) {
 		printf("SIG %d", signo);
 		return;
 	}
 	printf("%s", name);
 }
 
 /*
  * Print one KTR_SYSCALL record: the syscall name, then (if the record
  * carries arguments) the argument list in parentheses, then a newline.
  *
  * `ip' is a cursor over the argument slots and `narg' the number of
  * slots not yet printed; `c' is the separator to emit before the next
  * argument and starts out as '('.  When `fancy' is set and the ABI flags
  * are zero or FreeBSD, selected arguments of known syscalls are decoded
  * symbolically; each case advances `ip'/`narg' past what it printed.
  * Every slot still remaining afterwards is printed as a plain number.
  *
  * ktr       the record payload
  * sv_flags  ABI flags of the traced process (as returned by abidump())
  */
 void
 ktrsyscall(struct ktr_syscall *ktr, u_int sv_flags)
 {
 	int narg = ktr->ktr_narg;
 	register_t *ip, *first;
 	intmax_t arg;
 	int quad_align, quad_slots;
 
 	syscallname(ktr->ktr_code, sv_flags);
 	ip = first = &ktr->ktr_args[0];
 	if (narg) {
 		char c = '(';
 		if (fancy &&
 		    (sv_flags == 0 ||
 		    (sv_flags & SV_ABI_MASK) == SV_ABI_FREEBSD)) {
 			/*
 			 * For ILP32 processes a 64-bit argument occupies two
 			 * slots (and is slot-aligned on powerpc); otherwise
 			 * it occupies one.  Consumed by print_number64().
 			 */
 			quad_align = 0;
 			if (sv_flags & SV_ILP32) {
 #ifdef __powerpc__
 				quad_align = 1;
 #endif
 				quad_slots = 2;
 			} else
 				quad_slots = 1;
 			/*
 			 * First pass: syscalls whose first argument is a
 			 * directory descriptor get it decoded through
 			 * sysdecode_atfd.  Several of these codes appear
 			 * again in the second switch for their remaining
 			 * arguments.
 			 */
 			switch (ktr->ktr_code) {
 			case SYS_bindat:
 			case SYS_chflagsat:
 			case SYS_connectat:
 			case SYS_faccessat:
 			case SYS_fchmodat:
 			case SYS_fchownat:
 			case SYS_fstatat:
 			case SYS_futimesat:
 			case SYS_linkat:
 			case SYS_mkdirat:
 			case SYS_mkfifoat:
 			case SYS_mknodat:
 			case SYS_openat:
 			case SYS_readlinkat:
 			case SYS_renameat:
 			case SYS_unlinkat:
 			case SYS_utimensat:
 				putchar('(');
 				print_integer_arg_valid(sysdecode_atfd, *ip);
 				c = ',';
 				ip++;
 				narg--;
 				break;
 			}
 			/*
 			 * Second pass: per-syscall decoding of the arguments
 			 * that have a symbolic form.
 			 */
 			switch (ktr->ktr_code) {
 			case SYS_ioctl: {
 				print_number(ip, narg, c);
 				putchar(c);
 				ioctlname(*ip);
 				c = ',';
 				ip++;
 				narg--;
 				break;
 			}
 			case SYS_ptrace:
 				putchar('(');
 				print_integer_arg(sysdecode_ptrace_request, *ip);
 				c = ',';
 				ip++;
 				narg--;
 				break;
 			case SYS_access:
 			case SYS_eaccess:
 			case SYS_faccessat:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_access_mode, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_open:
 			case SYS_openat:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_open_flags, ip[0]);
 				/* The mode is only shown when O_CREAT is set. */
 				if ((ip[0] & O_CREAT) == O_CREAT) {
 					putchar(',');
 					decode_filemode(ip[1]);
 				}
 				ip += 2;
 				narg -= 2;
 				break;
 			case SYS_wait4:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg0(sysdecode_wait4_options, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_wait6:
 				putchar('(');
 				print_integer_arg(sysdecode_idtype, *ip);
 				c = ',';
 				ip++;
 				narg--;
 				print_number64(first, ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_wait6_options, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_chmod:
 			case SYS_fchmod:
 			case SYS_lchmod:
 			case SYS_fchmodat:
 				print_number(ip, narg, c);
 				putchar(',');
 				decode_filemode(*ip);
 				ip++;
 				narg--;
 				break;
-			case SYS_mknod:
 			case SYS_mknodat:
 				print_number(ip, narg, c);
 				putchar(',');
 				decode_filemode(*ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_getfsstat:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_getfsstat_mode, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_mount:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_mount_flags, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_unmount:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_mount_flags, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_recvmsg:
 			case SYS_sendmsg:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg0(sysdecode_msg_flags, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_recvfrom:
 			case SYS_sendto:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg0(sysdecode_msg_flags, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_chflags:
 			case SYS_chflagsat:
 			case SYS_fchflags:
 			case SYS_lchflags:
 				print_number(ip, narg, c);
 				putchar(',');
 				decode_fileflags(*ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_kill:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_signal(*ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_reboot:
 				putchar('(');
 				print_mask_arg(sysdecode_reboot_howto, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_umask:
 				putchar('(');
 				decode_filemode(*ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_msync:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_msync_flags, *ip);
 				ip++;
 				narg--;
 				break;
 #ifdef SYS_freebsd6_mmap
 			case SYS_freebsd6_mmap:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_mmap_prot, *ip);
 				putchar(',');
 				ip++;
 				narg--;
 				print_mask_arg(sysdecode_mmap_flags, *ip);
 				ip++;
 				narg--;
 				break;
 #endif
 			case SYS_mmap:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_mmap_prot, *ip);
 				putchar(',');
 				ip++;
 				narg--;
 				print_mask_arg(sysdecode_mmap_flags, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_mprotect:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_mmap_prot, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_madvise:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_madvice, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_setpriority:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_prio_which, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_fcntl:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_fcntl_cmd, ip[0]);
 				/* Only some commands take a decodable argument. */
 				if (sysdecode_fcntl_arg_p(ip[0])) {
 					putchar(',');
 					if (ip[0] == F_SETFL)
 						print_mask_arg(
 						    sysdecode_fcntl_fileflags,
 							ip[1]);
 					else
 						sysdecode_fcntl_arg(stdout,
 						    ip[0], ip[1],
 						    decimal ? 10 : 16);
 				}
 				ip += 2;
 				narg -= 2;
 				break;
 			case SYS_socket: {
 				int sockdomain;
 				putchar('(');
 				sockdomain = *ip;
 				print_integer_arg(sysdecode_socketdomain,
 				    sockdomain);
 				ip++;
 				narg--;
 				putchar(',');
 				print_mask_arg(sysdecode_socket_type, *ip);
 				ip++;
 				narg--;
 				/* The protocol is decoded for INET/INET6 only. */
 				if (sockdomain == PF_INET ||
 				    sockdomain == PF_INET6) {
 					putchar(',');
 					print_integer_arg(sysdecode_ipproto,
 					    *ip);
 					ip++;
 					narg--;
 				}
 				c = ',';
 				break;
 			}
 			case SYS_setsockopt:
 			case SYS_getsockopt: {
 				const char *str;
 
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg_valid(sysdecode_sockopt_level,
 				    *ip);
 				/*
 				 * The option name slot is only consumed here
 				 * when it has a symbolic name; otherwise the
 				 * trailing loop prints it as a number.
 				 */
 				str = sysdecode_sockopt_name(ip[0], ip[1]);
 				if (str != NULL) {
 					printf(",%s", str);
 					ip++;
 					narg--;
 				}
 				ip++;
 				narg--;
 				break;
 			}
 #ifdef SYS_freebsd6_lseek
 			case SYS_freebsd6_lseek:
 				print_number(ip, narg, c);
 				/* Hidden 'pad' argument, not in lseek(2) */
 				print_number(ip, narg, c);
 				print_number64(first, ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_whence, *ip);
 				ip++;
 				narg--;
 				break;
 #endif
 			case SYS_lseek:
 				print_number(ip, narg, c);
 				print_number64(first, ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_whence, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_flock:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_flock_operation, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_mkfifo:
 			case SYS_mkfifoat:
 			case SYS_mkdir:
 			case SYS_mkdirat:
 				print_number(ip, narg, c);
 				putchar(',');
 				decode_filemode(*ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_shutdown:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_shutdown_how, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_socketpair:
 				putchar('(');
 				print_integer_arg(sysdecode_socketdomain, *ip);
 				ip++;
 				narg--;
 				putchar(',');
 				print_mask_arg(sysdecode_socket_type, *ip);
 				ip++;
 				narg--;
 				c = ',';
 				break;
 			case SYS_getrlimit:
 			case SYS_setrlimit:
 				putchar('(');
 				print_integer_arg(sysdecode_rlimit, *ip);
 				ip++;
 				narg--;
 				c = ',';
 				break;
 			case SYS_quotactl:
 				print_number(ip, narg, c);
 				putchar(',');
 				if (!sysdecode_quotactl_cmd(stdout, *ip)) {
 					if (decimal)
 						printf("<invalid=%d>", (int)*ip);
 					else
 						printf("<invalid=%#x>",
 						    (int)*ip);
 				}
 				ip++;
 				narg--;
 				c = ',';
 				break;
 			case SYS_nfssvc:
 				putchar('(');
 				print_integer_arg(sysdecode_nfssvc_flags, *ip);
 				ip++;
 				narg--;
 				c = ',';
 				break;
 			case SYS_rtprio:
 				putchar('(');
 				print_integer_arg(sysdecode_rtprio_function,
 				    *ip);
 				ip++;
 				narg--;
 				c = ',';
 				break;
 			case SYS___semctl:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_semctl_cmd, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_semget:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_semget_flags, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_msgctl:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_msgctl_cmd, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_shmat:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_shmat_flags, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_shmctl:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_shmctl_cmd, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_shm_open:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_open_flags, ip[0]);
 				putchar(',');
 				decode_filemode(ip[1]);
 				ip += 2;
 				narg -= 2;
 				break;
 			case SYS_minherit:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_minherit_inherit,
 				    *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_rfork:
 				putchar('(');
 				print_mask_arg(sysdecode_rfork_flags, *ip);
 				ip++;
 				narg--;
 				c = ',';
 				break;
 			case SYS_lio_listio:
 				putchar('(');
 				print_integer_arg(sysdecode_lio_listio_mode,
 				    *ip);
 				ip++;
 				narg--;
 				c = ',';
 				break;
 			case SYS_mlockall:
 				putchar('(');
 				print_mask_arg(sysdecode_mlockall_flags, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_sched_setscheduler:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_scheduler_policy,
 				    *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_sched_get_priority_max:
 			case SYS_sched_get_priority_min:
 				putchar('(');
 				print_integer_arg(sysdecode_scheduler_policy,
 				    *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_sendfile:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_sendfile_flags, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_kldsym:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_kldsym_cmd, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_sigprocmask:
 				putchar('(');
 				print_integer_arg(sysdecode_sigprocmask_how,
 				    *ip);
 				ip++;
 				narg--;
 				c = ',';
 				break;
 			case SYS___acl_get_file:
 			case SYS___acl_set_file:
 			case SYS___acl_get_fd:
 			case SYS___acl_set_fd:
 			case SYS___acl_delete_file:
 			case SYS___acl_delete_fd:
 			case SYS___acl_aclcheck_file:
 			case SYS___acl_aclcheck_fd:
 			case SYS___acl_get_link:
 			case SYS___acl_set_link:
 			case SYS___acl_delete_link:
 			case SYS___acl_aclcheck_link:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_acltype, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_sigaction:
 				putchar('(');
 				print_signal(*ip);
 				ip++;
 				narg--;
 				c = ',';
 				break;
 			case SYS_extattrctl:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_extattrnamespace,
 				    *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_nmount:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_mount_flags, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_thr_create:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				putchar(',');
 				print_mask_arg(sysdecode_thr_create_flags, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_thr_kill:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_signal(*ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_kldunloadf:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_kldunload_flags,
 				    *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_linkat:
 			case SYS_renameat:
 			case SYS_symlinkat:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg_valid(sysdecode_atfd, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_cap_fcntls_limit:
 				print_number(ip, narg, c);
 				putchar(',');
 				arg = *ip;
 				ip++;
 				narg--;
 				print_mask_arg32(sysdecode_cap_fcntlrights, arg);
 				break;
 			case SYS_posix_fadvise:
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				print_number(ip, narg, c);
 				(void)putchar(',');
 				print_integer_arg(sysdecode_fadvice, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS_procctl:
 				putchar('(');
 				print_integer_arg(sysdecode_idtype, *ip);
 				c = ',';
 				ip++;
 				narg--;
 				print_number64(first, ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_procctl_cmd, *ip);
 				ip++;
 				narg--;
 				break;
 			case SYS__umtx_op:
 				print_number(ip, narg, c);
 				putchar(',');
 				print_integer_arg(sysdecode_umtx_op, *ip);
 				/*
 				 * For these two operations the slot after the
 				 * op code is additionally decoded as flags.
 				 */
 				switch (*ip) {
 				case UMTX_OP_CV_WAIT:
 					ip++;
 					narg--;
 					putchar(',');
 					print_mask_argul(
 					    sysdecode_umtx_cvwait_flags, *ip);
 					break;
 				case UMTX_OP_RW_RDLOCK:
 					ip++;
 					narg--;
 					putchar(',');
 					print_mask_argul(
 					    sysdecode_umtx_rwlock_flags, *ip);
 					break;
 				}
 				ip++;
 				narg--;
 				break;
 			case SYS_ftruncate:
 			case SYS_truncate:
 				print_number(ip, narg, c);
 				print_number64(first, ip, narg, c);
 				break;
 			}
 		}
 		/* Whatever was not decoded above is printed numerically. */
 		while (narg > 0) {
 			print_number(ip, narg, c);
 		}
 		putchar(')');
 	}
 	putchar('\n');
 }
 
 /*
  * Print a system call return record: the syscall name, a blank, and then
  * either the return value or a description of the error.
  */
 void
 ktrsysret(struct ktr_sysret *ktr, u_int sv_flags)
 {
 	const register_t retval = ktr->ktr_retval;
 	const int error = ktr->ktr_error;
 
 	syscallname(ktr->ktr_code, sv_flags);
 	printf(" ");
 
 	if (error == 0) {
 		if (fancy) {
 			printf("%ld", (long)retval);
 			/* Values 0..9 read the same in hex; skip the hex form. */
 			if (!(retval >= 0 && retval <= 9))
 				printf("/%#lx", (unsigned long)retval);
 		} else if (decimal) {
 			printf("%ld", (long)retval);
 		} else {
 			printf("%#lx", (unsigned long)retval);
 		}
 	} else if (error == ERESTART) {
 		printf("RESTART");
 	} else if (error == EJUSTRETURN) {
 		printf("JUSTRETURN");
 	} else {
 		/* The errno is mapped for the ABI selected by sv_flags. */
 		printf("-1 errno %d", sysdecode_freebsd_to_abi_errno(
 		    syscallabi(sv_flags), error));
 		if (fancy)
 			printf(" %s", strerror(ktr->ktr_error));
 	}
 	putchar('\n');
 }
 
 /*
  * Print a name-lookup record: the bytes at cp enclosed in double quotes
  * and followed by a newline.  len is passed as the "%.*s" precision, so
  * printf() stops after at most len bytes and cp need not be
  * NUL-terminated.
  */
 void
 ktrnamei(char *cp, int len)
 {
 	printf("\"%.*s\"\n", len, cp);
 }
 
 /*
  * Print len bytes starting at p as a hex dump with an ASCII column.
  *
  * Each row shows an offset, the bytes in hex (a blank before every second
  * byte) and the printable characters between '|' marks; non-printable
  * bytes are shown as '.'.  The number of bytes per row is the largest
  * even count whose row fits in screenwidth columns, but never fewer than
  * two.
  */
 void
 hexdump(char *p, int len, int screenwidth)
 {
 	int n, i;
 	int width;
 
 	/* Grow the row two bytes at a time until it no longer fits. */
 	width = 0;
 	do {
 		width += 2;
 		i = 13;			/* base offset */
 		i += (width / 2) + 1;	/* spaces every second byte */
 		i += (width * 2);	/* width of bytes */
 		i += 3;			/* "  |" */
 		i += width;		/* each byte */
 		i += 1;			/* "|" */
 	} while (i < screenwidth);
 	width -= 2;
 	/*
 	 * On a very narrow screen not even a two-byte row fits and the
 	 * probe above leaves width at 0.  A zero width would keep the row
 	 * loop from ever advancing and divide by zero below, so fall back
 	 * to the minimum row and let the terminal wrap.
 	 */
 	if (width < 2)
 		width = 2;
 
 	for (n = 0; n < len; n += width) {
 		/* Hex column; rows short of data are padded with blanks. */
 		for (i = n; i < n + width; i++) {
 			if ((i % width) == 0) {	/* beginning of line */
 				printf("       0x%04x", i);
 			}
 			if ((i % 2) == 0) {
 				printf(" ");
 			}
 			if (i < len)
 				printf("%02x", p[i] & 0xff);
 			else
 				printf("  ");
 		}
 		/* ASCII column. */
 		printf("  |");
 		for (i = n; i < n + width; i++) {
 			if (i >= len)
 				break;
 			if (p[i] >= ' ' && p[i] <= '~')
 				printf("%c", p[i]);
 			else
 				printf(".");
 		}
 		printf("|\n");
 	}
 	/*
 	 * i is whatever the last loop left behind (or the probe value when
 	 * len is 0); this check is kept exactly as before so the output for
 	 * normal screen widths is unchanged.
 	 */
 	if ((i % width) != 0)
 		printf("\n");
 }
 
 /*
  * Print datalen bytes starting at dp as a double-quoted, vis(3)-encoded
  * string (VIS_CSTYLE), folded so that lines stay within screenwidth
  * columns.  A newline in the data ends the output line; the following
  * line is indented with a tab.
  */
 void
 visdump(char *dp, int datalen, int screenwidth)
 {
 	int col;
 	char *cp;
 	int width;
 	int nextc;
 	char visbuf[5];
 
 	printf("       \"");
 	col = 8;
 	for (; datalen > 0; datalen--, dp++) {
 		/*
 		 * vis() wants the following character as look-ahead.  For
 		 * the last byte there is none inside the buffer, so pass
 		 * NUL instead of reading one byte past the data.
 		 */
 		nextc = (datalen > 1) ? *(dp + 1) : '\0';
 		vis(visbuf, *dp, VIS_CSTYLE, nextc);
 		cp = visbuf;
 		/*
 		 * Keep track of printables and
 		 * space chars (like fold(1)).
 		 */
 		if (col == 0) {
 			putchar('\t');
 			col = 8;
 		}
 		switch (*cp) {
 		case '\n':
 			col = 0;
 			putchar('\n');
 			continue;
 		case '\t':
 			/* Advance to the next multiple of eight columns. */
 			width = 8 - (col & 07);
 			break;
 		default:
 			width = strlen(cp);
 			break;
 		}
 		/* Fold with a trailing backslash when the line would overflow. */
 		if (col + width > (screenwidth - 2)) {
 			printf("\\\n\t");
 			col = 8;
 		}
 		col += width;
 		do {
 			putchar(*cp++);
 		} while (*cp);
 	}
 	if (col == 0)
 		printf("       ");
 	printf("\"\n");
 }
 
 /*
  * Print a generic I/O record: a summary line giving the descriptor,
  * direction and byte count, followed (unless suppressdata is set) by the
  * data itself, as a hex dump when it contains non-text bytes and as a
  * quoted string otherwise.
  */
 void
 ktrgenio(struct ktr_genio *ktr, int len)
 {
 	static int screenwidth = 0;
 	int datalen = len - sizeof (struct ktr_genio);
 	char *dp = (char *)ktr + sizeof (struct ktr_genio);
 	struct winsize ws;
 	int binary, i;
 
 	printf("fd %d %s %d byte%s\n", ktr->ktr_fd,
 	    ktr->ktr_rw == UIO_READ ? "read" : "wrote", datalen,
 	    datalen == 1 ? "" : "s");
 	if (suppressdata)
 		return;
 
 	/* The width is worked out on the first call and then reused. */
 	if (screenwidth == 0) {
 		screenwidth = 80;
 		if (fancy && ioctl(fileno(stderr), TIOCGWINSZ, &ws) != -1 &&
 		    ws.ws_col > 8)
 			screenwidth = ws.ws_col;
 	}
 	if (maxdata && datalen > maxdata)
 		datalen = maxdata;
 
 	/*
 	 * Data counts as text when every byte is in 32..126 or is one of
 	 * NUL, tab, line feed or carriage return.
 	 */
 	binary = 0;
 	for (i = 0; i < datalen; i++) {
 		const char ch = dp[i];
 
 		if (ch >= 32 && ch < 127)
 			continue;
 		if (ch == 10 || ch == 13 || ch == 0 || ch == 9)
 			continue;
 		binary = 1;
 		break;
 	}
 	if (!binary)
 		visdump(dp, datalen, screenwidth);
 	else
 		hexdump(dp, datalen, screenwidth);
 }
 
 /*
  * Print a signal-delivery record: the signal, then either " SIG_DFL" or
  * the handler address and first mask word, then the signal code.
  */
 void
 ktrpsig(struct ktr_psig *psig)
 {
 	const char *codename;
 
 	print_signal(psig->signo);
 	if (psig->action != SIG_DFL) {
 		printf(" caught handler=0x%lx mask=0x%x",
 		    (u_long)psig->action, psig->mask.__bits[0]);
 	} else {
 		printf(" SIG_DFL");
 	}
 	printf(" code=");
 	codename = sysdecode_sigcode(psig->signo, psig->code);
 	if (codename == NULL)
 		printf("<invalid=%#x>", psig->code);
 	else
 		printf("%s", codename);
 	putchar('\n');
 }
 
 /*
  * Print an old-format context switch record as "<stop|resume>
  * <user|kernel>".
  */
 void
 ktrcsw_old(struct ktr_csw_old *cs)
 {
 	const char *action = cs->out ? "stop" : "resume";
 	const char *mode = cs->user ? "user" : "kernel";
 
 	printf("%s %s\n", action, mode);
 }
 
 /*
  * Print a context switch record as "<stop|resume> <user|kernel>"
  * followed by the quoted wmesg field.
  */
 void
 ktrcsw(struct ktr_csw *cs)
 {
 	const char *action = cs->out ? "stop" : "resume";
 	const char *mode = cs->user ? "user" : "kernel";
 
 	printf("%s %s \"%s\"\n", action, mode, cs->wmesg);
 }
 
 /*
  * Print a user trace record.  sysdecode_utrace() gets the first try; if
  * it reports that it handled the record only a newline is added.
  * Otherwise the length is printed followed by every byte, in decimal or
  * two-digit hex depending on the decimal flag.
  */
 void
 ktruser(int len, void *p)
 {
 	unsigned char *bytep;
 
 	if (sysdecode_utrace(stdout, p, len)) {
 		printf("\n");
 		return;
 	}
 
 	printf("%d ", len);
 	for (bytep = p; len != 0; len--, bytep++) {
 		if (decimal)
 			printf(" %d", *bytep);
 		else
 			printf(" %02x", *bytep);
 	}
 	printf("\n");
 }
 
 /*
  * Print a capability rights structure on one line: the literal prefix
  * "cap_rights_t ", the text produced by sysdecode_cap_rights() for
  * *rightsp, and a newline.
  */
 void
 ktrcaprights(cap_rights_t *rightsp)
 {
 
 	printf("cap_rights_t ");
 	sysdecode_cap_rights(stdout, rightsp);
 	printf("\n");
 }
 
 /*
  * Print a struct timeval as "{seconds, microseconds}" with no trailing
  * newline.  Both fields are cast to long so the arguments match the
  * "%ld" conversions whatever the underlying integer types are.
  */
 static void
 ktrtimeval(struct timeval *tv)
 {
 
 	printf("{%ld, %ld}", (long)tv->tv_sec, (long)tv->tv_usec);
 }
 
 /*
  * Print a struct itimerval on one line in the form
  * "itimerval { .interval = {s, us}, .value = {s, us} }"; the two
  * timevals are formatted by ktrtimeval().
  */
 void
 ktritimerval(struct itimerval *it)
 {
 
 	printf("itimerval { .interval = ");
 	ktrtimeval(&it->it_interval);
 	printf(", .value = ");
 	ktrtimeval(&it->it_value);
 	printf(" }\n");
 }
 
 /*
  * Print a socket address as "struct sockaddr { <family>, <address> }".
  * The address part is decoded for AF_INET, AF_INET6 and AF_UNIX; other
  * families print "unknown address family".
  */
 void
 ktrsockaddr(struct sockaddr *sa)
 {
 /*
  TODO: Support additional address families
 	#include <netsmb/netbios.h>
 	struct sockaddr_nb	*nb;
 */
 	const char *str;
 	char addr[64];
 	size_t copylen;
 
 	/*
 	 * note: ktrstruct() passes a struct sockaddr_storage whose first
 	 * sa->sa_len bytes were copied from the trace record.  sa_len is
 	 * therefore untrusted and may be larger than the family-specific
 	 * structure, so every copy below is clamped to its destination.
 	 */
 	printf("struct sockaddr { ");
 	str = sysdecode_sockaddr_family(sa->sa_family);
 	if (str != NULL)
 		printf("%s", str);
 	else
 		printf("<invalid=%d>", sa->sa_family);
 	printf(", ");
 
 /*
  * Reject an address whose recorded length is shorter than its structure.
  * Deliberately not wrapped in do { } while (0): the break must leave the
  * enclosing switch case.
  */
 #define check_sockaddr_len(n)					\
 	if (sa_##n.s##n##_len < sizeof(struct sockaddr_##n)) {	\
 		printf("invalid");				\
 		break;						\
 	}
 
 	switch(sa->sa_family) {
 	case AF_INET: {
 		struct sockaddr_in sa_in;
 
 		copylen = sa->sa_len;
 		if (copylen > sizeof(sa_in))
 			copylen = sizeof(sa_in);
 		memset(&sa_in, 0, sizeof(sa_in));
 		memcpy(&sa_in, sa, copylen);
 		check_sockaddr_len(in);
 		if (inet_ntop(AF_INET, &sa_in.sin_addr, addr,
 		    sizeof(addr)) == NULL) {
 			printf("invalid");
 			break;
 		}
 		printf("%s:%u", addr, ntohs(sa_in.sin_port));
 		break;
 	}
 	case AF_INET6: {
 		struct sockaddr_in6 sa_in6;
 
 		copylen = sa->sa_len;
 		if (copylen > sizeof(sa_in6))
 			copylen = sizeof(sa_in6);
 		memset(&sa_in6, 0, sizeof(sa_in6));
 		memcpy(&sa_in6, sa, copylen);
 		check_sockaddr_len(in6);
 		/* addr is only meaningful when getnameinfo() succeeds. */
 		if (getnameinfo((struct sockaddr *)&sa_in6, sizeof(sa_in6),
 		    addr, sizeof(addr), NULL, 0, NI_NUMERICHOST) != 0) {
 			printf("invalid");
 			break;
 		}
 		/* The port is stored in network byte order. */
 		printf("[%s]:%u", addr, ntohs(sa_in6.sin6_port));
 		break;
 	}
 	case AF_UNIX: {
 		struct sockaddr_un sa_un;
 
 		copylen = sa->sa_len;
 		if (copylen > sizeof(sa_un))
 			copylen = sizeof(sa_un);
 		memset(&sa_un, 0, sizeof(sa_un));
 		memcpy(&sa_un, sa, copylen);
 		/* The precision bounds the read if sun_path is unterminated. */
 		printf("%.*s", (int)sizeof(sa_un.sun_path), sa_un.sun_path);
 		break;
 	}
 	default:
 		printf("unknown address family");
 	}
 	printf(" }\n");
 }
 
 /*
  * Print one timestamp member of a struct stat as "<label>=<time>, ".
  * The seconds are printed numerically when resolv is 0 and formatted
  * with TIME_FORMAT otherwise; non-zero nanoseconds are appended as a
  * nine-digit fraction.
  */
 static void
 ktrstat_time(const char *label, const struct timespec *ts)
 {
 	char timestr[PATH_MAX + 4];
 	struct tm *tm;
 
 	printf("%s=", label);
 	tm = NULL;
 	if (resolv != 0)
 		tm = localtime(&ts->tv_sec);
 	if (tm == NULL) {
 		/*
 		 * Numeric output was requested, or localtime() could not
 		 * convert the value; never hand strftime() a NULL pointer.
 		 */
 		printf("%jd", (intmax_t)ts->tv_sec);
 	} else {
 		strftime(timestr, sizeof(timestr), TIME_FORMAT, tm);
 		printf("\"%s\"", timestr);
 	}
 	if (ts->tv_nsec != 0)
 		printf(".%09ld, ", (long)ts->tv_nsec);
 	else
 		printf(", ");
 }
 
 /*
  * Print the members of a struct stat on one line.  When resolv is
  * non-zero the mode is shown symbolically, uid and gid are looked up and
  * shown as quoted names when the lookup succeeds, and timestamps are
  * formatted; otherwise everything is numeric.
  */
 void
 ktrstat(struct stat *statp)
 {
 	char mode[12];
 	struct passwd *pwd;
 	struct group  *grp;
 
 	/*
 	 * note: ktrstruct() has already verified that statp points to a
 	 * buffer exactly sizeof(struct stat) bytes long.
 	 */
 	printf("struct stat {");
 	printf("dev=%ju, ino=%ju, ",
 		(uintmax_t)statp->st_dev, (uintmax_t)statp->st_ino);
 	if (resolv == 0)
 		printf("mode=0%jo, ", (uintmax_t)statp->st_mode);
 	else {
 		strmode(statp->st_mode, mode);
 		printf("mode=%s, ", mode);
 	}
 	printf("nlink=%ju, ", (uintmax_t)statp->st_nlink);
 	if (resolv == 0) {
 		pwd = NULL;
 	} else {
 #ifdef HAVE_LIBCASPER
 		if (cappwd != NULL)
 			pwd = cap_getpwuid(cappwd, statp->st_uid);
 		else
 #endif
 			pwd = getpwuid(statp->st_uid);
 	}
 	if (pwd == NULL)
 		printf("uid=%ju, ", (uintmax_t)statp->st_uid);
 	else
 		printf("uid=\"%s\", ", pwd->pw_name);
 	if (resolv == 0) {
 		grp = NULL;
 	} else {
 #ifdef HAVE_LIBCASPER
 		if (capgrp != NULL)
 			grp = cap_getgrgid(capgrp, statp->st_gid);
 		else
 #endif
 			grp = getgrgid(statp->st_gid);
 	}
 	if (grp == NULL)
 		printf("gid=%ju, ", (uintmax_t)statp->st_gid);
 	else
 		printf("gid=\"%s\", ", grp->gr_name);
 	printf("rdev=%ju, ", (uintmax_t)statp->st_rdev);
 	ktrstat_time("atime", &statp->st_atim);
 	ktrstat_time("mtime", &statp->st_mtim);
 	ktrstat_time("ctime", &statp->st_ctim);
 	ktrstat_time("birthtime", &statp->st_birthtim);
 	/* "%jd" takes an intmax_t, so st_size is cast to the signed type. */
 	printf("size=%jd, blksize=%ju, blocks=%jd, flags=0x%x",
 		(intmax_t)statp->st_size, (uintmax_t)statp->st_blksize,
 		(intmax_t)statp->st_blocks, statp->st_flags);
 	printf(" }\n");
 }
 
 /*
  * Decode a structure record.  buf holds a NUL-terminated structure name
  * followed immediately by the raw structure bytes; buflen is the length
  * of both together.  The data is copied into a properly typed local
  * object and handed to the matching printer.  Malformed records print
  * "invalid record"; well-formed records with an unrecognised name print
  * "unknown structure".
  */
 void
 ktrstruct(char *buf, size_t buflen)
 {
 	char *name, *data;
 	size_t namelen, datalen;
 	size_t i;
 	cap_rights_t rights;
 	struct itimerval it;
 	struct stat sb;
 	struct sockaddr_storage ss;
 
 	/* Find the NUL that ends the name; it must lie inside the record. */
 	for (name = buf, namelen = 0;
 	     namelen < buflen && name[namelen] != '\0';
 	     ++namelen)
 		/* nothing */;
 	if (namelen == buflen)
 		goto invalid;
 	data = buf + namelen + 1;
 	datalen = buflen - namelen - 1;
 	if (datalen == 0)
 		goto invalid;
 	/*
 	 * sanity check; isalpha() is only defined for values representable
 	 * as unsigned char (or EOF), hence the cast.
 	 */
 	for (i = 0; i < namelen; ++i)
 		if (!isalpha((unsigned char)name[i]))
 			goto invalid;
 	if (strcmp(name, "caprights") == 0) {
 		if (datalen != sizeof(cap_rights_t))
 			goto invalid;
 		memcpy(&rights, data, datalen);
 		ktrcaprights(&rights);
 	} else if (strcmp(name, "itimerval") == 0) {
 		if (datalen != sizeof(struct itimerval))
 			goto invalid;
 		memcpy(&it, data, datalen);
 		ktritimerval(&it);
 	} else if (strcmp(name, "stat") == 0) {
 		if (datalen != sizeof(struct stat))
 			goto invalid;
 		memcpy(&sb, data, datalen);
 		ktrstat(&sb);
 	} else if (strcmp(name, "sockaddr") == 0) {
 		if (datalen > sizeof(ss))
 			goto invalid;
 		/*
 		 * A record may be shorter than struct sockaddr; zero the
 		 * storage first so that the family and any bytes the record
 		 * does not supply are never read uninitialised.
 		 */
 		memset(&ss, 0, sizeof(ss));
 		memcpy(&ss, data, datalen);
 		if (datalen != ss.ss_len)
 			goto invalid;
 		ktrsockaddr((struct sockaddr *)&ss);
 	} else {
 		printf("unknown structure\n");
 	}
 	return;
 invalid:
 	printf("invalid record\n");
 }
 
 /*
  * Print a capability failure record as one line of text chosen by the
  * failure type; the rights sets are rendered by sysdecode_cap_rights().
  */
 void
 ktrcapfail(struct ktr_cap_fail *ktr)
 {
 	if (ktr->cap_type == CAPFAIL_NOTCAPABLE) {
 		/* operation on fd with insufficient capabilities */
 		printf("operation requires ");
 		sysdecode_cap_rights(stdout, &ktr->cap_needed);
 		printf(", descriptor holds ");
 		sysdecode_cap_rights(stdout, &ktr->cap_held);
 	} else if (ktr->cap_type == CAPFAIL_INCREASE) {
 		/* requested more capabilities than fd already has */
 		printf("attempt to increase capabilities from ");
 		sysdecode_cap_rights(stdout, &ktr->cap_held);
 		printf(" to ");
 		sysdecode_cap_rights(stdout, &ktr->cap_needed);
 	} else if (ktr->cap_type == CAPFAIL_SYSCALL) {
 		/* called restricted syscall */
 		printf("disallowed system call");
 	} else if (ktr->cap_type == CAPFAIL_LOOKUP) {
 		/* used ".." in strict-relative mode */
 		printf("restricted VFS lookup");
 	} else {
 		printf("unknown capability failure: ");
 		sysdecode_cap_rights(stdout, &ktr->cap_needed);
 		printf(" ");
 		sysdecode_cap_rights(stdout, &ktr->cap_held);
 	}
 	printf("\n");
 }
 
 /*
  * Print a page fault record: the address in hex, a blank, and the type
  * field decoded through sysdecode_vmprot.
  */
 void
 ktrfault(struct ktr_fault *ktr)
 {
 	const uintmax_t addr = (uintmax_t)ktr->vaddr;
 
 	printf("0x%jx ", addr);
 	print_mask_arg(sysdecode_vmprot, ktr->type);
 	printf("\n");
 }
 
 /*
  * Print a page fault completion record: the name returned by
  * sysdecode_vmresult() for the result, or "<invalid=N>" when it has none.
  */
 void
 ktrfaultend(struct ktr_faultend *ktr)
 {
 	const char *resname = sysdecode_vmresult(ktr->result);
 
 	if (resname == NULL)
 		printf("<invalid=%d>", ktr->result);
 	else
 		printf("%s", resname);
 	printf("\n");
 }
 
 /*
  * Write the kdump usage summary to stderr and exit with status 1.
  */
 void
 usage(void)
 {
 	static const char synopsis[] =
 	    "usage: kdump [-dEnlHRrSsTA] [-f trfile] "
 	    "[-m maxdata] [-p pid] [-t trstr]\n";
 
 	fputs(synopsis, stderr);
 	exit(1);
 }
Index: head/usr.bin/lastcomm/lastcomm.c
===================================================================
--- head/usr.bin/lastcomm/lastcomm.c	(revision 318735)
+++ head/usr.bin/lastcomm/lastcomm.c	(revision 318736)
@@ -1,275 +1,275 @@
 /*
  * Copyright (c) 1980, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 static const char copyright[] =
 "@(#) Copyright (c) 1980, 1993\n\
 	The Regents of the University of California.  All rights reserved.\n";
 #endif /* not lint */
 
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)lastcomm.c	8.1 (Berkeley) 6/6/93";
 #endif
 #endif /* not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/stat.h>
 #include <sys/acct.h>
 
 #include <ctype.h>
 #include <err.h>
 #include <errno.h>
 #include <pwd.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
 #include "pathnames.h"
 
 /*XXX*/#include <inttypes.h>
 
 time_t	 expand(u_int);
 char	*flagbits(int);
 const	 char *getdev(dev_t);
-int	 readrec_forward(FILE *f, struct acctv2 *av2);
-int	 readrec_backward(FILE *f, struct acctv2 *av2);
-int	 requested(char *[], struct acctv2 *);
+int	 readrec_forward(FILE *f, struct acctv3 *av3);
+int	 readrec_backward(FILE *f, struct acctv3 *av3);
+int	 requested(char *[], struct acctv3 *);
 static	 void usage(void);
 
 #define AC_UTIME 1 /* user */
 #define AC_STIME 2 /* system */
 #define AC_ETIME 4 /* elapsed */
 #define AC_CTIME 8 /* user + system time, default */
 
 #define AC_BTIME 16 /* starting time */
 #define AC_FTIME 32 /* exit time (starting time + elapsed time )*/
 
 /*
  * lastcomm entry point.  Parses the options, selects the accounting file
  * and a record reader, then prints one line per accounting record that
  * matches the command, user or terminal names left on the command line
  * (every record when none are given).
  */
 int
 main(int argc, char *argv[])
 {
-	struct acctv2 ab;
+	struct acctv3 ab;
 	char *p;
 	FILE *fp;
-	int (*readrec)(FILE *f, struct acctv2 *av2);
+	int (*readrec)(FILE *f, struct acctv3 *av3);
 	time_t t;
 	int ch, rv;
 	const char *acctfile, *format;
 	char buf[1024];
 	int flags = 0;
 
 	acctfile = _PATH_ACCT;
 	format = NULL;
 	while ((ch = getopt(argc, argv, "f:usecSE")) != -1)
 		switch((char)ch) {
 		case 'f':
 			acctfile = optarg;
 			break;
 
 		case 'u': 
 			flags |= AC_UTIME; /* user time */
 			break;
 		case 's':
 			flags |= AC_STIME; /* system time */
 			break;
 		case 'e':
 			flags |= AC_ETIME; /* elapsed time */
 			break;
         	case 'c':
                         flags |= AC_CTIME; /* user + system time */
 			break;
 
         	case 'S':
                         flags |= AC_BTIME; /* starting time */
 			break;
         	case 'E':
 			/* exit time (starting time + elapsed time )*/
                         flags |= AC_FTIME; 
 			break;
 
 		case '?':
 		default:
 			usage();
 		}
 
 	/* default user + system time and starting time */
 	if (!flags) {
 	    flags = AC_CTIME | AC_BTIME;
 	}
 
 	argc -= optind;
 	argv += optind;
 
 	/* A first operand starting with '+' is a strftime(3) format. */
 	if (argc > 0 && **argv == '+') {
 		format = *argv + 1; /* skip + */
 		argc--;
 		argv++;
 	}
 
 	/*
 	 * Standard input is read front to back; a named file is positioned
 	 * at its end and read backwards one record at a time.
 	 */
 	if (strcmp(acctfile, "-") == 0) {
 		fp = stdin;
 		readrec = readrec_forward;
 	} else {
 		/* Open the file. */
 		if ((fp = fopen(acctfile, "r")) == NULL)
 			err(1, "could not open %s", acctfile);
 		if (fseek(fp, 0l, SEEK_END) == -1)
 			err(1, "seek to end of %s failed", acctfile);
 		readrec = readrec_backward;
 	}
 
 	while ((rv = readrec(fp, &ab)) == 1) {
 		/* Replace non-printable command name characters with '?'. */
 		for (p = &ab.ac_comm[0];
 		    p < &ab.ac_comm[AC_COMM_LEN] && *p; ++p)
 			if (!isprint(*p))
 				*p = '?';
 
 		if (*argv && !requested(argv, &ab))
 			continue;
 
 		(void)printf("%-*.*s %-7s %-*s %-8s",
 			     AC_COMM_LEN, AC_COMM_LEN, ab.ac_comm,
 			     flagbits(ab.ac_flagx),
 			     MAXLOGNAME - 1, user_from_uid(ab.ac_uid, 0),
 			     getdev(ab.ac_tty));
 		
 		
 		/* user + system time */
 		if (flags & AC_CTIME) {
 			(void)printf(" %6.3f secs", 
 			    (ab.ac_utime + ab.ac_stime) / 1000000);
 		}
 		
 		/* usr time */
 		if (flags & AC_UTIME) {
 			(void)printf(" %6.3f us", ab.ac_utime / 1000000);
 		}
 		
 		/* system time */
 		if (flags & AC_STIME) {
 			(void)printf(" %6.3f sy", ab.ac_stime / 1000000);
 		}
 		
 		/* elapsed time */
 		if (flags & AC_ETIME) {
 			(void)printf(" %8.3f es", ab.ac_etime / 1000000);
 		}
 		
 		/* starting time */
 		if (flags & AC_BTIME) {
 			if (format != NULL) {
 				(void)strftime(buf, sizeof(buf), format,
 				    localtime(&ab.ac_btime));
 				(void)printf(" %s", buf);
 			} else
 				(void)printf(" %.16s", ctime(&ab.ac_btime));
 		}
 		
 		/* exit time (starting time + elapsed time )*/
 		if (flags & AC_FTIME) {
 			t = ab.ac_btime;
 			t += (time_t)(ab.ac_etime / 1000000);
 			if (format != NULL) {
 				(void)strftime(buf, sizeof(buf), format,
 				    localtime(&t));
 				(void)printf(" %s", buf);
 			} else
 				(void)printf(" %.16s", ctime(&t));
 		}
 		printf("\n");
  	}
 	/* The readers return 0 at end of file and EOF on error. */
 	if (rv == EOF)
 		err(1, "read record from %s failed", acctfile);
 
 	if (fflush(stdout))
 		err(1, "stdout");
  	exit(0);
 }
 
 /*
  * Build the flag column for an accounting record: a '-' followed by one
  * letter for every accounting flag set in f.  The string lives in a
  * static buffer that the next call overwrites.
  */
 char *
 flagbits(int f)
 {
 	static const struct {
 		int	mask;
 		char	letter;
 	} table[] = {
 		{ ASU, 'S' },
 		{ AFORK, 'F' },
 		{ ACOMPAT, 'C' },
 		{ ACORE, 'D' },
 		{ AXSIG, 'X' },
 	};
 	static char flags[20] = "-";
 	char *out;
 	size_t k;
 
 	/* Letters are appended after the leading '-', in table order. */
 	out = flags + 1;
 	for (k = 0; k < sizeof(table) / sizeof(table[0]); k++) {
 		if (f & table[k].mask)
 			*out++ = table[k].letter;
 	}
 	*out = '\0';
 	return (flags);
 }
 
 /*
  * Decide whether the accounting record *acp was asked for.  Every entry
  * of the NULL-terminated argv list is compared with the record's user
  * name, its terminal name and (up to AC_COMM_LEN characters) its command
  * name.  Returns 1 on the first match and 0 when nothing matches.
  * argv[0] is dereferenced before the end-of-list test, so the list must
  * hold at least one entry.
  */
 int
-requested(char *argv[], struct acctv2 *acp)
+requested(char *argv[], struct acctv3 *acp)
 {
 	const char *p;
 
 	do {
 		p = user_from_uid(acp->ac_uid, 0);
 		if (!strcmp(p, *argv))
 			return (1);
 		if ((p = getdev(acp->ac_tty)) && !strcmp(p, *argv))
 			return (1);
 		if (!strncmp(acp->ac_comm, *argv, AC_COMM_LEN))
 			return (1);
 	} while (*++argv);
 	return (0);
 }
 
 /*
  * Return the name of character device dev, or "__" for NODEV.  The most
  * recent devname() result is remembered so that consecutive lookups of
  * the same device skip the call.
  */
 const char *
 getdev(dev_t dev)
 {
 	static dev_t cached_dev = (dev_t)-1;
 	static const char *cached_name;
 
 	/* Special case. */
 	if (dev == NODEV)
 		return ("__");
 	/* One-element cache: only look the name up when dev changes. */
 	if (dev != cached_dev) {
 		cached_dev = dev;
 		cached_name = devname(dev, S_IFCHR);
 	}
 	return (cached_name);
 }
 
 /*
  * Write the lastcomm usage summary to stderr and exit with status 1.
  */
 static void
 usage(void)
 {
 	static const char synopsis[] =
 	    "usage: lastcomm [-EScesu] [-f file] [+format] [command ...] "
 	    "[user ...] [terminal ...]\n";
 
 	(void)fputs(synopsis, stderr);
 	exit(1);
 }
Index: head/usr.bin/lastcomm/readrec.c
===================================================================
--- head/usr.bin/lastcomm/readrec.c	(revision 318735)
+++ head/usr.bin/lastcomm/readrec.c	(revision 318736)
@@ -1,227 +1,263 @@
 /*-
  * Copyright (c) 2007 Diomidis Spinellis
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/acct.h>
 
 #include <errno.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <string.h>
 
-int	 readrec_forward(FILE *f, struct acctv2 *av2);
-int	 readrec_backward(FILE *f, struct acctv2 *av2);
+int	 readrec_forward(FILE *f, struct acctv3 *av3);
+int	 readrec_backward(FILE *f, struct acctv3 *av3);
 
 /*
  * Reverse offsetof: return the offset of field f
  * from the end of the structure s.
  */
 #define roffsetof(s, f) (sizeof(s) - offsetof(s, f))
 
 /*
  * Read exactly one record of size size from stream f into ptr.
  * Failure to read the complete record is considered a file format error,
  * and will set errno to EFTYPE.
  * Return 0 on success, EOF on end of file or error.
  */
 static int
 fread_record(void *ptr, size_t size, FILE *f)
 {
 	const size_t nread = fread(ptr, 1, size, f);
 
 	/* The whole record arrived. */
 	if (nread == size)
 		return (0);
 	/*
 	 * Some bytes but not a whole record, with no stream error: the
 	 * file is malformed.  A read error or a clean end of file leaves
 	 * errno alone.
 	 */
 	if (!ferror(f) && nread != 0)
 		errno = EFTYPE;
 	return (EOF);
 }
 
 /*
  * Return the value of a comp_t field.
  */
 /*
  * The low 13 bits of a comp_t are a mantissa and the bits above them a
  * base-8 exponent.  The mantissa is scaled in floating point: with the
  * largest exponent the integer result needs more than 31 bits, which
  * would overflow an int.  Every intermediate value is an exact double.
  */
 static float
 decode_comp(comp_t v)
 {
 	double result;
 	int exp;
 
 	result = v & 017777;
 	for (exp = v >> 13; exp; exp--)
 		result *= 8;
 	return (result / AHZV1);
 }
 
 /*
  * Read a v1 accounting record stored at the current
  * position of stream f.
  * Convert the data to the current record format: the comp_t time
  * fields are decoded and multiplied by 1000000, ac_io is decoded,
  * the remaining fields are copied as they are, and ANVER is OR-ed
  * into the flags.
  * Return 0 on success, EOF on error or end-of-file.
  */
 static int
-readrec_v1(FILE *f, struct acctv2 *av2)
+readrec_v1(FILE *f, struct acctv3 *av3)
 {
 	struct acctv1 av1;
 	int rv;
 
 	if ((rv = fread_record(&av1, sizeof(av1), f)) == EOF)
 		return (EOF);
-	av2->ac_zero = 0;
-	av2->ac_version = 2;
-	av2->ac_len = av2->ac_len2 = sizeof(*av2);
-	memcpy(av2->ac_comm, av1.ac_comm, AC_COMM_LEN);
-	av2->ac_utime = decode_comp(av1.ac_utime) * 1000000;
-	av2->ac_stime = decode_comp(av1.ac_stime) * 1000000;
-	av2->ac_etime = decode_comp(av1.ac_etime) * 1000000;
-	av2->ac_btime = av1.ac_btime;
-	av2->ac_uid = av1.ac_uid;
-	av2->ac_gid = av1.ac_gid;
-	av2->ac_mem = av1.ac_mem;
-	av2->ac_io = decode_comp(av1.ac_io);
-	av2->ac_tty = av1.ac_tty;
-	av2->ac_flagx = av1.ac_flag | ANVER;
+	av3->ac_zero = 0;
+	av3->ac_version = 3;
+	av3->ac_len = av3->ac_len2 = sizeof(*av3);
+	memcpy(av3->ac_comm, av1.ac_comm, AC_COMM_LEN);
+	av3->ac_utime = decode_comp(av1.ac_utime) * 1000000;
+	av3->ac_stime = decode_comp(av1.ac_stime) * 1000000;
+	av3->ac_etime = decode_comp(av1.ac_etime) * 1000000;
+	av3->ac_btime = av1.ac_btime;
+	av3->ac_uid = av1.ac_uid;
+	av3->ac_gid = av1.ac_gid;
+	av3->ac_mem = av1.ac_mem;
+	av3->ac_io = decode_comp(av1.ac_io);
+	av3->ac_tty = av1.ac_tty;
+	av3->ac_flagx = av1.ac_flag | ANVER;
 	return (0);
 }
 
 /*
  * Read a v2 accounting record stored at the current
  * position of stream f.
  * Convert the data to the current record format by copying
  * it field by field.
  * Return 0 on success, EOF on error or end-of-file.
  */
 static int
-readrec_v2(FILE *f, struct acctv2 *av2)
+readrec_v2(FILE *f, struct acctv3 *av3)
 {
-	return (fread_record(av2, sizeof(*av2), f));
+	struct acctv2 av2;
+	int rv;
+
+	if ((rv = fread_record(&av2, sizeof(av2), f)) == EOF)
+		return (EOF);
+	av3->ac_zero = 0;
+	av3->ac_version = 3;
+	av3->ac_len = av3->ac_len2 = sizeof(*av3);
+	memcpy(av3->ac_comm, av2.ac_comm, AC_COMM_LEN);
+	av3->ac_utime = av2.ac_utime;
+	av3->ac_stime = av2.ac_stime;
+	av3->ac_etime = av2.ac_etime;
+	av3->ac_btime = av2.ac_btime;
+	av3->ac_uid = av2.ac_uid;
+	av3->ac_gid = av2.ac_gid;
+	av3->ac_mem = av2.ac_mem;
+	av3->ac_io = av2.ac_io;
+	av3->ac_tty = av2.ac_tty;
+	av3->ac_flagx = av2.ac_flagx;
+	return (0);
 }
 
 /*
+ * Read a v3 accounting record stored at the current
+ * position of stream f.  v3 is the current record format,
+ * so the record is read directly into *av3.
+ * Return 0 on success, EOF on error or end-of-file.
+ */
+static int
+readrec_v3(FILE *f, struct acctv3 *av3)
+{
+
+	return (fread_record(av3, sizeof(*av3), f));
+}
+
+/*
  * Read a new-style (post-v1) accounting record stored at
  * the current position of stream f.
  * The first two bytes of the record are read to learn its
  * version and then pushed back, so the version-specific
  * reader sees the record from its beginning.
  * NOTE(review): two characters are pushed back with ungetc();
  * ISO C only guarantees one, so this relies on the C library
  * accepting a second.
  * Convert the data to the current record format.
  * Return 0 on success, EOF on error or end-of-file; an unknown
  * version sets errno to EFTYPE.
  */
 static int
-readrec_vx(FILE *f, struct acctv2 *av2)
+readrec_vx(FILE *f, struct acctv3 *av3)
 {
 	uint8_t magic, version;
 
 	if (fread_record(&magic, sizeof(magic), f) == EOF ||
 	    fread_record(&version, sizeof(version), f) == EOF ||
 	    ungetc(version, f) == EOF ||
 	    ungetc(magic, f) == EOF)
 		return (EOF);
 	switch (version) {
 	case 2:
-		return (readrec_v2(f, av2));
+		return (readrec_v2(f, av3));
+	case 3:
+		return (readrec_v3(f, av3));
 
 	/* Add handling for more versions here. */
 
 	default:
 		errno = EFTYPE;
 		return (EOF);
 	}
 }
 
 /*
  * Read an accounting record stored at the current
  * position of stream f.
  * The first byte is peeked at and pushed back: a non-zero
  * byte marks an old-format (v1) record, a zero byte a
  * new-format record.
  * Old-format records are converted to the current record
  * format.
  * Return the number of records read (1 or 0 at the end-of-file),
  * or EOF on error.
  */
 int
-readrec_forward(FILE *f, struct acctv2 *av2)
+readrec_forward(FILE *f, struct acctv3 *av3)
 {
 	int magic, rv;
 
 	if ((magic = getc(f)) == EOF)
 		return (ferror(f) ? EOF : 0);
 	if (ungetc(magic, f) == EOF)
 		return (EOF);
 	if (magic != 0)
 		/* Old record format. */
-		rv = readrec_v1(f, av2);
+		rv = readrec_v1(f, av3);
 	else
 		/* New record formats. */
-		rv = readrec_vx(f, av2);
+		rv = readrec_vx(f, av3);
 	return (rv == EOF ? EOF : 1);
 }
 
 /*
  * Read an accounting record ending at the current
  * position of stream f.
  * The byte at the trailer offset from the end of the record
  * is examined first: when it has ANVER set the record is in
  * a new format and its length is taken from ac_len2,
  * otherwise it is a fixed-size v1 record.
  * Old-format records are converted to the current record
  * format.
  * The file pointer is positioned at the beginning of the
  * record read.
  * Return the number of records read (1 or 0 at the end-of-file),
  * or EOF on error.
  */
 int
-readrec_backward(FILE *f, struct acctv2 *av2)
+readrec_backward(FILE *f, struct acctv3 *av3)
 {
 	off_t pos;
 	int c;
 	uint16_t len;
 
 	if ((pos = ftell(f)) == -1)
 		return (EOF);
 	if (pos == 0)
 		return (0);
-	if (fseek(f, -roffsetof(struct acctv2, ac_trailer),
+	if (fseek(f, -roffsetof(struct acctv3, ac_trailer),
 	    SEEK_CUR) == EOF ||
 	    (c = getc(f)) == EOF)
 		return (EOF);
 	if (c & ANVER) {
-		/* New record formats. */
+		/*
+		 * New record formats.  For v2 and v3 offset from the
+		 * end for ac_len2 should be same.
+		 */
 		if (fseeko(f, pos - roffsetof(struct acctv2, ac_len2),
 		    SEEK_SET) == EOF ||
 		    fread_record(&len, sizeof(len), f) == EOF ||
 		    fseeko(f, pos - len, SEEK_SET) == EOF ||
-		    readrec_vx(f, av2) == EOF ||
+		    readrec_vx(f, av3) == EOF ||
 		    fseeko(f, pos - len, SEEK_SET) == EOF)
 			return (EOF);
 		else
 			return (1);
 	} else {
 		/* Old record format. */
 		if (fseeko(f, pos - sizeof(struct acctv1), SEEK_SET) == EOF ||
-		    readrec_v1(f, av2) == EOF ||
+		    readrec_v1(f, av3) == EOF ||
 		    fseeko(f, pos - sizeof(struct acctv1), SEEK_SET) == EOF)
 			return (EOF);
 		else
 			return (1);
 	}
 }
Index: head/usr.sbin/pstat/pstat.c
===================================================================
--- head/usr.sbin/pstat/pstat.c	(revision 318735)
+++ head/usr.sbin/pstat/pstat.c	(revision 318736)
@@ -1,595 +1,595 @@
 /*-
  * Copyright (c) 1980, 1991, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  * Copyright (c) 2002 Networks Associates Technologies, Inc.
  * All rights reserved.
  *
  * Portions of this software were developed for the FreeBSD Project by
  * ThinkSec AS and NAI Labs, the Security Research Division of Network
  * Associates, Inc.  under DARPA/SPAWAR contract N66001-01-C-8035
  * ("CBOSS"), as part of the DARPA CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if 0
 #ifndef lint
 static const char copyright[] =
 "@(#) Copyright (c) 1980, 1991, 1993, 1994\n\
 	The Regents of the University of California.  All rights reserved.\n";
 #endif /* not lint */
 
 #ifndef lint
 static char sccsid[] = "@(#)pstat.c	8.16 (Berkeley) 5/9/95";
 #endif /* not lint */
 #endif
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <sys/stdint.h>
 #include <sys/ioctl.h>
 #include <sys/tty.h>
 #include <sys/blist.h>
 
 #include <sys/sysctl.h>
 #include <vm/vm_param.h>
 
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <kvm.h>
 #include <libutil.h>
 #include <limits.h>
 #include <nlist.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 
 /*
  * Indices into nl[].  NL_MARKER is the last entry; its empty name ends
  * the scan over nl[] in main().
  */
 enum {
 	NL_CONSTTY,
 	NL_MAXFILES,
 	NL_NFILES,
 	NL_TTY_LIST,
 	NL_MARKER
 };
 
 /*
  * Kernel symbol name for each NL_* index.  main() copies every name into
  * nl[order].n_name before calling kvm_nlist().
  */
 static struct {
 	int order;
 	const char *name;
 } namelist[] = {
 	{ NL_CONSTTY, "_constty" },
 	{ NL_MAXFILES, "_maxfiles" },
 	{ NL_NFILES, "_openfiles" },
 	{ NL_TTY_LIST, "_tty_list" },
 	{ NL_MARKER, "" },
 };
 /* Number of entries in namelist[], and therefore in nl[]. */
 #define NNAMES	(sizeof(namelist) / sizeof(*namelist))
 static struct nlist nl[NNAMES];
 
 /* Option state; every variable below is assigned in main(). */
 static int	humanflag;	/* -h */
 static int	usenumflag;	/* -n */
 static int	totalflag;	/* -T */
 static int	swapflag;	/* -s, or set when invoked as swapinfo */
 static char	*nlistf;	/* -N argument */
 static char	*memf;		/* -M argument */
 static kvm_t	*kd;		/* opened only when -M is given; NULL otherwise */
 
 /* Synopsis printed by usage(); chosen in main() from the invocation name. */
 static const char *usagestr;
 
 static void	filemode(void);
 static int	getfiles(struct xfile **, size_t *);
 static void	swapmode(void);
 static void	ttymode(void);
 static void	ttyprt(struct xtty *);
 static void	usage(void);
 
 /*
  * Entry point for pstat and, when invoked under that name, swapinfo.
  *
  * Parses the options, optionally opens a kernel image/core with libkvm
  * (-M/-N) and resolves the symbols listed in namelist[], then runs the
  * file, tty and swap reports that were requested.  Exits with status 0
  * on success; usage() and the err()/errx() calls exit with status 1.
  */
 int
 main(int argc, char *argv[])
 {
 	int ch, quit, ret;
 	int fileflag, ttyflag;
 	unsigned int i;
 	char buf[_POSIX2_LINE_MAX];
 	const char *opts;
 
 	fileflag = swapflag = ttyflag = 0;
 
 	/* We will behave like good old swapinfo if thus invoked */
 	opts = strrchr(argv[0], '/');
 	if (opts)
 		opts++;
 	else
 		opts = argv[0];
 	if (!strcmp(opts, "swapinfo")) {
 		swapflag = 1;
 		opts = "ghkmM:N:";
 		usagestr = "swapinfo [-ghkm] [-M core [-N system]]";
 	} else {
 		opts = "TM:N:fghkmnst";
 		usagestr = "pstat [-Tfghkmnst] [-M core [-N system]]";
 	}
 
 	while ((ch = getopt(argc, argv, opts)) != -1)
 		switch (ch) {
 		case 'f':
 			fileflag = 1;
 			break;
 		case 'g':
 			setenv("BLOCKSIZE", "1G", 1);
 			break;
 		case 'h':
 			humanflag = 1;
 			break;
 		case 'k':
 			setenv("BLOCKSIZE", "1K", 1);
 			break;
 		case 'm':
 			setenv("BLOCKSIZE", "1M", 1);
 			break;
 		case 'M':
 			memf = optarg;
 			break;
 		case 'N':
 			nlistf = optarg;
 			break;
 		case 'n':
 			usenumflag = 1;
 			break;
 		case 's':
 			++swapflag;
 			break;
 		case 'T':
 			totalflag = 1;
 			break;
 		case 't':
 			ttyflag = 1;
 			break;
 		default:
 			usage();
 		}
 
 	/*
 	 * Initialize symbol names list.  The names are dereferenced below
 	 * (and handed to kvm_nlist()), so a failed strdup() must not be
 	 * stored silently.
 	 */
 	for (i = 0; i < NNAMES; i++) {
 		nl[namelist[i].order].n_name = strdup(namelist[i].name);
 		if (nl[namelist[i].order].n_name == NULL)
 			err(1, "strdup()");
 	}
 
 	if (memf != NULL) {
 		kd = kvm_openfiles(nlistf, memf, NULL, O_RDONLY, buf);
 		if (kd == NULL)
 			errx(1, "kvm_openfiles: %s", buf);
 		if ((ret = kvm_nlist(kd, nl)) != 0) {
 			if (ret == -1)
 				errx(1, "kvm_nlist: %s", kvm_geterr(kd));
 			/* Report every unresolved symbol before giving up. */
 			quit = 0;
 			for (i = 0; nl[i].n_name[0] != '\0'; ++i)
 				if (nl[i].n_value == 0) {
 					quit = 1;
 					warnx("undefined symbol: %s",
 					    nl[i].n_name);
 				}
 			if (quit)
 				exit(1);
 		}
 	}
 	/* At least one report has to be requested. */
 	if (!(fileflag | ttyflag | swapflag | totalflag))
 		usage();
 	if (fileflag || totalflag)
 		filemode();
 	if (ttyflag)
 		ttymode();
 	if (swapflag || totalflag)
 		swapmode();
 	exit (0);
 }
 
 /* Write the synopsis selected in main() to stderr and exit with status 1. */
 static void
 usage(void)
 {
 
 	(void)fprintf(stderr, "usage: %s\n", usagestr);
 	exit(1);
 }
 
 /*
  * Column headers for the open-file table; filemode() picks one by
  * sizeof(uintptr_t).  The comment after each header shows a sample row.
  */
 static const char fhdr32[] =
   "   LOC   TYPE   FLG  CNT MSG   DATA        OFFSET\n";
 /* c0000000 ------ RWAI 123 123 c0000000 1000000000000000 */
 
 static const char fhdr64[] =
   "       LOC       TYPE   FLG  CNT MSG       DATA            OFFSET\n";
 /* c000000000000000 ------ RWAI 123 123 c000000000000000 1000000000000000 */
 
 /* Column header for the tty table printed by ttymode_kvm()/ttymode_sysctl(). */
 static const char hdr[] =
 "      LINE   INQ  CAN  LIN  LOW  OUTQ  USE  LOW   COL  SESS  PGID STATE\n";
 
 /*
  * Print the tty table from a kernel image opened with libkvm.
  *
  * Reads the list head found at the _tty_list symbol, then copies each
  * struct tty out of the image, converts it to a struct xtty and hands it
  * to ttyprt().  Any short kvm_read() is fatal.
  */
 static void
 ttymode_kvm(void)
 {
 	TAILQ_HEAD(, tty) tl;
 	struct tty *tp, tty;
 	struct xtty xt;
 
 	(void)printf("%s", hdr);
 	/* Fields that are never assigned below (xt_pgid, xt_sid) stay zero. */
 	bzero(&xt, sizeof xt);
 	xt.xt_size = sizeof xt;
 	if (kvm_read(kd, nl[NL_TTY_LIST].n_value, &tl, sizeof tl) != sizeof tl)
 		errx(1, "kvm_read(): %s", kvm_geterr(kd));
 	tp = TAILQ_FIRST(&tl);
 	while (tp != NULL) {
 		/* tp is an address inside the image; work on a local copy. */
 		if (kvm_read(kd, (u_long)tp, &tty, sizeof tty) != sizeof tty)
 			errx(1, "kvm_read(): %s", kvm_geterr(kd));
 		xt.xt_insize = tty.t_inq.ti_nblocks * TTYINQ_DATASIZE;
 		xt.xt_incc = tty.t_inq.ti_linestart - tty.t_inq.ti_begin;
 		xt.xt_inlc = tty.t_inq.ti_end - tty.t_inq.ti_linestart;
 		xt.xt_inlow = tty.t_inlow;
 		xt.xt_outsize = tty.t_outq.to_nblocks * TTYOUTQ_DATASIZE;
 		xt.xt_outcc = tty.t_outq.to_end - tty.t_outq.to_begin;
 		xt.xt_outlow = tty.t_outlow;
 		xt.xt_column = tty.t_column;
 		/* xt.xt_pgid = ... */
 		/* xt.xt_sid = ... */
 		xt.xt_flags = tty.t_flags;
-		xt.xt_dev = NODEV;
+		xt.xt_dev = (uint32_t)NODEV;
 		ttyprt(&xt);
 		/* The next pointer is taken from the local copy just read. */
 		tp = TAILQ_NEXT(&tty, t_list);
 	}
 }
 
 /*
  * Print the tty table of the running system.
  *
  * Fetches the kern.ttys sysctl into a heap buffer, doubling the buffer
  * each time the kernel answers ENOMEM, then prints every struct xtty
  * returned.  Any other sysctl or allocation failure is fatal.
  */
 static void
 ttymode_sysctl(void)
 {
 	struct xtty *xttys;
 	size_t len;
 	unsigned int i, n;
 
 	(void)printf("%s", hdr);
 	if ((xttys = malloc(len = sizeof(*xttys))) == NULL)
 		err(1, "malloc()");
 	while (sysctlbyname("kern.ttys", xttys, &len, NULL, 0) == -1) {
 		if (errno != ENOMEM)
 			err(1, "sysctlbyname()");
 		len *= 2;
 		/* err() exits, so losing the old pointer on failure is harmless. */
 		if ((xttys = realloc(xttys, len)) == NULL)
 			err(1, "realloc()");
 	}
 	/* On success len holds the number of bytes the kernel returned. */
 	n = len / sizeof(*xttys);
 	for (i = 0; i < n; i++)
 		ttyprt(&xttys[i]);
 	free(xttys);
 }
 
 /* Print the tty table, using libkvm when a kernel image was opened. */
 static void
 ttymode(void)
 {
 
 	if (kd == NULL) {
 		ttymode_sysctl();
 		return;
 	}
 	ttymode_kvm();
 }
 
 /*
  * Maps each TF_* flag bit to the character ttyprt() prints for it in the
  * last column.  ttyprt() walks the table in order and stops at the entry
  * whose flag is 0, so the order here is the order of the output.
  */
 static struct {
 	int flag;
 	char val;
 } ttystates[] = {
 #if 0
 	{ TF_NOPREFIX,		'N' },
 #endif
 	{ TF_INITLOCK,		'I' },
 	{ TF_CALLOUT,		'C' },
 
 	/* Keep these together -> 'Oi' and 'Oo'. */
 	{ TF_OPENED,		'O' },
 	{ TF_OPENED_IN,		'i' },
 	{ TF_OPENED_OUT,	'o' },
 	{ TF_OPENED_CONS,	'c' },
 
 	{ TF_GONE,		'G' },
 	{ TF_OPENCLOSE,		'B' },
 	{ TF_ASYNC,		'Y' },
 	{ TF_LITERAL,		'L' },
 
 	/* Keep these together -> 'Hi' and 'Ho'. */
 	{ TF_HIWAT,		'H' },
 	{ TF_HIWAT_IN,		'i' },
 	{ TF_HIWAT_OUT,		'o' },
 
 	{ TF_STOPPED,		'S' },
 	{ TF_EXCLUDE,		'X' },
 	{ TF_BYPASS,		'l' },
 	{ TF_ZOMBIE,		'Z' },
 	{ TF_HOOK,		's' },
 
 	/* Keep these together -> 'bi' and 'bo'. */
 	{ TF_BUSY,		'b' },
 	{ TF_BUSY_IN,		'i' },
 	{ TF_BUSY_OUT,		'o' },
 
 	/* Terminator: ttyprt() stops at the first zero flag. */
 	{ 0,			'\0'},
 };
 
 /*
  * Print one row of the tty table for *xt.
  *
  * The device is shown by name unless -n was given, the device number is
  * 0, or devname() cannot resolve it; in those cases the number is shown
  * in hex.  The state column lists one character per set flag found in
  * ttystates[], or '-' when none is set.
  */
 static void
 ttyprt(struct xtty *xt)
 {
 	const char *name;
 	int idx, shown;
 
 	if (xt->xt_size != sizeof *xt)
 		errx(1, "struct xtty size mismatch");
 
 	name = NULL;
 	if (!usenumflag && xt->xt_dev != 0)
 		name = devname(xt->xt_dev, S_IFCHR);
 	if (name != NULL)
 		printf("%10s ", name);
 	else
 		printf("%#10jx ", (uintmax_t)xt->xt_dev);
 
 	printf("%5zu %4zu %4zu %4zu %5zu %4zu %4zu %5u %5d %5d ",
 	    xt->xt_insize, xt->xt_incc, xt->xt_inlc,
 	    (xt->xt_insize - xt->xt_inlow), xt->xt_outsize,
 	    xt->xt_outcc, (xt->xt_outsize - xt->xt_outlow),
 	    MIN(xt->xt_column, 99999), xt->xt_sid, xt->xt_pgid);
 
 	shown = 0;
 	for (idx = 0; ttystates[idx].flag != 0; idx++) {
 		if ((xt->xt_flags & ttystates[idx].flag) == 0)
 			continue;
 		putchar(ttystates[idx].val);
 		shown++;
 	}
 	if (shown == 0)
 		putchar('-');
 	putchar('\n');
 }
 
 /*
  * Print the open-file report.
  *
  * The maximum and current file counts come from the kernel image (via
  * kvm_read()) when one is open, otherwise from the kern.maxfiles and
  * kern.openfiles sysctls.  With -T only the "open/max files" summary is
  * printed.  Otherwise the table returned by getfiles() is printed one
  * row per entry; entries whose type is outside dtypes[] are skipped.
  */
 static void
 filemode(void)
 {
 	struct xfile *fp, *buf;
 	char flagbuf[16], *fbp;
 	int maxf, openf;
 	size_t len;
 	static char const * const dtypes[] = { "???", "inode", "socket",
 	    "pipe", "fifo", "kqueue", "crypto" };
 	int i;
 	int wid;
 
 	if (kd != NULL) {
 		if (kvm_read(kd, nl[NL_MAXFILES].n_value,
 			&maxf, sizeof maxf) != sizeof maxf ||
 		    kvm_read(kd, nl[NL_NFILES].n_value,
 			&openf, sizeof openf) != sizeof openf)
 			errx(1, "kvm_read(): %s", kvm_geterr(kd));
 	} else {
 		len = sizeof(int);
 		if (sysctlbyname("kern.maxfiles", &maxf, &len, 0, 0) == -1 ||
 		    sysctlbyname("kern.openfiles", &openf, &len, 0, 0) == -1)
 			err(1, "sysctlbyname()");
 	}
 
 	if (totalflag) {
 		(void)printf("%3d/%3d files\n", openf, maxf);
 		return;
 	}
 	if (getfiles(&buf, &len) == -1)
 		return;
 	/* From here on openf counts the entries actually returned. */
 	openf = len / sizeof *fp;
 
 	(void)printf("%d/%d open files\n", openf, maxf);
 	/* The headers are plain text, so do not pass them as a format. */
 	(void)fputs(sizeof(uintptr_t) == 4 ? fhdr32 : fhdr64, stdout);
 	/* Two hex digits per byte of a pointer. */
 	wid = (int)sizeof(uintptr_t) * 2;
 	for (fp = (struct xfile *)buf, i = 0; i < openf; ++fp, ++i) {
 		if ((size_t)fp->xf_type >= sizeof(dtypes) / sizeof(dtypes[0]))
 			continue;
 		(void)printf("%*jx", wid, (uintmax_t)(uintptr_t)fp->xf_file);
 		(void)printf(" %-6.6s", dtypes[fp->xf_type]);
 		/* Build the FLG column: R, W, A and I for the four flags. */
 		fbp = flagbuf;
 		if (fp->xf_flag & FREAD)
 			*fbp++ = 'R';
 		if (fp->xf_flag & FWRITE)
 			*fbp++ = 'W';
 		if (fp->xf_flag & FAPPEND)
 			*fbp++ = 'A';
 		if (fp->xf_flag & FASYNC)
 			*fbp++ = 'I';
 		*fbp = '\0';
 		(void)printf(" %4s %3d", flagbuf, fp->xf_count);
 		(void)printf(" %3d", fp->xf_msgcount);
 		(void)printf(" %*jx", wid, (uintmax_t)(uintptr_t)fp->xf_data);
 		(void)printf(" %*jx\n", (int)sizeof(fp->xf_offset) * 2,
 		    (uintmax_t)fp->xf_offset);
 	}
 	free(buf);
 }
 
 /*
  * Fetch the kernel's open-file table through the CTL_KERN/KERN_FILE
  * sysctl.
  *
  * On success stores a malloc()ed buffer in *abuf and its length in bytes
  * in *alen, and returns 0; the caller frees the buffer.  Returns -1 after
  * printing a warning when the sysctl fails, leaving *abuf and *alen
  * untouched.  Exits when a kernel image is open (not implemented) or
  * when malloc() fails.
  */
 static int
 getfiles(struct xfile **abuf, size_t *alen)
 {
 	struct xfile *buf;
 	size_t len;
 	int mib[2];
 
 	/*
 	 * XXX
 	 * Add emulation of KINFO_FILE here.
 	 */
 	if (kd != NULL)
 		errx(1, "files on dead kernel, not implemented");
 
 	mib[0] = CTL_KERN;
 	mib[1] = KERN_FILE;
 	/* First call only asks for the size needed. */
 	if (sysctl(mib, 2, NULL, &len, NULL, 0) == -1) {
 		warn("sysctl: KERN_FILE");
 		return (-1);
 	}
 	if ((buf = malloc(len)) == NULL)
 		errx(1, "malloc");
 	if (sysctl(mib, 2, buf, &len, NULL, 0) == -1) {
 		warn("sysctl: KERN_FILE");
 		/* Nobody else holds buf yet; release it before bailing out. */
 		free(buf);
 		return (-1);
 	}
 	*abuf = buf;
 	*alen = len;
 	return (0);
 }
 
 /*
  * swapmode is based on a program called swapinfo written
  * by Kevin Lahey <kml@rokkaku.atl.ga.us>.
  */
 
 /*
  * Both macros take a page count.  They expand to expressions that use
  * variables named pagesize (and, for CONVERT, blocksize), which must be
  * in scope wherever the macro is used.
  */
 #define CONVERT(v)	((int64_t)(v) * pagesize / blocksize)
 #define CONVERT_BLOCKS(v)	((int64_t)(v) * pagesize)
 /* Running totals over all devices, accumulated by print_swap(). */
 static struct kvm_swap swtot;
 /* Number of devices passed to print_swap() so far. */
 static int nswdev;
 
 /*
  * Print the column header of the swap table.  Nothing is printed in -T
  * mode, but getbsize() is still called.
  */
 static void
 print_swap_header(void)
 {
 	const char *title;
 	long bsize;
 	int width;
 
 	title = getbsize(&width, &bsize);
 	if (totalflag != 0)
 		return;
 	(void)printf("%-15s %*s %8s %8s %8s\n",
 	    "Device", width, title,
 	    "Used", "Avail", "Capacity");
 }
 
 /*
  * Print one row of the swap table.
  *
  * swdevname is the label for the row; nblks, bused and bavail are page
  * counts (they are scaled by the page size through CONVERT and
  * CONVERT_BLOCKS); bpercent is the capacity figure, printed as is.
  * With -h the used and available columns go through humanize_number().
  */
 static void
 print_swap_line(const char *swdevname, intmax_t nblks, intmax_t bused,
     intmax_t bavail, float bpercent)
 {
 	char usedbuf[5];
 	char availbuf[5];
 	int hlen, pagesize;
 	long blocksize;
 
 	/* pagesize and blocksize are read by the CONVERT macros below. */
 	pagesize = getpagesize();
 	getbsize(&hlen, &blocksize);
 
 	/* %jd takes an intmax_t, so cast like the other calls here do. */
 	printf("%-15s %*jd ", swdevname, hlen, (intmax_t)CONVERT(nblks));
 	if (humanflag) {
 		humanize_number(usedbuf, sizeof(usedbuf),
 		    CONVERT_BLOCKS(bused), "",
 		    HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL);
 		humanize_number(availbuf, sizeof(availbuf),
 		    CONVERT_BLOCKS(bavail), "",
 		    HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL);
 		printf("%8s %8s %5.0f%%\n", usedbuf, availbuf, bpercent);
 	} else {
 		printf("%8jd %8jd %5.0f%%\n", (intmax_t)CONVERT(bused),
 		    (intmax_t)CONVERT(bavail), bpercent);
 	}
 }
 
 /*
  * Add one device to the running totals and, unless -T was given, print
  * its row.
  */
 static void
 print_swap(struct kvm_swap *ksw)
 {
 
 	swtot.ksw_total += ksw->ksw_total;
 	swtot.ksw_used += ksw->ksw_used;
 	nswdev++;
 	if (totalflag != 0)
 		return;
 	print_swap_line(ksw->ksw_devname, ksw->ksw_total, ksw->ksw_used,
 	    ksw->ksw_total - ksw->ksw_used,
 	    (ksw->ksw_used * 100.0) / ksw->ksw_total);
 }
 
 /*
  * Print the swap summary.  With -T this is a single "used/total" line
  * with the block size forced to 1 MiB; otherwise a "Total" row is
  * printed, but only when more than one device was seen.
  */
 static void
 print_swap_total(void)
 {
 	int hlen, pagesize;
 	long blocksize;
 
 	/* pagesize and blocksize are read by the CONVERT macro below. */
 	pagesize = getpagesize();
 	getbsize(&hlen, &blocksize);
 	if (totalflag) {
 		blocksize = 1024 * 1024;
 		/* %jd takes an intmax_t; CONVERT() does not yield one. */
 		(void)printf("%jdM/%jdM swap space\n",
 		    (intmax_t)CONVERT(swtot.ksw_used),
 		    (intmax_t)CONVERT(swtot.ksw_total));
 	} else if (nswdev > 1) {
 		print_swap_line("Total", swtot.ksw_total, swtot.ksw_used,
 		    swtot.ksw_total - swtot.ksw_used,
 		    (swtot.ksw_used * 100.0) / swtot.ksw_total);
 	}
 }
 
 /*
  * Print the swap report from a kernel image: header, one row for each
  * entry kvm_getswapinfo() reports, then the summary.
  */
 static void
 swapmode_kvm(void)
 {
 	struct kvm_swap devs[16];
 	int count, idx;
 
 	count = kvm_getswapinfo(kd, devs, sizeof devs / sizeof devs[0],
 	    SWIF_DEV_PREFIX);
 
 	print_swap_header();
 	idx = 0;
 	while (idx < count) {
 		print_swap(&devs[idx]);
 		idx++;
 	}
 	print_swap_total();
 }
 
 /*
  * Print the swap report of the running system.
  *
  * Queries vm.swap_info.<n> for n = 0, 1, ... until the sysctl fails,
  * printing one row per device, then the summary.  A failure other than
  * ENOENT, or an unexpected xswdev version, is fatal.
  */
 static void
 swapmode_sysctl(void)
 {
 	struct kvm_swap ksw;
 	struct xswdev xsw;
 	size_t miblen, xswlen;
 	int idx, mib[16];
 
 	print_swap_header();
 	miblen = sizeof mib / sizeof mib[0];
 	if (sysctlnametomib("vm.swap_info", mib, &miblen) == -1)
 		err(1, "sysctlnametomib()");
 	idx = 0;
 	for (;;) {
 		/* The device index goes in the slot after the resolved name. */
 		mib[miblen] = idx;
 		xswlen = sizeof xsw;
 		if (sysctl(mib, miblen + 1, &xsw, &xswlen, NULL, 0) == -1)
 			break;
 		if (xsw.xsw_version != XSWDEV_VERSION)
 			errx(1, "xswdev version mismatch");
 		if (xsw.xsw_dev != NODEV)
 			snprintf(ksw.ksw_devname, sizeof ksw.ksw_devname,
 			    "/dev/%s", devname(xsw.xsw_dev, S_IFCHR));
 		else
 			snprintf(ksw.ksw_devname, sizeof ksw.ksw_devname,
 			    "<NFSfile>");
 		ksw.ksw_used = xsw.xsw_used;
 		ksw.ksw_total = xsw.xsw_nblks;
 		ksw.ksw_flags = xsw.xsw_flags;
 		print_swap(&ksw);
 		idx++;
 	}
 	/* ENOENT is the expected way for the walk to end. */
 	if (errno != ENOENT)
 		err(1, "sysctl()");
 	print_swap_total();
 }
 
 /* Print the swap report, using libkvm when a kernel image was opened. */
 static void
 swapmode(void)
 {
 
 	if (kd == NULL) {
 		swapmode_sysctl();
 		return;
 	}
 	swapmode_kvm();
 }
Index: head/usr.sbin/sa/extern.h
===================================================================
--- head/usr.sbin/sa/extern.h	(revision 318735)
+++ head/usr.sbin/sa/extern.h	(revision 318736)
@@ -1,110 +1,110 @@
 /*
  * Copyright (c) 1994 Christopher G. Demetriou
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Christopher G. Demetriou.
  * 4. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <db.h>
 
 /* structures */
 
 /* All times are stored in 1e-6s units. */
 
 /*
  * Accounting data for one command.  acct_load() in main.c fills one of
  * these from each accounting record it reads.
  */
 struct cmdinfo {
 	char		ci_comm[MAXCOMLEN+2];	/* command name (+ '*') */
 	uid_t		ci_uid;			/* user id */
 	u_quad_t	ci_calls;		/* number of calls */
 	double		ci_etime;		/* elapsed time */
 	double		ci_utime;		/* user time */
 	double		ci_stime;		/* system time */
 	double		ci_mem;			/* memory use */
 	double		ci_io;			/* number of disk i/o ops */
 	u_int		ci_flags;		/* flags; see below */
 };
 /* Set by acct_load() when a character of the name was replaced by '?'. */
 #define	CI_UNPRINTABLE	0x0001			/* unprintable chars in name */
 
 /*
  * Per-user accounting summary.
  * NOTE(review): presumably the record kept by usrdb.c -- confirm there.
  */
 struct userinfo {
 	uid_t		ui_uid;			/* user id; for consistency */
 	u_quad_t	ui_calls;		/* number of invocations */
 	double		ui_utime;		/* user time */
 	double		ui_stime;		/* system time */
 	double		ui_mem;			/* memory use */
 	double		ui_io;			/* number of disk i/o ops */
 };
 
 /* typedefs */
 
 typedef	int (*cmpf_t)(const DBT *, const DBT *);
 
 /* external functions in db.c */
 int db_copy_in(DB **mdb, const char *dbname, const char *name,
     BTREEINFO *bti, int (*v1_to_v2)(DBT *key, DBT *data));
 int db_copy_out(DB *mdb, const char *dbname, const char *name,
     BTREEINFO *bti);
 void db_destroy(DB *db, const char *uname);
 
 /* external functions in pdb.c */
 int	pacct_init(void);
 void	pacct_destroy(void);
 int	pacct_add(const struct cmdinfo *);
 int	pacct_update(void);
 void	pacct_print(void);
 
 /* external functions in readrec.c */
-int	readrec_forward(FILE *f, struct acctv2 *av2);
+int	readrec_forward(FILE *f, struct acctv3 *av3);
 
 /* external functions in usrdb.c */
 int	usracct_init(void);
 void	usracct_destroy(void);
 int	usracct_add(const struct cmdinfo *);
 int	usracct_update(void);
 void	usracct_print(void);
 
 /* variables */
 
 extern int	aflag, bflag, cflag, dflag, Dflag, fflag, iflag, jflag, kflag;
 extern int	Kflag, lflag, mflag, qflag, rflag, sflag, tflag, uflag, vflag;
 extern u_quad_t	cutoff;
 extern cmpf_t	sa_cmp;
 extern const char *pdb_file, *usrdb_file;
 
 /* some #defines to help with db's stupidity */
 
 /*
  * Each macro calls the function pointer of the same name stored in the
  * DB handle, passing the handle itself as the first argument.
  */
 #define	DB_CLOSE(db) \
 	((*(db)->close)(db))
 #define	DB_GET(db, key, data, flags) \
 	((*(db)->get)((db), (key), (data), (flags)))
 #define	DB_PUT(db, key, data, flags) \
 	((*(db)->put)((db), (key), (data), (flags)))
 #define	DB_SYNC(db, flags) \
 	((*(db)->sync)((db), (flags)))
 #define	DB_SEQ(db, key, data, flags) \
 	((*(db)->seq)((db), (key), (data), (flags)))
Index: head/usr.sbin/sa/main.c
===================================================================
--- head/usr.sbin/sa/main.c	(revision 318735)
+++ head/usr.sbin/sa/main.c	(revision 318736)
@@ -1,533 +1,533 @@
 /*
  * Copyright (c) 1994 Christopher G. Demetriou
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Christopher G. Demetriou.
  * 4. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #if 0
 #ifndef lint
 static const char copyright[] =
 "@(#) Copyright (c) 1994 Christopher G. Demetriou\n\
  All rights reserved.\n";
 #endif
 #endif
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * sa:	system accounting
  */
 
 #include <sys/types.h>
 #include <sys/acct.h>
 #include <ctype.h>
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <signal.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include "extern.h"
 #include "pathnames.h"
 
 static FILE	*acct_load(const char *, int);
 static int	 cmp_comm(const char *, const char *);
 static int	 cmp_usrsys(const DBT *, const DBT *);
 static int	 cmp_avgusrsys(const DBT *, const DBT *);
 static int	 cmp_dkio(const DBT *, const DBT *);
 static int	 cmp_avgdkio(const DBT *, const DBT *);
 static int	 cmp_cpumem(const DBT *, const DBT *);
 static int	 cmp_avgcpumem(const DBT *, const DBT *);
 static int	 cmp_calls(const DBT *, const DBT *);
 static void	 usage(void);
 
 /* One flag per command-line option of the same letter; set in main(). */
 int aflag, bflag, cflag, dflag, Dflag, fflag, iflag, jflag, kflag;
 int Kflag, lflag, mflag, qflag, rflag, sflag, tflag, uflag, vflag;
 /* Argument of -v; 1 unless overridden. */
 u_quad_t cutoff = 1;
 /* Summary database paths; -P and -U replace these defaults. */
 const char *pdb_file = _PATH_SAVACCT;
 const char *usrdb_file = _PATH_USRACCT;
 
 /*
  * File list used when no file operands are given.  main() stores the
  * default accounting file path in dfltargv[0].
  */
 static char	*dfltargv[] = { NULL };
 static int	dfltargc = (sizeof dfltargv/sizeof(char *));
 
 /* default to comparing by sum of user + system time */
 cmpf_t   sa_cmp = cmp_usrsys;
 
 /*
  * sa entry point.
  *
  * Parses the options, initialises the summary tables that the selected
  * mode needs, loads every accounting file named on the command line (or
  * the default one), optionally merges the data into the summary files
  * and truncates the accounting file (-s), then prints the report.
  * Exits with 1 if any step reported an error, otherwise 0.
  */
 int
 main(int argc, char **argv)
 {
 	FILE *f;
 	char pathacct[] = _PATH_ACCT;
 	int ch, error = 0;
 
 	dfltargv[0] = pathacct;
 
 	while ((ch = getopt(argc, argv, "abcdDfijkKlmnP:qrstuU:v:")) != -1)
 		switch (ch) {
 			case 'a':
 				/* print all commands */
 				aflag = 1;
 				break;
 			case 'b':
 				/* sort by per-call user/system time average */
 				bflag = 1;
 				sa_cmp = cmp_avgusrsys;
 				break;
 			case 'c':
 				/* print percentage total time */
 				cflag = 1;
 				break;
 			case 'd':
 				/* sort by average number of disk I/O ops */
 				dflag = 1;
 				sa_cmp = cmp_avgdkio;
 				break;
 			case 'D':
 				/* print and sort by total disk I/O ops */
 				Dflag = 1;
 				sa_cmp = cmp_dkio;
 				break;
 			case 'f':
 				/* force no interactive threshold comparison */
 				fflag = 1;
 				break;
 			case 'i':
 				/* do not read in summary file */
 				iflag = 1;
 				break;
 			case 'j':
 				/* instead of total minutes, give sec/call */
 				jflag = 1;
 				break;
 			case 'k':
 				/* sort by cpu-time average memory usage */
 				kflag = 1;
 				sa_cmp = cmp_avgcpumem;
 				break;
 			case 'K':
 				/* print and sort by cpu-storage integral */
 				sa_cmp = cmp_cpumem;
 				Kflag = 1;
 				break;
 			case 'l':
 				/* separate system and user time */
 				lflag = 1;
 				break;
 			case 'm':
 				/* print procs and time per-user */
 				mflag = 1;
 				break;
 			case 'n':
 				/* sort by number of calls */
 				sa_cmp = cmp_calls;
 				break;
 			case 'P':
 				/* specify program database summary file */
 				pdb_file = optarg;
 				break;
 			case 'q':
 				/* quiet; error messages only */
 				qflag = 1;
 				break;
 			case 'r':
 				/* reverse order of sort */
 				rflag = 1;
 				break;
 			case 's':
 				/* merge accounting file into summaries */
 				sflag = 1;
 				break;
 			case 't':
 				/* report ratio of user and system times */
 				tflag = 1;
 				break;
 			case 'u':
 				/* first, print uid and command name */
 				uflag = 1;
 				break;
 			case 'U':
 				/* specify user database summary file */
 				usrdb_file = optarg;
 				break;
 			case 'v':
 				/* cull junk */
 				vflag = 1;
 				cutoff = atoi(optarg);
 				break;
 			case '?':
 			default:
 				usage();
 		}
 
 	argc -= optind;
 	argv += optind;
 
 	/*
 	 * various argument checking
 	 * The messages name the options that are actually tested.
 	 * NOTE(review): the second check tests -f against -a; confirm that
 	 * -v against -a was not what was intended.
 	 */
 	if (fflag && !vflag)
 		errx(1, "-f requires -v");
 	if (fflag && aflag)
 		errx(1, "only one of -a and -f may be specified");
 	/* XXX need more argument checking */
 
 	if (!uflag) {
 		/* initialize tables */
 		if ((sflag || (!mflag && !qflag)) && pacct_init() != 0)
 			errx(1, "process accounting initialization failed");
 		if ((sflag || (mflag && !qflag)) && usracct_init() != 0)
 			errx(1, "user accounting initialization failed");
 	}
 
 	/* No file operands: fall back to the default accounting file. */
 	if (argc == 0) {
 		argc = dfltargc;
 		argv = dfltargv;
 	}
 
 	/* for each file specified */
 	for (; argc > 0; argc--, argv++) {
 		/*
 		 * load the accounting data from the file.
 		 * if it fails, go on to the next file.
 		 */
 		f = acct_load(argv[0], sflag);
 		if (f == NULL)
 			continue;
 
 		if (!uflag && sflag) {
 #ifndef DEBUG
 			sigset_t nmask, omask;
 			int unmask = 1;
 
 			/*
 			 * block most signals so we aren't interrupted during
 			 * the update.
 			 */
 			if (sigfillset(&nmask) == -1) {
 				warn("sigfillset");
 				unmask = 0;
 				error = 1;
 			}
 			if (unmask &&
 			    (sigprocmask(SIG_BLOCK, &nmask, &omask) == -1)) {
 				warn("couldn't set signal mask");
 				unmask = 0;
 				error = 1;
 			}
 #endif /* DEBUG */
 
 			/*
 			 * truncate the accounting data file ASAP, to avoid
 			 * losing data.  don't worry about errors in updating
 			 * the saved stats; better to underbill than overbill,
 			 * but we want every accounting record intact.
 			 */
 			if (ftruncate(fileno(f), 0) == -1) {
 				warn("couldn't truncate %s", *argv);
 				error = 1;
 			}
 
 			/*
 			 * update saved user and process accounting data.
 			 * note errors for later.
 			 */
 			if (pacct_update() != 0 || usracct_update() != 0)
 				error = 1;
 
 #ifndef DEBUG
 			/*
 			 * restore signals
 			 */
 			if (unmask &&
 			    (sigprocmask(SIG_SETMASK, &omask, NULL) == -1)) {
 				warn("couldn't restore signal mask");
 				error = 1;
 			}
 #endif /* DEBUG */
 		}
 
 		/*
 		 * close the opened accounting file
 		 */
 		if (fclose(f) == EOF) {
 			warn("fclose %s", *argv);
 			error = 1;
 		}
 	}
 
 	if (!uflag && !qflag) {
 		/* print any results we may have obtained. */
 		if (!mflag)
 			pacct_print();
 		else
 			usracct_print();
 	}
 
 	if (!uflag) {
 		/* finally, deallocate databases */
 		if (sflag || (!mflag && !qflag))
 			pacct_destroy();
 		if (sflag || (mflag && !qflag))
 			usracct_destroy();
 	}
 
 	exit(error);
 }
 
 /* Print the sa synopsis to stderr and exit with status 1. */
 static void
 usage(void)
 {
 	static const char synopsis[] =
 	    "usage: sa [-abcdDfijkKlmnqrstu] [-P file] [-U file] [-v cutoff] [file ...]\n";
 
 	(void)fprintf(stderr, synopsis);
 	exit(1);
 }
 
 /*
  * Read every record of accounting file pn and feed it to the summary
  * tables, or print it directly in -u mode.
  *
  * pn is the file to read; when wr is non-zero the file is opened
  * read/write so that the caller can truncate it afterwards.
  * Returns the open stream (the caller closes it), or NULL when the file
  * cannot be opened.  A read error ends the loop with a warning; the
  * stream is still returned.
  */
 static FILE *
 acct_load(const char *pn, int wr)
 {
-	struct acctv2 ac;
+	struct acctv3 ac;
 	struct cmdinfo ci;
 	ssize_t rv;
 	FILE *f;
 	int i;
 
 	/*
 	 * open the file
 	 */
 	f = fopen(pn, wr ? "r+" : "r");
 	if (f == NULL) {
 		warn("open %s %s", pn, wr ? "for read/write" : "read-only");
 		return (NULL);
 	}
 
 	/*
 	 * read all we can; don't stat and open because more processes
 	 * could exit, and we'd miss them
 	 */
 	while (1) {
 		/* get one accounting entry and punt if there's an error */
 		rv = readrec_forward(f, &ac);
 		if (rv != 1) {
 			if (rv == EOF)
 				warn("error reading %s", pn);
 			break;
 		}
 
 		/* decode it */
 		ci.ci_calls = 1;
 		/* Start from clean flags; they are only OR-ed into below. */
 		ci.ci_flags = 0;
 		for (i = 0; i < (int)sizeof ac.ac_comm && ac.ac_comm[i] != '\0';
 		    i++) {
 			char c = ac.ac_comm[i];
 
 			/* Replace anything that is not printable ASCII. */
 			if (!isascii(c) || iscntrl(c)) {
 				ci.ci_comm[i] = '?';
 				ci.ci_flags |= CI_UNPRINTABLE;
 			} else
 				ci.ci_comm[i] = c;
 		}
 		/* Records flagged AFORK get a '*' appended to the name. */
 		if (ac.ac_flagx & AFORK)
 			ci.ci_comm[i++] = '*';
 		ci.ci_comm[i++] = '\0';
 		ci.ci_etime = ac.ac_etime;
 		ci.ci_utime = ac.ac_utime;
 		ci.ci_stime = ac.ac_stime;
 		ci.ci_uid = ac.ac_uid;
 		ci.ci_mem = ac.ac_mem;
 		ci.ci_io = ac.ac_io;
 
 		if (!uflag) {
 			/* and enter it into the usracct and pacct databases */
 			if (sflag || (!mflag && !qflag))
 				pacct_add(&ci);
 			if (sflag || (mflag && !qflag))
 				usracct_add(&ci);
 		} else if (!qflag)
 			printf("%6u %12.3lf cpu %12.0lfk mem %12.0lf io %s\n",
 			    ci.ci_uid,
 			    (ci.ci_utime + ci.ci_stime) / 1000000,
 			    ci.ci_mem, ci.ci_io,
 			    ci.ci_comm);
 	}
 
 	/* Finally, return the file stream for possible truncation. */
 	return (f);
 }
 
 /*
  * Tie-breaker on command names.  Equal names are treated as "less"
  * (-1) so the result is never 0; the sign is flipped unless -r is set.
  */
 static int
 cmp_comm(const char *s1, const char *s2)
 {
 	int order;
 
 	order = strcmp(s1, s2);
 	if (order == 0)
 		order = -1;
 	if (rflag)
 		return (order);
 	return (-order);
 }
 
 /* sort by total user and system time */
 static int
 cmp_usrsys(const DBT *d1, const DBT *d2)
 {
 	struct cmdinfo c1, c2;
 	double t1, t2;
 
 	memcpy(&c1, d1->data, sizeof(c1));
 	memcpy(&c2, d2->data, sizeof(c2));
 
 	t1 = c1.ci_utime + c1.ci_stime;
 	t2 = c2.ci_utime + c2.ci_stime;
 
 	if (t1 < t2)
 		return -1;
 	else if (t1 == t2)
 		return (cmp_comm(c1.ci_comm, c2.ci_comm));
 	else
 		return 1;
 }
 
 /* sort by average user and system time */
 static int
 cmp_avgusrsys(const DBT *d1, const DBT *d2)
 {
 	struct cmdinfo c1, c2;
 	double t1, t2;
 
 	memcpy(&c1, d1->data, sizeof(c1));
 	memcpy(&c2, d2->data, sizeof(c2));
 
 	t1 = c1.ci_utime + c1.ci_stime;
 	t1 /= (double) (c1.ci_calls ? c1.ci_calls : 1);
 
 	t2 = c2.ci_utime + c2.ci_stime;
 	t2 /= (double) (c2.ci_calls ? c2.ci_calls : 1);
 
 	if (t1 < t2)
 		return -1;
 	else if (t1 == t2)
 		return (cmp_comm(c1.ci_comm, c2.ci_comm));
 	else
 		return 1;
 }
 
 /* sort by total number of disk I/O operations */
 static int
 cmp_dkio(const DBT *d1, const DBT *d2)
 {
 	struct cmdinfo c1, c2;
 
 	memcpy(&c1, d1->data, sizeof(c1));
 	memcpy(&c2, d2->data, sizeof(c2));
 
 	if (c1.ci_io < c2.ci_io)
 		return -1;
 	else if (c1.ci_io == c2.ci_io)
 		return (cmp_comm(c1.ci_comm, c2.ci_comm));
 	else
 		return 1;
 }
 
 /* sort by average number of disk I/O operations */
 static int
 cmp_avgdkio(const DBT *d1, const DBT *d2)
 {
 	struct cmdinfo c1, c2;
 	double n1, n2;
 
 	memcpy(&c1, d1->data, sizeof(c1));
 	memcpy(&c2, d2->data, sizeof(c2));
 
 	n1 = c1.ci_io / (double) (c1.ci_calls ? c1.ci_calls : 1);
 	n2 = c2.ci_io / (double) (c2.ci_calls ? c2.ci_calls : 1);
 
 	if (n1 < n2)
 		return -1;
 	else if (n1 == n2)
 		return (cmp_comm(c1.ci_comm, c2.ci_comm));
 	else
 		return 1;
 }
 
 /* sort by the cpu-storage integral */
 static int
 cmp_cpumem(const DBT *d1, const DBT *d2)
 {
 	struct cmdinfo c1, c2;
 
 	memcpy(&c1, d1->data, sizeof(c1));
 	memcpy(&c2, d2->data, sizeof(c2));
 
 	if (c1.ci_mem < c2.ci_mem)
 		return -1;
 	else if (c1.ci_mem == c2.ci_mem)
 		return (cmp_comm(c1.ci_comm, c2.ci_comm));
 	else
 		return 1;
 }
 
 /* sort by the cpu-time average memory usage */
 static int
 cmp_avgcpumem(const DBT *d1, const DBT *d2)
 {
 	struct cmdinfo c1, c2;
 	double t1, t2;
 	double n1, n2;
 
 	memcpy(&c1, d1->data, sizeof(c1));
 	memcpy(&c2, d2->data, sizeof(c2));
 
 	t1 = c1.ci_utime + c1.ci_stime;
 	t2 = c2.ci_utime + c2.ci_stime;
 
 	n1 = c1.ci_mem / (t1 ? t1 : 1);
 	n2 = c2.ci_mem / (t2 ? t2 : 1);
 
 	if (n1 < n2)
 		return -1;
 	else if (n1 == n2)
 		return (cmp_comm(c1.ci_comm, c2.ci_comm));
 	else
 		return 1;
 }
 
 /* sort by the number of invocations */
 static int
 cmp_calls(const DBT *d1, const DBT *d2)
 {
 	struct cmdinfo c1, c2;
 
 	memcpy(&c1, d1->data, sizeof(c1));
 	memcpy(&c2, d2->data, sizeof(c2));
 
 	if (c1.ci_calls < c2.ci_calls)
 		return -1;
 	else if (c1.ci_calls == c2.ci_calls)
 		return (cmp_comm(c1.ci_comm, c2.ci_comm));
 	else
 		return 1;
 }