Index: head/cddl/lib/libzfs/Makefile =================================================================== --- head/cddl/lib/libzfs/Makefile (revision 318735) +++ head/cddl/lib/libzfs/Makefile (revision 318736) @@ -1,57 +1,58 @@ # $FreeBSD$ .PATH: ${SRCTOP}/cddl/compat/opensolaris/misc .PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/common/zfs .PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs .PATH: ${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common LIB= zfs LIBADD= md pthread umem util uutil m avl bsdxml geom nvpair z zfs_core SRCS= deviceid.c \ fsshare.c \ mkdirp.c \ mnttab.c \ thread_pool.c \ zmount.c \ zone.c SRCS+= libzfs_changelist.c \ libzfs_compat.c \ libzfs_config.c \ libzfs_dataset.c \ libzfs_diff.c \ libzfs_import.c \ libzfs_iter.c \ libzfs_mount.c \ libzfs_pool.c \ libzfs_sendrecv.c \ libzfs_status.c \ libzfs_util.c \ zfeature_common.c \ zfs_comutil.c \ zfs_deleg.c \ zfs_fletcher.c \ zfs_namecheck.c \ zfs_prop.c \ zpool_prop.c \ zprop_common.c \ WARNS?= 0 +SHLIB_MAJOR= 3 CSTD= c99 CFLAGS+= -DZFS_NO_ACL CFLAGS+= -I${SRCTOP}/sbin/mount CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/lib/libumem CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzpool/common CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/common/zfs CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/sys CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/head CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libnvpair CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libuutil/common CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs_core/common .include Index: head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc =================================================================== --- head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc (revision 318735) +++ head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc (revision 318736) @@ -1,1405 +1,1409 @@ //===-- sanitizer_linux.cc ------------------------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file is shared between AddressSanitizer and ThreadSanitizer // run-time libraries and implements linux-specific functions from // sanitizer_libc.h. //===----------------------------------------------------------------------===// #include "sanitizer_platform.h" #if SANITIZER_FREEBSD || SANITIZER_LINUX #include "sanitizer_common.h" #include "sanitizer_flags.h" #include "sanitizer_internal_defs.h" #include "sanitizer_libc.h" #include "sanitizer_linux.h" #include "sanitizer_mutex.h" #include "sanitizer_placement_new.h" #include "sanitizer_procmaps.h" #include "sanitizer_stacktrace.h" #include "sanitizer_symbolizer.h" #if !SANITIZER_FREEBSD #include #endif // For mips64, syscall(__NR_stat) fills the buffer in the 'struct kernel_stat' // format. Struct kernel_stat is defined as 'struct stat' in asm/stat.h. To // access stat from asm/stat.h, without conflicting with definition in // sys/stat.h, we use this trick. #if defined(__mips64) #include #include #define stat kernel_stat #include #undef stat #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if SANITIZER_FREEBSD #include #include #include #include #include extern "C" { // must be included after and on // FreeBSD 9.2 and 10.0. #include } extern char **environ; // provided by crt1 #endif // SANITIZER_FREEBSD #if !SANITIZER_ANDROID #include #endif #if SANITIZER_LINUX // struct kernel_timeval { long tv_sec; long tv_usec; }; // is broken on some linux distributions. const int FUTEX_WAIT = 0; const int FUTEX_WAKE = 1; #endif // SANITIZER_LINUX // Are we using 32-bit or 64-bit Linux syscalls? // x32 (which defines __x86_64__) has SANITIZER_WORDSIZE == 32 // but it still needs to use 64-bit syscalls. #if SANITIZER_LINUX && (defined(__x86_64__) || defined(__powerpc64__) || \ SANITIZER_WORDSIZE == 64) # define SANITIZER_LINUX_USES_64BIT_SYSCALLS 1 #else # define SANITIZER_LINUX_USES_64BIT_SYSCALLS 0 #endif #if defined(__x86_64__) || SANITIZER_MIPS64 extern "C" { extern void internal_sigreturn(); } #endif namespace __sanitizer { #if SANITIZER_LINUX && defined(__x86_64__) #include "sanitizer_syscall_linux_x86_64.inc" #elif SANITIZER_LINUX && defined(__aarch64__) #include "sanitizer_syscall_linux_aarch64.inc" #else #include "sanitizer_syscall_generic.inc" #endif // --------------- sanitizer_libc.h #if !SANITIZER_S390 uptr internal_mmap(void *addr, uptr length, int prot, int flags, int fd, OFF_T offset) { #if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS return internal_syscall(SYSCALL(mmap), (uptr)addr, length, prot, flags, fd, offset); #else // mmap2 specifies file offset in 4096-byte units. CHECK(IsAligned(offset, 4096)); return internal_syscall(SYSCALL(mmap2), addr, length, prot, flags, fd, offset / 4096); #endif } #endif // !SANITIZER_S390 uptr internal_munmap(void *addr, uptr length) { return internal_syscall(SYSCALL(munmap), (uptr)addr, length); } int internal_mprotect(void *addr, uptr length, int prot) { return internal_syscall(SYSCALL(mprotect), (uptr)addr, length, prot); } uptr internal_close(fd_t fd) { return internal_syscall(SYSCALL(close), fd); } uptr internal_open(const char *filename, int flags) { #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS return internal_syscall(SYSCALL(openat), AT_FDCWD, (uptr)filename, flags); #else return internal_syscall(SYSCALL(open), (uptr)filename, flags); #endif } uptr internal_open(const char *filename, int flags, u32 mode) { #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS return internal_syscall(SYSCALL(openat), AT_FDCWD, (uptr)filename, flags, mode); #else return internal_syscall(SYSCALL(open), (uptr)filename, flags, mode); #endif } uptr internal_read(fd_t fd, void *buf, uptr count) { sptr res; HANDLE_EINTR(res, (sptr)internal_syscall(SYSCALL(read), fd, (uptr)buf, count)); return res; } uptr internal_write(fd_t fd, const void *buf, uptr count) { sptr res; HANDLE_EINTR(res, (sptr)internal_syscall(SYSCALL(write), fd, (uptr)buf, count)); return res; } uptr internal_ftruncate(fd_t fd, uptr size) { sptr res; HANDLE_EINTR(res, (sptr)internal_syscall(SYSCALL(ftruncate), fd, (OFF_T)size)); return res; } #if !SANITIZER_LINUX_USES_64BIT_SYSCALLS && !SANITIZER_FREEBSD static void stat64_to_stat(struct stat64 *in, struct stat *out) { internal_memset(out, 0, sizeof(*out)); out->st_dev = in->st_dev; out->st_ino = in->st_ino; out->st_mode = in->st_mode; out->st_nlink = in->st_nlink; out->st_uid = in->st_uid; out->st_gid = in->st_gid; out->st_rdev = in->st_rdev; out->st_size = in->st_size; out->st_blksize = in->st_blksize; out->st_blocks = in->st_blocks; out->st_atime = in->st_atime; out->st_mtime = in->st_mtime; out->st_ctime = in->st_ctime; out->st_ino = in->st_ino; } #endif #if defined(__mips64) static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) { internal_memset(out, 0, sizeof(*out)); out->st_dev = in->st_dev; out->st_ino = in->st_ino; out->st_mode = in->st_mode; out->st_nlink = in->st_nlink; out->st_uid = in->st_uid; out->st_gid = in->st_gid; out->st_rdev = in->st_rdev; out->st_size = in->st_size; out->st_blksize = in->st_blksize; out->st_blocks = in->st_blocks; out->st_atime = in->st_atime_nsec; out->st_mtime = in->st_mtime_nsec; out->st_ctime = in->st_ctime_nsec; out->st_ino = in->st_ino; } #endif uptr internal_stat(const char *path, void *buf) { #if SANITIZER_FREEBSD - return internal_syscall(SYSCALL(stat), path, buf); + return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path, + (uptr)buf, 0); #elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, 0); #elif SANITIZER_LINUX_USES_64BIT_SYSCALLS # if defined(__mips64) // For mips64, stat syscall fills buffer in the format of kernel_stat struct kernel_stat kbuf; int res = internal_syscall(SYSCALL(stat), path, &kbuf); kernel_stat_to_stat(&kbuf, (struct stat *)buf); return res; # else return internal_syscall(SYSCALL(stat), (uptr)path, (uptr)buf); # endif #else struct stat64 buf64; int res = internal_syscall(SYSCALL(stat64), path, &buf64); stat64_to_stat(&buf64, (struct stat *)buf); return res; #endif } uptr internal_lstat(const char *path, void *buf) { #if SANITIZER_FREEBSD - return internal_syscall(SYSCALL(lstat), path, buf); + return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path, + (uptr)buf, AT_SYMLINK_NOFOLLOW); #elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, AT_SYMLINK_NOFOLLOW); #elif SANITIZER_LINUX_USES_64BIT_SYSCALLS # if SANITIZER_MIPS64 // For mips64, lstat syscall fills buffer in the format of kernel_stat struct kernel_stat kbuf; int res = internal_syscall(SYSCALL(lstat), path, &kbuf); kernel_stat_to_stat(&kbuf, (struct stat *)buf); return res; # else return internal_syscall(SYSCALL(lstat), (uptr)path, (uptr)buf); # endif #else struct stat64 buf64; int res = internal_syscall(SYSCALL(lstat64), path, &buf64); stat64_to_stat(&buf64, (struct stat *)buf); return res; #endif } uptr internal_fstat(fd_t fd, void *buf) { #if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS # if SANITIZER_MIPS64 // For mips64, fstat syscall fills buffer in the format of kernel_stat struct kernel_stat kbuf; int res = internal_syscall(SYSCALL(fstat), fd, &kbuf); kernel_stat_to_stat(&kbuf, (struct stat *)buf); return res; # else return internal_syscall(SYSCALL(fstat), fd, (uptr)buf); # endif #else struct stat64 buf64; int res = internal_syscall(SYSCALL(fstat64), fd, &buf64); stat64_to_stat(&buf64, (struct stat *)buf); return res; #endif } uptr internal_filesize(fd_t fd) { struct stat st; if (internal_fstat(fd, &st)) return -1; return (uptr)st.st_size; } uptr internal_dup2(int oldfd, int newfd) { #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS return internal_syscall(SYSCALL(dup3), oldfd, newfd, 0); #else return internal_syscall(SYSCALL(dup2), oldfd, newfd); #endif } uptr internal_readlink(const char *path, char *buf, uptr bufsize) { #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS return internal_syscall(SYSCALL(readlinkat), AT_FDCWD, (uptr)path, (uptr)buf, bufsize); #else return internal_syscall(SYSCALL(readlink), (uptr)path, (uptr)buf, bufsize); #endif } uptr internal_unlink(const char *path) { #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS return internal_syscall(SYSCALL(unlinkat), AT_FDCWD, (uptr)path, 0); #else return internal_syscall(SYSCALL(unlink), (uptr)path); #endif } uptr internal_rename(const char *oldpath, const char *newpath) { #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS return internal_syscall(SYSCALL(renameat), AT_FDCWD, (uptr)oldpath, AT_FDCWD, (uptr)newpath); #else return internal_syscall(SYSCALL(rename), (uptr)oldpath, (uptr)newpath); #endif } uptr internal_sched_yield() { return internal_syscall(SYSCALL(sched_yield)); } void internal__exit(int exitcode) { #if SANITIZER_FREEBSD internal_syscall(SYSCALL(exit), exitcode); #else internal_syscall(SYSCALL(exit_group), exitcode); #endif Die(); // Unreachable. } unsigned int internal_sleep(unsigned int seconds) { struct timespec ts; ts.tv_sec = 1; ts.tv_nsec = 0; int res = internal_syscall(SYSCALL(nanosleep), &ts, &ts); if (res) return ts.tv_sec; return 0; } uptr internal_execve(const char *filename, char *const argv[], char *const envp[]) { return internal_syscall(SYSCALL(execve), (uptr)filename, (uptr)argv, (uptr)envp); } // ----------------- sanitizer_common.h bool FileExists(const char *filename) { struct stat st; #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS if (internal_syscall(SYSCALL(newfstatat), AT_FDCWD, filename, &st, 0)) #else if (internal_stat(filename, &st)) #endif return false; // Sanity check: filename is a regular file. return S_ISREG(st.st_mode); } uptr GetTid() { #if SANITIZER_FREEBSD return (uptr)pthread_self(); #else return internal_syscall(SYSCALL(gettid)); #endif } u64 NanoTime() { #if SANITIZER_FREEBSD timeval tv; #else kernel_timeval tv; #endif internal_memset(&tv, 0, sizeof(tv)); internal_syscall(SYSCALL(gettimeofday), (uptr)&tv, 0); return (u64)tv.tv_sec * 1000*1000*1000 + tv.tv_usec * 1000; } // Like getenv, but reads env directly from /proc (on Linux) or parses the // 'environ' array (on FreeBSD) and does not use libc. This function should be // called first inside __asan_init. const char *GetEnv(const char *name) { #if SANITIZER_FREEBSD if (::environ != 0) { uptr NameLen = internal_strlen(name); for (char **Env = ::environ; *Env != 0; Env++) { if (internal_strncmp(*Env, name, NameLen) == 0 && (*Env)[NameLen] == '=') return (*Env) + NameLen + 1; } } return 0; // Not found. #elif SANITIZER_LINUX static char *environ; static uptr len; static bool inited; if (!inited) { inited = true; uptr environ_size; if (!ReadFileToBuffer("/proc/self/environ", &environ, &environ_size, &len)) environ = nullptr; } if (!environ || len == 0) return nullptr; uptr namelen = internal_strlen(name); const char *p = environ; while (*p != '\0') { // will happen at the \0\0 that terminates the buffer // proc file has the format NAME=value\0NAME=value\0NAME=value\0... const char* endp = (char*)internal_memchr(p, '\0', len - (p - environ)); if (!endp) // this entry isn't NUL terminated return nullptr; else if (!internal_memcmp(p, name, namelen) && p[namelen] == '=') // Match. return p + namelen + 1; // point after = p = endp + 1; } return nullptr; // Not found. #else #error "Unsupported platform" #endif } #if !SANITIZER_FREEBSD extern "C" { SANITIZER_WEAK_ATTRIBUTE extern void *__libc_stack_end; } #endif #if !SANITIZER_GO && !SANITIZER_FREEBSD static void ReadNullSepFileToArray(const char *path, char ***arr, int arr_size) { char *buff; uptr buff_size; uptr buff_len; *arr = (char **)MmapOrDie(arr_size * sizeof(char *), "NullSepFileArray"); if (!ReadFileToBuffer(path, &buff, &buff_size, &buff_len, 1024 * 1024)) { (*arr)[0] = nullptr; return; } (*arr)[0] = buff; int count, i; for (count = 1, i = 1; ; i++) { if (buff[i] == 0) { if (buff[i+1] == 0) break; (*arr)[count] = &buff[i+1]; CHECK_LE(count, arr_size - 1); // FIXME: make this more flexible. count++; } } (*arr)[count] = nullptr; } #endif static void GetArgsAndEnv(char ***argv, char ***envp) { #if !SANITIZER_FREEBSD #if !SANITIZER_GO if (&__libc_stack_end) { #endif uptr* stack_end = (uptr*)__libc_stack_end; int argc = *stack_end; *argv = (char**)(stack_end + 1); *envp = (char**)(stack_end + argc + 2); #if !SANITIZER_GO } else { static const int kMaxArgv = 2000, kMaxEnvp = 2000; ReadNullSepFileToArray("/proc/self/cmdline", argv, kMaxArgv); ReadNullSepFileToArray("/proc/self/environ", envp, kMaxEnvp); } #endif #else // On FreeBSD, retrieving the argument and environment arrays is done via the // kern.ps_strings sysctl, which returns a pointer to a structure containing // this information. See also . ps_strings *pss; size_t sz = sizeof(pss); if (sysctlbyname("kern.ps_strings", &pss, &sz, NULL, 0) == -1) { Printf("sysctl kern.ps_strings failed\n"); Die(); } *argv = pss->ps_argvstr; *envp = pss->ps_envstr; #endif } char **GetArgv() { char **argv, **envp; GetArgsAndEnv(&argv, &envp); return argv; } void ReExec() { char **argv, **envp; GetArgsAndEnv(&argv, &envp); uptr rv = internal_execve("/proc/self/exe", argv, envp); int rverrno; CHECK_EQ(internal_iserror(rv, &rverrno), true); Printf("execve failed, errno %d\n", rverrno); Die(); } enum MutexState { MtxUnlocked = 0, MtxLocked = 1, MtxSleeping = 2 }; BlockingMutex::BlockingMutex() { internal_memset(this, 0, sizeof(*this)); } void BlockingMutex::Lock() { CHECK_EQ(owner_, 0); atomic_uint32_t *m = reinterpret_cast(&opaque_storage_); if (atomic_exchange(m, MtxLocked, memory_order_acquire) == MtxUnlocked) return; while (atomic_exchange(m, MtxSleeping, memory_order_acquire) != MtxUnlocked) { #if SANITIZER_FREEBSD _umtx_op(m, UMTX_OP_WAIT_UINT, MtxSleeping, 0, 0); #else internal_syscall(SYSCALL(futex), (uptr)m, FUTEX_WAIT, MtxSleeping, 0, 0, 0); #endif } } void BlockingMutex::Unlock() { atomic_uint32_t *m = reinterpret_cast(&opaque_storage_); u32 v = atomic_exchange(m, MtxUnlocked, memory_order_relaxed); CHECK_NE(v, MtxUnlocked); if (v == MtxSleeping) { #if SANITIZER_FREEBSD _umtx_op(m, UMTX_OP_WAKE, 1, 0, 0); #else internal_syscall(SYSCALL(futex), (uptr)m, FUTEX_WAKE, 1, 0, 0, 0); #endif } } void BlockingMutex::CheckLocked() { atomic_uint32_t *m = reinterpret_cast(&opaque_storage_); CHECK_NE(MtxUnlocked, atomic_load(m, memory_order_relaxed)); } // ----------------- sanitizer_linux.h // The actual size of this structure is specified by d_reclen. // Note that getdents64 uses a different structure format. We only provide the // 32-bit syscall here. struct linux_dirent { #if SANITIZER_X32 || defined(__aarch64__) u64 d_ino; u64 d_off; #else unsigned long d_ino; unsigned long d_off; #endif unsigned short d_reclen; #ifdef __aarch64__ unsigned char d_type; #endif char d_name[256]; }; // Syscall wrappers. uptr internal_ptrace(int request, int pid, void *addr, void *data) { return internal_syscall(SYSCALL(ptrace), request, pid, (uptr)addr, (uptr)data); } uptr internal_waitpid(int pid, int *status, int options) { return internal_syscall(SYSCALL(wait4), pid, (uptr)status, options, 0 /* rusage */); } uptr internal_getpid() { return internal_syscall(SYSCALL(getpid)); } uptr internal_getppid() { return internal_syscall(SYSCALL(getppid)); } uptr internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count) { -#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS +#if SANITIZER_FREEBSD + return internal_syscall(SYSCALL(getdirentries), fd, (uptr)dirp, count, NULL); +#elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS return internal_syscall(SYSCALL(getdents64), fd, (uptr)dirp, count); #else return internal_syscall(SYSCALL(getdents), fd, (uptr)dirp, count); #endif } uptr internal_lseek(fd_t fd, OFF_T offset, int whence) { return internal_syscall(SYSCALL(lseek), fd, offset, whence); } #if SANITIZER_LINUX uptr internal_prctl(int option, uptr arg2, uptr arg3, uptr arg4, uptr arg5) { return internal_syscall(SYSCALL(prctl), option, arg2, arg3, arg4, arg5); } #endif uptr internal_sigaltstack(const struct sigaltstack *ss, struct sigaltstack *oss) { return internal_syscall(SYSCALL(sigaltstack), (uptr)ss, (uptr)oss); } int internal_fork() { #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS return internal_syscall(SYSCALL(clone), SIGCHLD, 0); #else return internal_syscall(SYSCALL(fork)); #endif } #if SANITIZER_LINUX #define SA_RESTORER 0x04000000 // Doesn't set sa_restorer if the caller did not set it, so use with caution //(see below). int internal_sigaction_norestorer(int signum, const void *act, void *oldact) { __sanitizer_kernel_sigaction_t k_act, k_oldact; internal_memset(&k_act, 0, sizeof(__sanitizer_kernel_sigaction_t)); internal_memset(&k_oldact, 0, sizeof(__sanitizer_kernel_sigaction_t)); const __sanitizer_sigaction *u_act = (const __sanitizer_sigaction *)act; __sanitizer_sigaction *u_oldact = (__sanitizer_sigaction *)oldact; if (u_act) { k_act.handler = u_act->handler; k_act.sigaction = u_act->sigaction; internal_memcpy(&k_act.sa_mask, &u_act->sa_mask, sizeof(__sanitizer_kernel_sigset_t)); // Without SA_RESTORER kernel ignores the calls (probably returns EINVAL). k_act.sa_flags = u_act->sa_flags | SA_RESTORER; // FIXME: most often sa_restorer is unset, however the kernel requires it // to point to a valid signal restorer that calls the rt_sigreturn syscall. // If sa_restorer passed to the kernel is NULL, the program may crash upon // signal delivery or fail to unwind the stack in the signal handler. // libc implementation of sigaction() passes its own restorer to // rt_sigaction, so we need to do the same (we'll need to reimplement the // restorers; for x86_64 the restorer address can be obtained from // oldact->sa_restorer upon a call to sigaction(xxx, NULL, oldact). #if !SANITIZER_ANDROID || !SANITIZER_MIPS32 k_act.sa_restorer = u_act->sa_restorer; #endif } uptr result = internal_syscall(SYSCALL(rt_sigaction), (uptr)signum, (uptr)(u_act ? &k_act : nullptr), (uptr)(u_oldact ? &k_oldact : nullptr), (uptr)sizeof(__sanitizer_kernel_sigset_t)); if ((result == 0) && u_oldact) { u_oldact->handler = k_oldact.handler; u_oldact->sigaction = k_oldact.sigaction; internal_memcpy(&u_oldact->sa_mask, &k_oldact.sa_mask, sizeof(__sanitizer_kernel_sigset_t)); u_oldact->sa_flags = k_oldact.sa_flags; #if !SANITIZER_ANDROID || !SANITIZER_MIPS32 u_oldact->sa_restorer = k_oldact.sa_restorer; #endif } return result; } // Invokes sigaction via a raw syscall with a restorer, but does not support // all platforms yet. // We disable for Go simply because we have not yet added to buildgo.sh. #if (defined(__x86_64__) || SANITIZER_MIPS64) && !SANITIZER_GO int internal_sigaction_syscall(int signum, const void *act, void *oldact) { if (act == nullptr) return internal_sigaction_norestorer(signum, act, oldact); __sanitizer_sigaction u_adjust; internal_memcpy(&u_adjust, act, sizeof(u_adjust)); #if !SANITIZER_ANDROID || !SANITIZER_MIPS32 if (u_adjust.sa_restorer == nullptr) { u_adjust.sa_restorer = internal_sigreturn; } #endif return internal_sigaction_norestorer(signum, (const void *)&u_adjust, oldact); } #endif // defined(__x86_64__) && !SANITIZER_GO #endif // SANITIZER_LINUX uptr internal_sigprocmask(int how, __sanitizer_sigset_t *set, __sanitizer_sigset_t *oldset) { #if SANITIZER_FREEBSD return internal_syscall(SYSCALL(sigprocmask), how, set, oldset); #else __sanitizer_kernel_sigset_t *k_set = (__sanitizer_kernel_sigset_t *)set; __sanitizer_kernel_sigset_t *k_oldset = (__sanitizer_kernel_sigset_t *)oldset; return internal_syscall(SYSCALL(rt_sigprocmask), (uptr)how, (uptr)&k_set->sig[0], (uptr)&k_oldset->sig[0], sizeof(__sanitizer_kernel_sigset_t)); #endif } void internal_sigfillset(__sanitizer_sigset_t *set) { internal_memset(set, 0xff, sizeof(*set)); } void internal_sigemptyset(__sanitizer_sigset_t *set) { internal_memset(set, 0, sizeof(*set)); } #if SANITIZER_LINUX void internal_sigdelset(__sanitizer_sigset_t *set, int signum) { signum -= 1; CHECK_GE(signum, 0); CHECK_LT(signum, sizeof(*set) * 8); __sanitizer_kernel_sigset_t *k_set = (__sanitizer_kernel_sigset_t *)set; const uptr idx = signum / (sizeof(k_set->sig[0]) * 8); const uptr bit = signum % (sizeof(k_set->sig[0]) * 8); k_set->sig[idx] &= ~(1 << bit); } bool internal_sigismember(__sanitizer_sigset_t *set, int signum) { signum -= 1; CHECK_GE(signum, 0); CHECK_LT(signum, sizeof(*set) * 8); __sanitizer_kernel_sigset_t *k_set = (__sanitizer_kernel_sigset_t *)set; const uptr idx = signum / (sizeof(k_set->sig[0]) * 8); const uptr bit = signum % (sizeof(k_set->sig[0]) * 8); return k_set->sig[idx] & (1 << bit); } #endif // SANITIZER_LINUX // ThreadLister implementation. ThreadLister::ThreadLister(int pid) : pid_(pid), descriptor_(-1), buffer_(4096), error_(true), entry_((struct linux_dirent *)buffer_.data()), bytes_read_(0) { char task_directory_path[80]; internal_snprintf(task_directory_path, sizeof(task_directory_path), "/proc/%d/task/", pid); uptr openrv = internal_open(task_directory_path, O_RDONLY | O_DIRECTORY); if (internal_iserror(openrv)) { error_ = true; Report("Can't open /proc/%d/task for reading.\n", pid); } else { error_ = false; descriptor_ = openrv; } } int ThreadLister::GetNextTID() { int tid = -1; do { if (error_) return -1; if ((char *)entry_ >= &buffer_[bytes_read_] && !GetDirectoryEntries()) return -1; if (entry_->d_ino != 0 && entry_->d_name[0] >= '0' && entry_->d_name[0] <= '9') { // Found a valid tid. tid = (int)internal_atoll(entry_->d_name); } entry_ = (struct linux_dirent *)(((char *)entry_) + entry_->d_reclen); } while (tid < 0); return tid; } void ThreadLister::Reset() { if (error_ || descriptor_ < 0) return; internal_lseek(descriptor_, 0, SEEK_SET); } ThreadLister::~ThreadLister() { if (descriptor_ >= 0) internal_close(descriptor_); } bool ThreadLister::error() { return error_; } bool ThreadLister::GetDirectoryEntries() { CHECK_GE(descriptor_, 0); CHECK_NE(error_, true); bytes_read_ = internal_getdents(descriptor_, (struct linux_dirent *)buffer_.data(), buffer_.size()); if (internal_iserror(bytes_read_)) { Report("Can't read directory entries from /proc/%d/task.\n", pid_); error_ = true; return false; } else if (bytes_read_ == 0) { return false; } entry_ = (struct linux_dirent *)buffer_.data(); return true; } uptr GetPageSize() { // Android post-M sysconf(_SC_PAGESIZE) crashes if called from .preinit_array. #if SANITIZER_ANDROID return 4096; #elif SANITIZER_LINUX && (defined(__x86_64__) || defined(__i386__)) return EXEC_PAGESIZE; #else return sysconf(_SC_PAGESIZE); // EXEC_PAGESIZE may not be trustworthy. #endif } uptr ReadBinaryName(/*out*/char *buf, uptr buf_len) { #if SANITIZER_FREEBSD const int Mib[] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 }; const char *default_module_name = "kern.proc.pathname"; size_t Size = buf_len; bool IsErr = (sysctl(Mib, ARRAY_SIZE(Mib), buf, &Size, NULL, 0) != 0); int readlink_error = IsErr ? errno : 0; uptr module_name_len = Size; #else const char *default_module_name = "/proc/self/exe"; uptr module_name_len = internal_readlink( default_module_name, buf, buf_len); int readlink_error; bool IsErr = internal_iserror(module_name_len, &readlink_error); #endif if (IsErr) { // We can't read binary name for some reason, assume it's unknown. Report("WARNING: reading executable name failed with errno %d, " "some stack frames may not be symbolized\n", readlink_error); module_name_len = internal_snprintf(buf, buf_len, "%s", default_module_name); CHECK_LT(module_name_len, buf_len); } return module_name_len; } uptr ReadLongProcessName(/*out*/ char *buf, uptr buf_len) { #if SANITIZER_LINUX char *tmpbuf; uptr tmpsize; uptr tmplen; if (ReadFileToBuffer("/proc/self/cmdline", &tmpbuf, &tmpsize, &tmplen, 1024 * 1024)) { internal_strncpy(buf, tmpbuf, buf_len); UnmapOrDie(tmpbuf, tmpsize); return internal_strlen(buf); } #endif return ReadBinaryName(buf, buf_len); } // Match full names of the form /path/to/base_name{-,.}* bool LibraryNameIs(const char *full_name, const char *base_name) { const char *name = full_name; // Strip path. while (*name != '\0') name++; while (name > full_name && *name != '/') name--; if (*name == '/') name++; uptr base_name_length = internal_strlen(base_name); if (internal_strncmp(name, base_name, base_name_length)) return false; return (name[base_name_length] == '-' || name[base_name_length] == '.'); } #if !SANITIZER_ANDROID // Call cb for each region mapped by map. void ForEachMappedRegion(link_map *map, void (*cb)(const void *, uptr)) { CHECK_NE(map, nullptr); #if !SANITIZER_FREEBSD typedef ElfW(Phdr) Elf_Phdr; typedef ElfW(Ehdr) Elf_Ehdr; #endif // !SANITIZER_FREEBSD char *base = (char *)map->l_addr; Elf_Ehdr *ehdr = (Elf_Ehdr *)base; char *phdrs = base + ehdr->e_phoff; char *phdrs_end = phdrs + ehdr->e_phnum * ehdr->e_phentsize; // Find the segment with the minimum base so we can "relocate" the p_vaddr // fields. Typically ET_DYN objects (DSOs) have base of zero and ET_EXEC // objects have a non-zero base. uptr preferred_base = (uptr)-1; for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) { Elf_Phdr *phdr = (Elf_Phdr *)iter; if (phdr->p_type == PT_LOAD && preferred_base > (uptr)phdr->p_vaddr) preferred_base = (uptr)phdr->p_vaddr; } // Compute the delta from the real base to get a relocation delta. sptr delta = (uptr)base - preferred_base; // Now we can figure out what the loader really mapped. for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) { Elf_Phdr *phdr = (Elf_Phdr *)iter; if (phdr->p_type == PT_LOAD) { uptr seg_start = phdr->p_vaddr + delta; uptr seg_end = seg_start + phdr->p_memsz; // None of these values are aligned. We consider the ragged edges of the // load command as defined, since they are mapped from the file. seg_start = RoundDownTo(seg_start, GetPageSizeCached()); seg_end = RoundUpTo(seg_end, GetPageSizeCached()); cb((void *)seg_start, seg_end - seg_start); } } } #endif #if defined(__x86_64__) && SANITIZER_LINUX // We cannot use glibc's clone wrapper, because it messes with the child // task's TLS. It writes the PID and TID of the child task to its thread // descriptor, but in our case the child task shares the thread descriptor with // the parent (because we don't know how to allocate a new thread // descriptor to keep glibc happy). So the stock version of clone(), when // used with CLONE_VM, would end up corrupting the parent's thread descriptor. uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { long long res; if (!fn || !child_stack) return -EINVAL; CHECK_EQ(0, (uptr)child_stack % 16); child_stack = (char *)child_stack - 2 * sizeof(unsigned long long); ((unsigned long long *)child_stack)[0] = (uptr)fn; ((unsigned long long *)child_stack)[1] = (uptr)arg; register void *r8 __asm__("r8") = newtls; register int *r10 __asm__("r10") = child_tidptr; __asm__ __volatile__( /* %rax = syscall(%rax = SYSCALL(clone), * %rdi = flags, * %rsi = child_stack, * %rdx = parent_tidptr, * %r8 = new_tls, * %r10 = child_tidptr) */ "syscall\n" /* if (%rax != 0) * return; */ "testq %%rax,%%rax\n" "jnz 1f\n" /* In the child. Terminate unwind chain. */ // XXX: We should also terminate the CFI unwind chain // here. Unfortunately clang 3.2 doesn't support the // necessary CFI directives, so we skip that part. "xorq %%rbp,%%rbp\n" /* Call "fn(arg)". */ "popq %%rax\n" "popq %%rdi\n" "call *%%rax\n" /* Call _exit(%rax). */ "movq %%rax,%%rdi\n" "movq %2,%%rax\n" "syscall\n" /* Return to parent. */ "1:\n" : "=a" (res) : "a"(SYSCALL(clone)), "i"(SYSCALL(exit)), "S"(child_stack), "D"(flags), "d"(parent_tidptr), "r"(r8), "r"(r10) : "rsp", "memory", "r11", "rcx"); return res; } #elif defined(__mips__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { long long res; if (!fn || !child_stack) return -EINVAL; CHECK_EQ(0, (uptr)child_stack % 16); child_stack = (char *)child_stack - 2 * sizeof(unsigned long long); ((unsigned long long *)child_stack)[0] = (uptr)fn; ((unsigned long long *)child_stack)[1] = (uptr)arg; register void *a3 __asm__("$7") = newtls; register int *a4 __asm__("$8") = child_tidptr; // We don't have proper CFI directives here because it requires alot of code // for very marginal benefits. __asm__ __volatile__( /* $v0 = syscall($v0 = __NR_clone, * $a0 = flags, * $a1 = child_stack, * $a2 = parent_tidptr, * $a3 = new_tls, * $a4 = child_tidptr) */ ".cprestore 16;\n" "move $4,%1;\n" "move $5,%2;\n" "move $6,%3;\n" "move $7,%4;\n" /* Store the fifth argument on stack * if we are using 32-bit abi. */ #if SANITIZER_WORDSIZE == 32 "lw %5,16($29);\n" #else "move $8,%5;\n" #endif "li $2,%6;\n" "syscall;\n" /* if ($v0 != 0) * return; */ "bnez $2,1f;\n" /* Call "fn(arg)". */ #if SANITIZER_WORDSIZE == 32 #ifdef __BIG_ENDIAN__ "lw $25,4($29);\n" "lw $4,12($29);\n" #else "lw $25,0($29);\n" "lw $4,8($29);\n" #endif #else "ld $25,0($29);\n" "ld $4,8($29);\n" #endif "jal $25;\n" /* Call _exit($v0). */ "move $4,$2;\n" "li $2,%7;\n" "syscall;\n" /* Return to parent. */ "1:\n" : "=r" (res) : "r"(flags), "r"(child_stack), "r"(parent_tidptr), "r"(a3), "r"(a4), "i"(__NR_clone), "i"(__NR_exit) : "memory", "$29" ); return res; } #elif defined(__aarch64__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { long long res; if (!fn || !child_stack) return -EINVAL; CHECK_EQ(0, (uptr)child_stack % 16); child_stack = (char *)child_stack - 2 * sizeof(unsigned long long); ((unsigned long long *)child_stack)[0] = (uptr)fn; ((unsigned long long *)child_stack)[1] = (uptr)arg; register int (*__fn)(void *) __asm__("x0") = fn; register void *__stack __asm__("x1") = child_stack; register int __flags __asm__("x2") = flags; register void *__arg __asm__("x3") = arg; register int *__ptid __asm__("x4") = parent_tidptr; register void *__tls __asm__("x5") = newtls; register int *__ctid __asm__("x6") = child_tidptr; __asm__ __volatile__( "mov x0,x2\n" /* flags */ "mov x2,x4\n" /* ptid */ "mov x3,x5\n" /* tls */ "mov x4,x6\n" /* ctid */ "mov x8,%9\n" /* clone */ "svc 0x0\n" /* if (%r0 != 0) * return %r0; */ "cmp x0, #0\n" "bne 1f\n" /* In the child, now. Call "fn(arg)". */ "ldp x1, x0, [sp], #16\n" "blr x1\n" /* Call _exit(%r0). */ "mov x8, %10\n" "svc 0x0\n" "1:\n" : "=r" (res) : "i"(-EINVAL), "r"(__fn), "r"(__stack), "r"(__flags), "r"(__arg), "r"(__ptid), "r"(__tls), "r"(__ctid), "i"(__NR_clone), "i"(__NR_exit) : "x30", "memory"); return res; } #elif defined(__powerpc64__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { long long res; /* Stack frame offsets. */ #if _CALL_ELF != 2 #define FRAME_MIN_SIZE 112 #define FRAME_TOC_SAVE 40 #else #define FRAME_MIN_SIZE 32 #define FRAME_TOC_SAVE 24 #endif if (!fn || !child_stack) return -EINVAL; CHECK_EQ(0, (uptr)child_stack % 16); child_stack = (char *)child_stack - 2 * sizeof(unsigned long long); ((unsigned long long *)child_stack)[0] = (uptr)fn; ((unsigned long long *)child_stack)[1] = (uptr)arg; register int (*__fn)(void *) __asm__("r3") = fn; register void *__cstack __asm__("r4") = child_stack; register int __flags __asm__("r5") = flags; register void * __arg __asm__("r6") = arg; register int * __ptidptr __asm__("r7") = parent_tidptr; register void * __newtls __asm__("r8") = newtls; register int * __ctidptr __asm__("r9") = child_tidptr; __asm__ __volatile__( /* fn, arg, child_stack are saved acrVoss the syscall */ "mr 28, %5\n\t" "mr 29, %6\n\t" "mr 27, %8\n\t" /* syscall r3 == flags r4 == child_stack r5 == parent_tidptr r6 == newtls r7 == child_tidptr */ "mr 3, %7\n\t" "mr 5, %9\n\t" "mr 6, %10\n\t" "mr 7, %11\n\t" "li 0, %3\n\t" "sc\n\t" /* Test if syscall was successful */ "cmpdi cr1, 3, 0\n\t" "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" "bne- cr1, 1f\n\t" /* Do the function call */ "std 2, %13(1)\n\t" #if _CALL_ELF != 2 "ld 0, 0(28)\n\t" "ld 2, 8(28)\n\t" "mtctr 0\n\t" #else "mr 12, 28\n\t" "mtctr 12\n\t" #endif "mr 3, 27\n\t" "bctrl\n\t" "ld 2, %13(1)\n\t" /* Call _exit(r3) */ "li 0, %4\n\t" "sc\n\t" /* Return to parent */ "1:\n\t" "mr %0, 3\n\t" : "=r" (res) : "0" (-1), "i" (EINVAL), "i" (__NR_clone), "i" (__NR_exit), "r" (__fn), "r" (__cstack), "r" (__flags), "r" (__arg), "r" (__ptidptr), "r" (__newtls), "r" (__ctidptr), "i" (FRAME_MIN_SIZE), "i" (FRAME_TOC_SAVE) : "cr0", "cr1", "memory", "ctr", "r0", "r29", "r27", "r28"); return res; } #endif // defined(__x86_64__) && SANITIZER_LINUX #if SANITIZER_ANDROID #if __ANDROID_API__ < 21 extern "C" __attribute__((weak)) int dl_iterate_phdr( int (*)(struct dl_phdr_info *, size_t, void *), void *); #endif static int dl_iterate_phdr_test_cb(struct dl_phdr_info *info, size_t size, void *data) { // Any name starting with "lib" indicates a bug in L where library base names // are returned instead of paths. if (info->dlpi_name && info->dlpi_name[0] == 'l' && info->dlpi_name[1] == 'i' && info->dlpi_name[2] == 'b') { *(bool *)data = true; return 1; } return 0; } static atomic_uint32_t android_api_level; static AndroidApiLevel AndroidDetectApiLevel() { if (!&dl_iterate_phdr) return ANDROID_KITKAT; // K or lower bool base_name_seen = false; dl_iterate_phdr(dl_iterate_phdr_test_cb, &base_name_seen); if (base_name_seen) return ANDROID_LOLLIPOP_MR1; // L MR1 return ANDROID_POST_LOLLIPOP; // post-L // Plain L (API level 21) is completely broken wrt ASan and not very // interesting to detect. } AndroidApiLevel AndroidGetApiLevel() { AndroidApiLevel level = (AndroidApiLevel)atomic_load(&android_api_level, memory_order_relaxed); if (level) return level; level = AndroidDetectApiLevel(); atomic_store(&android_api_level, level, memory_order_relaxed); return level; } #endif bool IsHandledDeadlySignal(int signum) { if (common_flags()->handle_abort && signum == SIGABRT) return true; if (common_flags()->handle_sigill && signum == SIGILL) return true; if (common_flags()->handle_sigfpe && signum == SIGFPE) return true; return (signum == SIGSEGV || signum == SIGBUS) && common_flags()->handle_segv; } #if !SANITIZER_GO void *internal_start_thread(void(*func)(void *arg), void *arg) { // Start the thread with signals blocked, otherwise it can steal user signals. __sanitizer_sigset_t set, old; internal_sigfillset(&set); #if SANITIZER_LINUX && !SANITIZER_ANDROID // Glibc uses SIGSETXID signal during setuid call. If this signal is blocked // on any thread, setuid call hangs (see test/tsan/setuid.c). internal_sigdelset(&set, 33); #endif internal_sigprocmask(SIG_SETMASK, &set, &old); void *th; real_pthread_create(&th, nullptr, (void*(*)(void *arg))func, arg); internal_sigprocmask(SIG_SETMASK, &old, nullptr); return th; } void internal_join_thread(void *th) { real_pthread_join(th, nullptr); } #else void *internal_start_thread(void (*func)(void *), void *arg) { return 0; } void internal_join_thread(void *th) {} #endif #if defined(__aarch64__) // Android headers in the older NDK releases miss this definition. struct __sanitizer_esr_context { struct _aarch64_ctx head; uint64_t esr; }; static bool Aarch64GetESR(ucontext_t *ucontext, u64 *esr) { static const u32 kEsrMagic = 0x45535201; u8 *aux = ucontext->uc_mcontext.__reserved; while (true) { _aarch64_ctx *ctx = (_aarch64_ctx *)aux; if (ctx->size == 0) break; if (ctx->magic == kEsrMagic) { *esr = ((__sanitizer_esr_context *)ctx)->esr; return true; } aux += ctx->size; } return false; } #endif SignalContext::WriteFlag SignalContext::GetWriteFlag(void *context) { ucontext_t *ucontext = (ucontext_t *)context; #if defined(__x86_64__) || defined(__i386__) static const uptr PF_WRITE = 1U << 1; #if SANITIZER_FREEBSD uptr err = ucontext->uc_mcontext.mc_err; #else uptr err = ucontext->uc_mcontext.gregs[REG_ERR]; #endif return err & PF_WRITE ? WRITE : READ; #elif defined(__arm__) static const uptr FSR_WRITE = 1U << 11; uptr fsr = ucontext->uc_mcontext.error_code; return fsr & FSR_WRITE ? WRITE : READ; #elif defined(__aarch64__) static const u64 ESR_ELx_WNR = 1U << 6; u64 esr; if (!Aarch64GetESR(ucontext, &esr)) return UNKNOWN; return esr & ESR_ELx_WNR ? WRITE : READ; #else (void)ucontext; return UNKNOWN; // FIXME: Implement. #endif } void SignalContext::DumpAllRegisters(void *context) { // FIXME: Implement this. } void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) { #if defined(__arm__) ucontext_t *ucontext = (ucontext_t*)context; *pc = ucontext->uc_mcontext.arm_pc; *bp = ucontext->uc_mcontext.arm_fp; *sp = ucontext->uc_mcontext.arm_sp; #elif defined(__aarch64__) ucontext_t *ucontext = (ucontext_t*)context; *pc = ucontext->uc_mcontext.pc; *bp = ucontext->uc_mcontext.regs[29]; *sp = ucontext->uc_mcontext.sp; #elif defined(__hppa__) ucontext_t *ucontext = (ucontext_t*)context; *pc = ucontext->uc_mcontext.sc_iaoq[0]; /* GCC uses %r3 whenever a frame pointer is needed. */ *bp = ucontext->uc_mcontext.sc_gr[3]; *sp = ucontext->uc_mcontext.sc_gr[30]; #elif defined(__x86_64__) # if SANITIZER_FREEBSD ucontext_t *ucontext = (ucontext_t*)context; *pc = ucontext->uc_mcontext.mc_rip; *bp = ucontext->uc_mcontext.mc_rbp; *sp = ucontext->uc_mcontext.mc_rsp; # else ucontext_t *ucontext = (ucontext_t*)context; *pc = ucontext->uc_mcontext.gregs[REG_RIP]; *bp = ucontext->uc_mcontext.gregs[REG_RBP]; *sp = ucontext->uc_mcontext.gregs[REG_RSP]; # endif #elif defined(__i386__) # if SANITIZER_FREEBSD ucontext_t *ucontext = (ucontext_t*)context; *pc = ucontext->uc_mcontext.mc_eip; *bp = ucontext->uc_mcontext.mc_ebp; *sp = ucontext->uc_mcontext.mc_esp; # else ucontext_t *ucontext = (ucontext_t*)context; *pc = ucontext->uc_mcontext.gregs[REG_EIP]; *bp = ucontext->uc_mcontext.gregs[REG_EBP]; *sp = ucontext->uc_mcontext.gregs[REG_ESP]; # endif #elif defined(__powerpc__) || defined(__powerpc64__) ucontext_t *ucontext = (ucontext_t*)context; *pc = ucontext->uc_mcontext.regs->nip; *sp = ucontext->uc_mcontext.regs->gpr[PT_R1]; // The powerpc{,64}-linux ABIs do not specify r31 as the frame // pointer, but GCC always uses r31 when we need a frame pointer. *bp = ucontext->uc_mcontext.regs->gpr[PT_R31]; #elif defined(__sparc__) ucontext_t *ucontext = (ucontext_t*)context; uptr *stk_ptr; # if defined (__arch64__) *pc = ucontext->uc_mcontext.mc_gregs[MC_PC]; *sp = ucontext->uc_mcontext.mc_gregs[MC_O6]; stk_ptr = (uptr *) (*sp + 2047); *bp = stk_ptr[15]; # else *pc = ucontext->uc_mcontext.gregs[REG_PC]; *sp = ucontext->uc_mcontext.gregs[REG_O6]; stk_ptr = (uptr *) *sp; *bp = stk_ptr[15]; # endif #elif defined(__mips__) ucontext_t *ucontext = (ucontext_t*)context; *pc = ucontext->uc_mcontext.pc; *bp = ucontext->uc_mcontext.gregs[30]; *sp = ucontext->uc_mcontext.gregs[29]; #elif defined(__s390__) ucontext_t *ucontext = (ucontext_t*)context; # if defined(__s390x__) *pc = ucontext->uc_mcontext.psw.addr; # else *pc = ucontext->uc_mcontext.psw.addr & 0x7fffffff; # endif *bp = ucontext->uc_mcontext.gregs[11]; *sp = ucontext->uc_mcontext.gregs[15]; #else # error "Unsupported arch" #endif } void MaybeReexec() { // No need to re-exec on Linux. } void PrintModuleMap() { } uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding) { UNREACHABLE("FindAvailableMemoryRange is not available"); return 0; } } // namespace __sanitizer #endif // SANITIZER_FREEBSD || SANITIZER_LINUX Index: head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h =================================================================== --- head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h (revision 318735) +++ head/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h (revision 318736) @@ -1,1487 +1,1488 @@ //===-- sanitizer_platform_limits_posix.h ---------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file is a part of Sanitizer common code. // // Sizes and layouts of platform-specific POSIX data structures. //===----------------------------------------------------------------------===// #ifndef SANITIZER_PLATFORM_LIMITS_POSIX_H #define SANITIZER_PLATFORM_LIMITS_POSIX_H #include "sanitizer_internal_defs.h" #include "sanitizer_platform.h" #if SANITIZER_FREEBSD // FreeBSD's dlopen() returns a pointer to an Obj_Entry structure that // incorporates the map structure. # define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) \ ((link_map*)((handle) == nullptr ? nullptr : ((char*)(handle) + 544))) #else # define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) ((link_map*)(handle)) #endif // !SANITIZER_FREEBSD #ifndef __GLIBC_PREREQ #define __GLIBC_PREREQ(x, y) 0 #endif namespace __sanitizer { extern unsigned struct_utsname_sz; extern unsigned struct_stat_sz; #if !SANITIZER_FREEBSD && !SANITIZER_IOS extern unsigned struct_stat64_sz; #endif extern unsigned struct_rusage_sz; extern unsigned siginfo_t_sz; extern unsigned struct_itimerval_sz; extern unsigned pthread_t_sz; extern unsigned pthread_cond_t_sz; extern unsigned pid_t_sz; extern unsigned timeval_sz; extern unsigned uid_t_sz; extern unsigned gid_t_sz; extern unsigned mbstate_t_sz; extern unsigned struct_timezone_sz; extern unsigned struct_tms_sz; extern unsigned struct_itimerspec_sz; extern unsigned struct_sigevent_sz; extern unsigned struct_sched_param_sz; extern unsigned struct_statfs64_sz; #if !SANITIZER_ANDROID extern unsigned struct_statfs_sz; extern unsigned struct_sockaddr_sz; extern unsigned ucontext_t_sz; #endif // !SANITIZER_ANDROID #if SANITIZER_LINUX #if defined(__x86_64__) const unsigned struct_kernel_stat_sz = 144; const unsigned struct_kernel_stat64_sz = 0; #elif defined(__i386__) const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 96; #elif defined(__arm__) const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__aarch64__) const unsigned struct_kernel_stat_sz = 128; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__powerpc__) && !defined(__powerpc64__) const unsigned struct_kernel_stat_sz = 72; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__powerpc64__) const unsigned struct_kernel_stat_sz = 144; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__riscv__) /* RISCVTODO: check that these values are correct */ const unsigned struct_kernel_stat_sz = 128; const unsigned struct_kernel_stat64_sz = 128; #elif defined(__mips__) const unsigned struct_kernel_stat_sz = SANITIZER_ANDROID ? FIRST_32_SECOND_64(104, 128) : FIRST_32_SECOND_64(144, 216); const unsigned struct_kernel_stat64_sz = 104; #elif defined(__s390__) && !defined(__s390x__) const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__s390x__) const unsigned struct_kernel_stat_sz = 144; const unsigned struct_kernel_stat64_sz = 0; #elif defined(__sparc__) && defined(__arch64__) const unsigned struct___old_kernel_stat_sz = 0; const unsigned struct_kernel_stat_sz = 104; const unsigned struct_kernel_stat64_sz = 144; #elif defined(__sparc__) && !defined(__arch64__) const unsigned struct___old_kernel_stat_sz = 0; const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 104; #endif struct __sanitizer_perf_event_attr { unsigned type; unsigned size; // More fields that vary with the kernel version. }; extern unsigned struct_epoll_event_sz; extern unsigned struct_sysinfo_sz; extern unsigned __user_cap_header_struct_sz; extern unsigned __user_cap_data_struct_sz; extern unsigned struct_new_utsname_sz; extern unsigned struct_old_utsname_sz; extern unsigned struct_oldold_utsname_sz; const unsigned struct_kexec_segment_sz = 4 * sizeof(unsigned long); #endif // SANITIZER_LINUX #if SANITIZER_LINUX || SANITIZER_FREEBSD #if defined(__powerpc64__) || defined(__riscv__) || defined(__s390__) const unsigned struct___old_kernel_stat_sz = 0; #elif !defined(__sparc__) const unsigned struct___old_kernel_stat_sz = 32; #endif extern unsigned struct_rlimit_sz; extern unsigned struct_utimbuf_sz; extern unsigned struct_timespec_sz; struct __sanitizer_iocb { u64 aio_data; u32 aio_key_or_aio_reserved1; // Simply crazy. u32 aio_reserved1_or_aio_key; // Luckily, we don't need these. u16 aio_lio_opcode; s16 aio_reqprio; u32 aio_fildes; u64 aio_buf; u64 aio_nbytes; s64 aio_offset; u64 aio_reserved2; u64 aio_reserved3; }; struct __sanitizer_io_event { u64 data; u64 obj; u64 res; u64 res2; }; const unsigned iocb_cmd_pread = 0; const unsigned iocb_cmd_pwrite = 1; const unsigned iocb_cmd_preadv = 7; const unsigned iocb_cmd_pwritev = 8; struct __sanitizer___sysctl_args { int *name; int nlen; void *oldval; uptr *oldlenp; void *newval; uptr newlen; unsigned long ___unused[4]; }; const unsigned old_sigset_t_sz = sizeof(unsigned long); struct __sanitizer_sem_t { #if SANITIZER_ANDROID && defined(_LP64) int data[4]; #elif SANITIZER_ANDROID && !defined(_LP64) int data; #elif SANITIZER_LINUX uptr data[4]; #elif SANITIZER_FREEBSD u32 data[4]; #endif }; #endif // SANITIZER_LINUX || SANITIZER_FREEBSD #if SANITIZER_ANDROID struct __sanitizer_mallinfo { uptr v[10]; }; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer_mallinfo { int v[10]; }; extern unsigned struct_ustat_sz; extern unsigned struct_rlimit64_sz; extern unsigned struct_statvfs64_sz; struct __sanitizer_ipc_perm { int __key; int uid; int gid; int cuid; int cgid; #ifdef __powerpc__ unsigned mode; unsigned __seq; u64 __unused1; u64 __unused2; #elif defined(__sparc__) #if defined(__arch64__) unsigned mode; unsigned short __pad1; #else unsigned short __pad1; unsigned short mode; unsigned short __pad2; #endif unsigned short __seq; unsigned long long __unused1; unsigned long long __unused2; #elif defined(__mips__) || defined(__aarch64__) || defined(__s390x__) unsigned int mode; unsigned short __seq; unsigned short __pad1; unsigned long __unused1; unsigned long __unused2; #else unsigned short mode; unsigned short __pad1; unsigned short __seq; unsigned short __pad2; #if defined(__x86_64__) && !defined(_LP64) u64 __unused1; u64 __unused2; #else unsigned long __unused1; unsigned long __unused2; #endif #endif }; struct __sanitizer_shmid_ds { __sanitizer_ipc_perm shm_perm; #if defined(__sparc__) #if !defined(__arch64__) u32 __pad1; #endif long shm_atime; #if !defined(__arch64__) u32 __pad2; #endif long shm_dtime; #if !defined(__arch64__) u32 __pad3; #endif long shm_ctime; uptr shm_segsz; int shm_cpid; int shm_lpid; unsigned long shm_nattch; unsigned long __glibc_reserved1; unsigned long __glibc_reserved2; #else #ifndef __powerpc__ uptr shm_segsz; #elif !defined(__powerpc64__) uptr __unused0; #endif #if defined(__x86_64__) && !defined(_LP64) u64 shm_atime; u64 shm_dtime; u64 shm_ctime; #else uptr shm_atime; #if !defined(_LP64) && !defined(__mips__) uptr __unused1; #endif uptr shm_dtime; #if !defined(_LP64) && !defined(__mips__) uptr __unused2; #endif uptr shm_ctime; #if !defined(_LP64) && !defined(__mips__) uptr __unused3; #endif #endif #ifdef __powerpc__ uptr shm_segsz; #endif int shm_cpid; int shm_lpid; #if defined(__x86_64__) && !defined(_LP64) u64 shm_nattch; u64 __unused4; u64 __unused5; #else uptr shm_nattch; uptr __unused4; uptr __unused5; #endif #endif }; #elif SANITIZER_FREEBSD struct __sanitizer_ipc_perm { unsigned int cuid; unsigned int cgid; unsigned int uid; unsigned int gid; unsigned short mode; unsigned short seq; long key; }; struct __sanitizer_shmid_ds { __sanitizer_ipc_perm shm_perm; unsigned long shm_segsz; unsigned int shm_lpid; unsigned int shm_cpid; int shm_nattch; unsigned long shm_atime; unsigned long shm_dtime; unsigned long shm_ctime; }; #endif #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID extern unsigned struct_msqid_ds_sz; extern unsigned struct_mq_attr_sz; extern unsigned struct_timex_sz; extern unsigned struct_statvfs_sz; #endif // (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID struct __sanitizer_iovec { void *iov_base; uptr iov_len; }; #if !SANITIZER_ANDROID struct __sanitizer_ifaddrs { struct __sanitizer_ifaddrs *ifa_next; char *ifa_name; unsigned int ifa_flags; void *ifa_addr; // (struct sockaddr *) void *ifa_netmask; // (struct sockaddr *) // This is a union on Linux. # ifdef ifa_dstaddr # undef ifa_dstaddr # endif void *ifa_dstaddr; // (struct sockaddr *) void *ifa_data; }; #endif // !SANITIZER_ANDROID #if SANITIZER_MAC typedef unsigned long __sanitizer_pthread_key_t; #else typedef unsigned __sanitizer_pthread_key_t; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer_XDR { int x_op; void *x_ops; uptr x_public; uptr x_private; uptr x_base; unsigned x_handy; }; const int __sanitizer_XDR_ENCODE = 0; const int __sanitizer_XDR_DECODE = 1; const int __sanitizer_XDR_FREE = 2; #endif struct __sanitizer_passwd { char *pw_name; char *pw_passwd; int pw_uid; int pw_gid; #if SANITIZER_MAC || SANITIZER_FREEBSD long pw_change; char *pw_class; #endif #if !(SANITIZER_ANDROID && (SANITIZER_WORDSIZE == 32)) char *pw_gecos; #endif char *pw_dir; char *pw_shell; #if SANITIZER_MAC || SANITIZER_FREEBSD long pw_expire; #endif #if SANITIZER_FREEBSD int pw_fields; #endif }; struct __sanitizer_group { char *gr_name; char *gr_passwd; int gr_gid; char **gr_mem; }; #if defined(__x86_64__) && !defined(_LP64) typedef long long __sanitizer_time_t; #else typedef long __sanitizer_time_t; #endif struct __sanitizer_timeb { __sanitizer_time_t time; unsigned short millitm; short timezone; short dstflag; }; struct __sanitizer_ether_addr { u8 octet[6]; }; struct __sanitizer_tm { int tm_sec; int tm_min; int tm_hour; int tm_mday; int tm_mon; int tm_year; int tm_wday; int tm_yday; int tm_isdst; long int tm_gmtoff; const char *tm_zone; }; #if SANITIZER_LINUX struct __sanitizer_mntent { char *mnt_fsname; char *mnt_dir; char *mnt_type; char *mnt_opts; int mnt_freq; int mnt_passno; }; #endif #if SANITIZER_MAC || SANITIZER_FREEBSD struct __sanitizer_msghdr { void *msg_name; unsigned msg_namelen; struct __sanitizer_iovec *msg_iov; unsigned msg_iovlen; void *msg_control; unsigned msg_controllen; int msg_flags; }; struct __sanitizer_cmsghdr { unsigned cmsg_len; int cmsg_level; int cmsg_type; }; #else struct __sanitizer_msghdr { void *msg_name; unsigned msg_namelen; struct __sanitizer_iovec *msg_iov; uptr msg_iovlen; void *msg_control; uptr msg_controllen; int msg_flags; }; struct __sanitizer_cmsghdr { uptr cmsg_len; int cmsg_level; int cmsg_type; }; #endif #if SANITIZER_MAC struct __sanitizer_dirent { unsigned long long d_ino; unsigned long long d_seekoff; unsigned short d_reclen; // more fields that we don't care about }; #elif SANITIZER_FREEBSD struct __sanitizer_dirent { - unsigned int d_fileno; + unsigned long long d_fileno; + unsigned long long d_off; unsigned short d_reclen; // more fields that we don't care about }; #elif SANITIZER_ANDROID || defined(__x86_64__) struct __sanitizer_dirent { unsigned long long d_ino; unsigned long long d_off; unsigned short d_reclen; // more fields that we don't care about }; #else struct __sanitizer_dirent { uptr d_ino; uptr d_off; unsigned short d_reclen; // more fields that we don't care about }; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer_dirent64 { unsigned long long d_ino; unsigned long long d_off; unsigned short d_reclen; // more fields that we don't care about }; #endif // 'clock_t' is 32 bits wide on x64 FreeBSD #if SANITIZER_FREEBSD typedef int __sanitizer_clock_t; #elif defined(__x86_64__) && !defined(_LP64) typedef long long __sanitizer_clock_t; #else typedef long __sanitizer_clock_t; #endif #if SANITIZER_LINUX typedef int __sanitizer_clockid_t; #endif #if SANITIZER_LINUX || SANITIZER_FREEBSD #if defined(_LP64) || defined(__x86_64__) || defined(__powerpc__)\ || defined(__mips__) typedef unsigned __sanitizer___kernel_uid_t; typedef unsigned __sanitizer___kernel_gid_t; #else typedef unsigned short __sanitizer___kernel_uid_t; typedef unsigned short __sanitizer___kernel_gid_t; #endif #if defined(__x86_64__) && !defined(_LP64) typedef long long __sanitizer___kernel_off_t; #else typedef long __sanitizer___kernel_off_t; #endif #if defined(__powerpc__) || defined(__mips__) || defined(__riscv__) typedef unsigned int __sanitizer___kernel_old_uid_t; typedef unsigned int __sanitizer___kernel_old_gid_t; #else typedef unsigned short __sanitizer___kernel_old_uid_t; typedef unsigned short __sanitizer___kernel_old_gid_t; #endif typedef long long __sanitizer___kernel_loff_t; typedef struct { unsigned long fds_bits[1024 / (8 * sizeof(long))]; } __sanitizer___kernel_fd_set; #endif // This thing depends on the platform. We are only interested in the upper // limit. Verified with a compiler assert in .cc. const int pthread_attr_t_max_sz = 128; union __sanitizer_pthread_attr_t { char size[pthread_attr_t_max_sz]; // NOLINT void *align; }; #if SANITIZER_ANDROID # if SANITIZER_MIPS typedef unsigned long __sanitizer_sigset_t[16/sizeof(unsigned long)]; # else typedef unsigned long __sanitizer_sigset_t; # endif #elif SANITIZER_MAC typedef unsigned __sanitizer_sigset_t; #elif SANITIZER_LINUX struct __sanitizer_sigset_t { // The size is determined by looking at sizeof of real sigset_t on linux. uptr val[128 / sizeof(uptr)]; }; #elif SANITIZER_FREEBSD struct __sanitizer_sigset_t { // uint32_t * 4 unsigned int __bits[4]; }; #endif // Linux system headers define the 'sa_handler' and 'sa_sigaction' macros. #if SANITIZER_ANDROID && (SANITIZER_WORDSIZE == 64) struct __sanitizer_sigaction { unsigned sa_flags; union { void (*sigaction)(int sig, void *siginfo, void *uctx); void (*handler)(int sig); }; __sanitizer_sigset_t sa_mask; void (*sa_restorer)(); }; #elif SANITIZER_ANDROID && SANITIZER_MIPS32 // check this before WORDSIZE == 32 struct __sanitizer_sigaction { unsigned sa_flags; union { void (*sigaction)(int sig, void *siginfo, void *uctx); void (*handler)(int sig); }; __sanitizer_sigset_t sa_mask; }; #elif SANITIZER_ANDROID && (SANITIZER_WORDSIZE == 32) struct __sanitizer_sigaction { union { void (*sigaction)(int sig, void *siginfo, void *uctx); void (*handler)(int sig); }; __sanitizer_sigset_t sa_mask; uptr sa_flags; void (*sa_restorer)(); }; #else // !SANITIZER_ANDROID struct __sanitizer_sigaction { #if defined(__mips__) && !SANITIZER_FREEBSD unsigned int sa_flags; #endif union { void (*sigaction)(int sig, void *siginfo, void *uctx); void (*handler)(int sig); }; #if SANITIZER_FREEBSD int sa_flags; __sanitizer_sigset_t sa_mask; #else #if defined(__s390x__) int sa_resv; #else __sanitizer_sigset_t sa_mask; #endif #ifndef __mips__ #if defined(__sparc__) #if __GLIBC_PREREQ (2, 20) // On sparc glibc 2.19 and earlier sa_flags was unsigned long. #if defined(__arch64__) // To maintain ABI compatibility on sparc64 when switching to an int, // __glibc_reserved0 was added. int __glibc_reserved0; #endif int sa_flags; #else unsigned long sa_flags; #endif #else int sa_flags; #endif #endif #endif #if SANITIZER_LINUX void (*sa_restorer)(); #endif #if defined(__mips__) && (SANITIZER_WORDSIZE == 32) int sa_resv[1]; #endif #if defined(__s390x__) __sanitizer_sigset_t sa_mask; #endif }; #endif // !SANITIZER_ANDROID #if SANITIZER_FREEBSD typedef __sanitizer_sigset_t __sanitizer_kernel_sigset_t; #elif defined(__mips__) struct __sanitizer_kernel_sigset_t { uptr sig[2]; }; #else struct __sanitizer_kernel_sigset_t { u8 sig[8]; }; #endif // Linux system headers define the 'sa_handler' and 'sa_sigaction' macros. #if SANITIZER_MIPS struct __sanitizer_kernel_sigaction_t { unsigned int sa_flags; union { void (*handler)(int signo); void (*sigaction)(int signo, void *info, void *ctx); }; __sanitizer_kernel_sigset_t sa_mask; void (*sa_restorer)(void); }; #else struct __sanitizer_kernel_sigaction_t { union { void (*handler)(int signo); void (*sigaction)(int signo, void *info, void *ctx); }; unsigned long sa_flags; void (*sa_restorer)(void); __sanitizer_kernel_sigset_t sa_mask; }; #endif extern uptr sig_ign; extern uptr sig_dfl; extern uptr sa_siginfo; #if SANITIZER_LINUX extern int e_tabsz; #endif extern int af_inet; extern int af_inet6; uptr __sanitizer_in_addr_sz(int af); #if SANITIZER_LINUX || SANITIZER_FREEBSD struct __sanitizer_dl_phdr_info { uptr dlpi_addr; const char *dlpi_name; const void *dlpi_phdr; short dlpi_phnum; }; extern unsigned struct_ElfW_Phdr_sz; #endif struct __sanitizer_addrinfo { int ai_flags; int ai_family; int ai_socktype; int ai_protocol; #if SANITIZER_ANDROID || SANITIZER_MAC || SANITIZER_FREEBSD unsigned ai_addrlen; char *ai_canonname; void *ai_addr; #else // LINUX unsigned ai_addrlen; void *ai_addr; char *ai_canonname; #endif struct __sanitizer_addrinfo *ai_next; }; struct __sanitizer_hostent { char *h_name; char **h_aliases; int h_addrtype; int h_length; char **h_addr_list; }; struct __sanitizer_pollfd { int fd; short events; short revents; }; #if SANITIZER_ANDROID || SANITIZER_MAC || SANITIZER_FREEBSD typedef unsigned __sanitizer_nfds_t; #else typedef unsigned long __sanitizer_nfds_t; #endif #if !SANITIZER_ANDROID # if SANITIZER_LINUX struct __sanitizer_glob_t { uptr gl_pathc; char **gl_pathv; uptr gl_offs; int gl_flags; void (*gl_closedir)(void *dirp); void *(*gl_readdir)(void *dirp); void *(*gl_opendir)(const char *); int (*gl_lstat)(const char *, void *); int (*gl_stat)(const char *, void *); }; # elif SANITIZER_FREEBSD struct __sanitizer_glob_t { uptr gl_pathc; uptr gl_matchc; uptr gl_offs; int gl_flags; char **gl_pathv; int (*gl_errfunc)(const char*, int); void (*gl_closedir)(void *dirp); struct dirent *(*gl_readdir)(void *dirp); void *(*gl_opendir)(const char*); int (*gl_lstat)(const char*, void* /* struct stat* */); int (*gl_stat)(const char*, void* /* struct stat* */); }; # endif // SANITIZER_FREEBSD # if SANITIZER_LINUX || SANITIZER_FREEBSD extern int glob_nomatch; extern int glob_altdirfunc; # endif #endif // !SANITIZER_ANDROID extern unsigned path_max; struct __sanitizer_wordexp_t { uptr we_wordc; char **we_wordv; uptr we_offs; #if SANITIZER_FREEBSD char *we_strings; uptr we_nbytes; #endif }; #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer_FILE { int _flags; char *_IO_read_ptr; char *_IO_read_end; char *_IO_read_base; char *_IO_write_base; char *_IO_write_ptr; char *_IO_write_end; char *_IO_buf_base; char *_IO_buf_end; char *_IO_save_base; char *_IO_backup_base; char *_IO_save_end; void *_markers; __sanitizer_FILE *_chain; int _fileno; }; # define SANITIZER_HAS_STRUCT_FILE 1 #else typedef void __sanitizer_FILE; # define SANITIZER_HAS_STRUCT_FILE 0 #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ defined(__s390__)) extern unsigned struct_user_regs_struct_sz; extern unsigned struct_user_fpregs_struct_sz; extern unsigned struct_user_fpxregs_struct_sz; extern unsigned struct_user_vfpregs_struct_sz; extern int ptrace_peektext; extern int ptrace_peekdata; extern int ptrace_peekuser; extern int ptrace_getregs; extern int ptrace_setregs; extern int ptrace_getfpregs; extern int ptrace_setfpregs; extern int ptrace_getfpxregs; extern int ptrace_setfpxregs; extern int ptrace_getvfpregs; extern int ptrace_setvfpregs; extern int ptrace_getsiginfo; extern int ptrace_setsiginfo; extern int ptrace_getregset; extern int ptrace_setregset; extern int ptrace_geteventmsg; #endif #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID extern unsigned struct_shminfo_sz; extern unsigned struct_shm_info_sz; extern int shmctl_ipc_stat; extern int shmctl_ipc_info; extern int shmctl_shm_info; extern int shmctl_shm_stat; #endif #if !SANITIZER_MAC && !SANITIZER_FREEBSD extern unsigned struct_utmp_sz; #endif #if !SANITIZER_ANDROID extern unsigned struct_utmpx_sz; #endif extern int map_fixed; // ioctl arguments struct __sanitizer_ifconf { int ifc_len; union { void *ifcu_req; } ifc_ifcu; #if SANITIZER_MAC } __attribute__((packed)); #else }; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer__obstack_chunk { char *limit; struct __sanitizer__obstack_chunk *prev; }; struct __sanitizer_obstack { long chunk_size; struct __sanitizer__obstack_chunk *chunk; char *object_base; char *next_free; uptr more_fields[7]; }; typedef uptr (*__sanitizer_cookie_io_read)(void *cookie, char *buf, uptr size); typedef uptr (*__sanitizer_cookie_io_write)(void *cookie, const char *buf, uptr size); typedef int (*__sanitizer_cookie_io_seek)(void *cookie, u64 *offset, int whence); typedef int (*__sanitizer_cookie_io_close)(void *cookie); struct __sanitizer_cookie_io_functions_t { __sanitizer_cookie_io_read read; __sanitizer_cookie_io_write write; __sanitizer_cookie_io_seek seek; __sanitizer_cookie_io_close close; }; #endif #define IOC_NRBITS 8 #define IOC_TYPEBITS 8 #if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__) || \ defined(__sparc__) #define IOC_SIZEBITS 13 #define IOC_DIRBITS 3 #define IOC_NONE 1U #define IOC_WRITE 4U #define IOC_READ 2U #else #define IOC_SIZEBITS 14 #define IOC_DIRBITS 2 #define IOC_NONE 0U #define IOC_WRITE 1U #define IOC_READ 2U #endif #define IOC_NRMASK ((1 << IOC_NRBITS) - 1) #define IOC_TYPEMASK ((1 << IOC_TYPEBITS) - 1) #define IOC_SIZEMASK ((1 << IOC_SIZEBITS) - 1) #if defined(IOC_DIRMASK) #undef IOC_DIRMASK #endif #define IOC_DIRMASK ((1 << IOC_DIRBITS) - 1) #define IOC_NRSHIFT 0 #define IOC_TYPESHIFT (IOC_NRSHIFT + IOC_NRBITS) #define IOC_SIZESHIFT (IOC_TYPESHIFT + IOC_TYPEBITS) #define IOC_DIRSHIFT (IOC_SIZESHIFT + IOC_SIZEBITS) #define EVIOC_EV_MAX 0x1f #define EVIOC_ABS_MAX 0x3f #define IOC_DIR(nr) (((nr) >> IOC_DIRSHIFT) & IOC_DIRMASK) #define IOC_TYPE(nr) (((nr) >> IOC_TYPESHIFT) & IOC_TYPEMASK) #define IOC_NR(nr) (((nr) >> IOC_NRSHIFT) & IOC_NRMASK) #if defined(__sparc__) // In sparc the 14 bits SIZE field overlaps with the // least significant bit of DIR, so either IOC_READ or // IOC_WRITE shall be 1 in order to get a non-zero SIZE. #define IOC_SIZE(nr) \ ((((((nr) >> 29) & 0x7) & (4U | 2U)) == 0) ? 0 : (((nr) >> 16) & 0x3fff)) #else #define IOC_SIZE(nr) (((nr) >> IOC_SIZESHIFT) & IOC_SIZEMASK) #endif extern unsigned struct_ifreq_sz; extern unsigned struct_termios_sz; extern unsigned struct_winsize_sz; #if SANITIZER_LINUX extern unsigned struct_arpreq_sz; extern unsigned struct_cdrom_msf_sz; extern unsigned struct_cdrom_multisession_sz; extern unsigned struct_cdrom_read_audio_sz; extern unsigned struct_cdrom_subchnl_sz; extern unsigned struct_cdrom_ti_sz; extern unsigned struct_cdrom_tocentry_sz; extern unsigned struct_cdrom_tochdr_sz; extern unsigned struct_cdrom_volctrl_sz; extern unsigned struct_ff_effect_sz; extern unsigned struct_floppy_drive_params_sz; extern unsigned struct_floppy_drive_struct_sz; extern unsigned struct_floppy_fdc_state_sz; extern unsigned struct_floppy_max_errors_sz; extern unsigned struct_floppy_raw_cmd_sz; extern unsigned struct_floppy_struct_sz; extern unsigned struct_floppy_write_errors_sz; extern unsigned struct_format_descr_sz; extern unsigned struct_hd_driveid_sz; extern unsigned struct_hd_geometry_sz; extern unsigned struct_input_absinfo_sz; extern unsigned struct_input_id_sz; extern unsigned struct_mtpos_sz; extern unsigned struct_termio_sz; extern unsigned struct_vt_consize_sz; extern unsigned struct_vt_sizes_sz; extern unsigned struct_vt_stat_sz; #endif // SANITIZER_LINUX #if SANITIZER_LINUX || SANITIZER_FREEBSD extern unsigned struct_copr_buffer_sz; extern unsigned struct_copr_debug_buf_sz; extern unsigned struct_copr_msg_sz; extern unsigned struct_midi_info_sz; extern unsigned struct_mtget_sz; extern unsigned struct_mtop_sz; extern unsigned struct_rtentry_sz; extern unsigned struct_sbi_instrument_sz; extern unsigned struct_seq_event_rec_sz; extern unsigned struct_synth_info_sz; extern unsigned struct_vt_mode_sz; #endif // SANITIZER_LINUX || SANITIZER_FREEBSD #if SANITIZER_LINUX && !SANITIZER_ANDROID extern unsigned struct_ax25_parms_struct_sz; extern unsigned struct_cyclades_monitor_sz; extern unsigned struct_input_keymap_entry_sz; extern unsigned struct_ipx_config_data_sz; extern unsigned struct_kbdiacrs_sz; extern unsigned struct_kbentry_sz; extern unsigned struct_kbkeycode_sz; extern unsigned struct_kbsentry_sz; extern unsigned struct_mtconfiginfo_sz; extern unsigned struct_nr_parms_struct_sz; extern unsigned struct_scc_modem_sz; extern unsigned struct_scc_stat_sz; extern unsigned struct_serial_multiport_struct_sz; extern unsigned struct_serial_struct_sz; extern unsigned struct_sockaddr_ax25_sz; extern unsigned struct_unimapdesc_sz; extern unsigned struct_unimapinit_sz; #endif // SANITIZER_LINUX && !SANITIZER_ANDROID #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID extern unsigned struct_audio_buf_info_sz; extern unsigned struct_ppp_stats_sz; #endif // (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID #if !SANITIZER_ANDROID && !SANITIZER_MAC extern unsigned struct_sioc_sg_req_sz; extern unsigned struct_sioc_vif_req_sz; #endif // ioctl request identifiers // A special value to mark ioctls that are not present on the target platform, // when it can not be determined without including any system headers. extern const unsigned IOCTL_NOT_PRESENT; extern unsigned IOCTL_FIOASYNC; extern unsigned IOCTL_FIOCLEX; extern unsigned IOCTL_FIOGETOWN; extern unsigned IOCTL_FIONBIO; extern unsigned IOCTL_FIONCLEX; extern unsigned IOCTL_FIOSETOWN; extern unsigned IOCTL_SIOCADDMULTI; extern unsigned IOCTL_SIOCATMARK; extern unsigned IOCTL_SIOCDELMULTI; extern unsigned IOCTL_SIOCGIFADDR; extern unsigned IOCTL_SIOCGIFBRDADDR; extern unsigned IOCTL_SIOCGIFCONF; extern unsigned IOCTL_SIOCGIFDSTADDR; extern unsigned IOCTL_SIOCGIFFLAGS; extern unsigned IOCTL_SIOCGIFMETRIC; extern unsigned IOCTL_SIOCGIFMTU; extern unsigned IOCTL_SIOCGIFNETMASK; extern unsigned IOCTL_SIOCGPGRP; extern unsigned IOCTL_SIOCSIFADDR; extern unsigned IOCTL_SIOCSIFBRDADDR; extern unsigned IOCTL_SIOCSIFDSTADDR; extern unsigned IOCTL_SIOCSIFFLAGS; extern unsigned IOCTL_SIOCSIFMETRIC; extern unsigned IOCTL_SIOCSIFMTU; extern unsigned IOCTL_SIOCSIFNETMASK; extern unsigned IOCTL_SIOCSPGRP; extern unsigned IOCTL_TIOCCONS; extern unsigned IOCTL_TIOCEXCL; extern unsigned IOCTL_TIOCGETD; extern unsigned IOCTL_TIOCGPGRP; extern unsigned IOCTL_TIOCGWINSZ; extern unsigned IOCTL_TIOCMBIC; extern unsigned IOCTL_TIOCMBIS; extern unsigned IOCTL_TIOCMGET; extern unsigned IOCTL_TIOCMSET; extern unsigned IOCTL_TIOCNOTTY; extern unsigned IOCTL_TIOCNXCL; extern unsigned IOCTL_TIOCOUTQ; extern unsigned IOCTL_TIOCPKT; extern unsigned IOCTL_TIOCSCTTY; extern unsigned IOCTL_TIOCSETD; extern unsigned IOCTL_TIOCSPGRP; extern unsigned IOCTL_TIOCSTI; extern unsigned IOCTL_TIOCSWINSZ; #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID extern unsigned IOCTL_SIOCGETSGCNT; extern unsigned IOCTL_SIOCGETVIFCNT; #endif #if SANITIZER_LINUX extern unsigned IOCTL_EVIOCGABS; extern unsigned IOCTL_EVIOCGBIT; extern unsigned IOCTL_EVIOCGEFFECTS; extern unsigned IOCTL_EVIOCGID; extern unsigned IOCTL_EVIOCGKEY; extern unsigned IOCTL_EVIOCGKEYCODE; extern unsigned IOCTL_EVIOCGLED; extern unsigned IOCTL_EVIOCGNAME; extern unsigned IOCTL_EVIOCGPHYS; extern unsigned IOCTL_EVIOCGRAB; extern unsigned IOCTL_EVIOCGREP; extern unsigned IOCTL_EVIOCGSND; extern unsigned IOCTL_EVIOCGSW; extern unsigned IOCTL_EVIOCGUNIQ; extern unsigned IOCTL_EVIOCGVERSION; extern unsigned IOCTL_EVIOCRMFF; extern unsigned IOCTL_EVIOCSABS; extern unsigned IOCTL_EVIOCSFF; extern unsigned IOCTL_EVIOCSKEYCODE; extern unsigned IOCTL_EVIOCSREP; extern unsigned IOCTL_BLKFLSBUF; extern unsigned IOCTL_BLKGETSIZE; extern unsigned IOCTL_BLKRAGET; extern unsigned IOCTL_BLKRASET; extern unsigned IOCTL_BLKROGET; extern unsigned IOCTL_BLKROSET; extern unsigned IOCTL_BLKRRPART; extern unsigned IOCTL_CDROMAUDIOBUFSIZ; extern unsigned IOCTL_CDROMEJECT; extern unsigned IOCTL_CDROMEJECT_SW; extern unsigned IOCTL_CDROMMULTISESSION; extern unsigned IOCTL_CDROMPAUSE; extern unsigned IOCTL_CDROMPLAYMSF; extern unsigned IOCTL_CDROMPLAYTRKIND; extern unsigned IOCTL_CDROMREADAUDIO; extern unsigned IOCTL_CDROMREADCOOKED; extern unsigned IOCTL_CDROMREADMODE1; extern unsigned IOCTL_CDROMREADMODE2; extern unsigned IOCTL_CDROMREADRAW; extern unsigned IOCTL_CDROMREADTOCENTRY; extern unsigned IOCTL_CDROMREADTOCHDR; extern unsigned IOCTL_CDROMRESET; extern unsigned IOCTL_CDROMRESUME; extern unsigned IOCTL_CDROMSEEK; extern unsigned IOCTL_CDROMSTART; extern unsigned IOCTL_CDROMSTOP; extern unsigned IOCTL_CDROMSUBCHNL; extern unsigned IOCTL_CDROMVOLCTRL; extern unsigned IOCTL_CDROMVOLREAD; extern unsigned IOCTL_CDROM_GET_UPC; extern unsigned IOCTL_FDCLRPRM; extern unsigned IOCTL_FDDEFPRM; extern unsigned IOCTL_FDFLUSH; extern unsigned IOCTL_FDFMTBEG; extern unsigned IOCTL_FDFMTEND; extern unsigned IOCTL_FDFMTTRK; extern unsigned IOCTL_FDGETDRVPRM; extern unsigned IOCTL_FDGETDRVSTAT; extern unsigned IOCTL_FDGETDRVTYP; extern unsigned IOCTL_FDGETFDCSTAT; extern unsigned IOCTL_FDGETMAXERRS; extern unsigned IOCTL_FDGETPRM; extern unsigned IOCTL_FDMSGOFF; extern unsigned IOCTL_FDMSGON; extern unsigned IOCTL_FDPOLLDRVSTAT; extern unsigned IOCTL_FDRAWCMD; extern unsigned IOCTL_FDRESET; extern unsigned IOCTL_FDSETDRVPRM; extern unsigned IOCTL_FDSETEMSGTRESH; extern unsigned IOCTL_FDSETMAXERRS; extern unsigned IOCTL_FDSETPRM; extern unsigned IOCTL_FDTWADDLE; extern unsigned IOCTL_FDWERRORCLR; extern unsigned IOCTL_FDWERRORGET; extern unsigned IOCTL_HDIO_DRIVE_CMD; extern unsigned IOCTL_HDIO_GETGEO; extern unsigned IOCTL_HDIO_GET_32BIT; extern unsigned IOCTL_HDIO_GET_DMA; extern unsigned IOCTL_HDIO_GET_IDENTITY; extern unsigned IOCTL_HDIO_GET_KEEPSETTINGS; extern unsigned IOCTL_HDIO_GET_MULTCOUNT; extern unsigned IOCTL_HDIO_GET_NOWERR; extern unsigned IOCTL_HDIO_GET_UNMASKINTR; extern unsigned IOCTL_HDIO_SET_32BIT; extern unsigned IOCTL_HDIO_SET_DMA; extern unsigned IOCTL_HDIO_SET_KEEPSETTINGS; extern unsigned IOCTL_HDIO_SET_MULTCOUNT; extern unsigned IOCTL_HDIO_SET_NOWERR; extern unsigned IOCTL_HDIO_SET_UNMASKINTR; extern unsigned IOCTL_MTIOCPOS; extern unsigned IOCTL_PPPIOCGASYNCMAP; extern unsigned IOCTL_PPPIOCGDEBUG; extern unsigned IOCTL_PPPIOCGFLAGS; extern unsigned IOCTL_PPPIOCGUNIT; extern unsigned IOCTL_PPPIOCGXASYNCMAP; extern unsigned IOCTL_PPPIOCSASYNCMAP; extern unsigned IOCTL_PPPIOCSDEBUG; extern unsigned IOCTL_PPPIOCSFLAGS; extern unsigned IOCTL_PPPIOCSMAXCID; extern unsigned IOCTL_PPPIOCSMRU; extern unsigned IOCTL_PPPIOCSXASYNCMAP; extern unsigned IOCTL_SIOCDARP; extern unsigned IOCTL_SIOCDRARP; extern unsigned IOCTL_SIOCGARP; extern unsigned IOCTL_SIOCGIFENCAP; extern unsigned IOCTL_SIOCGIFHWADDR; extern unsigned IOCTL_SIOCGIFMAP; extern unsigned IOCTL_SIOCGIFMEM; extern unsigned IOCTL_SIOCGIFNAME; extern unsigned IOCTL_SIOCGIFSLAVE; extern unsigned IOCTL_SIOCGRARP; extern unsigned IOCTL_SIOCGSTAMP; extern unsigned IOCTL_SIOCSARP; extern unsigned IOCTL_SIOCSIFENCAP; extern unsigned IOCTL_SIOCSIFHWADDR; extern unsigned IOCTL_SIOCSIFLINK; extern unsigned IOCTL_SIOCSIFMAP; extern unsigned IOCTL_SIOCSIFMEM; extern unsigned IOCTL_SIOCSIFSLAVE; extern unsigned IOCTL_SIOCSRARP; extern unsigned IOCTL_SNDCTL_COPR_HALT; extern unsigned IOCTL_SNDCTL_COPR_LOAD; extern unsigned IOCTL_SNDCTL_COPR_RCODE; extern unsigned IOCTL_SNDCTL_COPR_RCVMSG; extern unsigned IOCTL_SNDCTL_COPR_RDATA; extern unsigned IOCTL_SNDCTL_COPR_RESET; extern unsigned IOCTL_SNDCTL_COPR_RUN; extern unsigned IOCTL_SNDCTL_COPR_SENDMSG; extern unsigned IOCTL_SNDCTL_COPR_WCODE; extern unsigned IOCTL_SNDCTL_COPR_WDATA; extern unsigned IOCTL_TCFLSH; extern unsigned IOCTL_TCGETA; extern unsigned IOCTL_TCGETS; extern unsigned IOCTL_TCSBRK; extern unsigned IOCTL_TCSBRKP; extern unsigned IOCTL_TCSETA; extern unsigned IOCTL_TCSETAF; extern unsigned IOCTL_TCSETAW; extern unsigned IOCTL_TCSETS; extern unsigned IOCTL_TCSETSF; extern unsigned IOCTL_TCSETSW; extern unsigned IOCTL_TCXONC; extern unsigned IOCTL_TIOCGLCKTRMIOS; extern unsigned IOCTL_TIOCGSOFTCAR; extern unsigned IOCTL_TIOCINQ; extern unsigned IOCTL_TIOCLINUX; extern unsigned IOCTL_TIOCSERCONFIG; extern unsigned IOCTL_TIOCSERGETLSR; extern unsigned IOCTL_TIOCSERGWILD; extern unsigned IOCTL_TIOCSERSWILD; extern unsigned IOCTL_TIOCSLCKTRMIOS; extern unsigned IOCTL_TIOCSSOFTCAR; extern unsigned IOCTL_VT_DISALLOCATE; extern unsigned IOCTL_VT_GETSTATE; extern unsigned IOCTL_VT_RESIZE; extern unsigned IOCTL_VT_RESIZEX; extern unsigned IOCTL_VT_SENDSIG; #endif // SANITIZER_LINUX #if SANITIZER_LINUX || SANITIZER_FREEBSD extern unsigned IOCTL_MTIOCGET; extern unsigned IOCTL_MTIOCTOP; extern unsigned IOCTL_SIOCADDRT; extern unsigned IOCTL_SIOCDELRT; extern unsigned IOCTL_SNDCTL_DSP_GETBLKSIZE; extern unsigned IOCTL_SNDCTL_DSP_GETFMTS; extern unsigned IOCTL_SNDCTL_DSP_NONBLOCK; extern unsigned IOCTL_SNDCTL_DSP_POST; extern unsigned IOCTL_SNDCTL_DSP_RESET; extern unsigned IOCTL_SNDCTL_DSP_SETFMT; extern unsigned IOCTL_SNDCTL_DSP_SETFRAGMENT; extern unsigned IOCTL_SNDCTL_DSP_SPEED; extern unsigned IOCTL_SNDCTL_DSP_STEREO; extern unsigned IOCTL_SNDCTL_DSP_SUBDIVIDE; extern unsigned IOCTL_SNDCTL_DSP_SYNC; extern unsigned IOCTL_SNDCTL_FM_4OP_ENABLE; extern unsigned IOCTL_SNDCTL_FM_LOAD_INSTR; extern unsigned IOCTL_SNDCTL_MIDI_INFO; extern unsigned IOCTL_SNDCTL_MIDI_PRETIME; extern unsigned IOCTL_SNDCTL_SEQ_CTRLRATE; extern unsigned IOCTL_SNDCTL_SEQ_GETINCOUNT; extern unsigned IOCTL_SNDCTL_SEQ_GETOUTCOUNT; extern unsigned IOCTL_SNDCTL_SEQ_NRMIDIS; extern unsigned IOCTL_SNDCTL_SEQ_NRSYNTHS; extern unsigned IOCTL_SNDCTL_SEQ_OUTOFBAND; extern unsigned IOCTL_SNDCTL_SEQ_PANIC; extern unsigned IOCTL_SNDCTL_SEQ_PERCMODE; extern unsigned IOCTL_SNDCTL_SEQ_RESET; extern unsigned IOCTL_SNDCTL_SEQ_RESETSAMPLES; extern unsigned IOCTL_SNDCTL_SEQ_SYNC; extern unsigned IOCTL_SNDCTL_SEQ_TESTMIDI; extern unsigned IOCTL_SNDCTL_SEQ_THRESHOLD; extern unsigned IOCTL_SNDCTL_SYNTH_INFO; extern unsigned IOCTL_SNDCTL_SYNTH_MEMAVL; extern unsigned IOCTL_SNDCTL_TMR_CONTINUE; extern unsigned IOCTL_SNDCTL_TMR_METRONOME; extern unsigned IOCTL_SNDCTL_TMR_SELECT; extern unsigned IOCTL_SNDCTL_TMR_SOURCE; extern unsigned IOCTL_SNDCTL_TMR_START; extern unsigned IOCTL_SNDCTL_TMR_STOP; extern unsigned IOCTL_SNDCTL_TMR_TEMPO; extern unsigned IOCTL_SNDCTL_TMR_TIMEBASE; extern unsigned IOCTL_SOUND_MIXER_READ_ALTPCM; extern unsigned IOCTL_SOUND_MIXER_READ_BASS; extern unsigned IOCTL_SOUND_MIXER_READ_CAPS; extern unsigned IOCTL_SOUND_MIXER_READ_CD; extern unsigned IOCTL_SOUND_MIXER_READ_DEVMASK; extern unsigned IOCTL_SOUND_MIXER_READ_ENHANCE; extern unsigned IOCTL_SOUND_MIXER_READ_IGAIN; extern unsigned IOCTL_SOUND_MIXER_READ_IMIX; extern unsigned IOCTL_SOUND_MIXER_READ_LINE1; extern unsigned IOCTL_SOUND_MIXER_READ_LINE2; extern unsigned IOCTL_SOUND_MIXER_READ_LINE3; extern unsigned IOCTL_SOUND_MIXER_READ_LINE; extern unsigned IOCTL_SOUND_MIXER_READ_LOUD; extern unsigned IOCTL_SOUND_MIXER_READ_MIC; extern unsigned IOCTL_SOUND_MIXER_READ_MUTE; extern unsigned IOCTL_SOUND_MIXER_READ_OGAIN; extern unsigned IOCTL_SOUND_MIXER_READ_PCM; extern unsigned IOCTL_SOUND_MIXER_READ_RECLEV; extern unsigned IOCTL_SOUND_MIXER_READ_RECMASK; extern unsigned IOCTL_SOUND_MIXER_READ_RECSRC; extern unsigned IOCTL_SOUND_MIXER_READ_SPEAKER; extern unsigned IOCTL_SOUND_MIXER_READ_STEREODEVS; extern unsigned IOCTL_SOUND_MIXER_READ_SYNTH; extern unsigned IOCTL_SOUND_MIXER_READ_TREBLE; extern unsigned IOCTL_SOUND_MIXER_READ_VOLUME; extern unsigned IOCTL_SOUND_MIXER_WRITE_ALTPCM; extern unsigned IOCTL_SOUND_MIXER_WRITE_BASS; extern unsigned IOCTL_SOUND_MIXER_WRITE_CD; extern unsigned IOCTL_SOUND_MIXER_WRITE_ENHANCE; extern unsigned IOCTL_SOUND_MIXER_WRITE_IGAIN; extern unsigned IOCTL_SOUND_MIXER_WRITE_IMIX; extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE1; extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE2; extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE3; extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE; extern unsigned IOCTL_SOUND_MIXER_WRITE_LOUD; extern unsigned IOCTL_SOUND_MIXER_WRITE_MIC; extern unsigned IOCTL_SOUND_MIXER_WRITE_MUTE; extern unsigned IOCTL_SOUND_MIXER_WRITE_OGAIN; extern unsigned IOCTL_SOUND_MIXER_WRITE_PCM; extern unsigned IOCTL_SOUND_MIXER_WRITE_RECLEV; extern unsigned IOCTL_SOUND_MIXER_WRITE_RECSRC; extern unsigned IOCTL_SOUND_MIXER_WRITE_SPEAKER; extern unsigned IOCTL_SOUND_MIXER_WRITE_SYNTH; extern unsigned IOCTL_SOUND_MIXER_WRITE_TREBLE; extern unsigned IOCTL_SOUND_MIXER_WRITE_VOLUME; extern unsigned IOCTL_SOUND_PCM_READ_BITS; extern unsigned IOCTL_SOUND_PCM_READ_CHANNELS; extern unsigned IOCTL_SOUND_PCM_READ_FILTER; extern unsigned IOCTL_SOUND_PCM_READ_RATE; extern unsigned IOCTL_SOUND_PCM_WRITE_CHANNELS; extern unsigned IOCTL_SOUND_PCM_WRITE_FILTER; extern unsigned IOCTL_VT_ACTIVATE; extern unsigned IOCTL_VT_GETMODE; extern unsigned IOCTL_VT_OPENQRY; extern unsigned IOCTL_VT_RELDISP; extern unsigned IOCTL_VT_SETMODE; extern unsigned IOCTL_VT_WAITACTIVE; #endif // SANITIZER_LINUX || SANITIZER_FREEBSD #if SANITIZER_LINUX && !SANITIZER_ANDROID extern unsigned IOCTL_CYGETDEFTHRESH; extern unsigned IOCTL_CYGETDEFTIMEOUT; extern unsigned IOCTL_CYGETMON; extern unsigned IOCTL_CYGETTHRESH; extern unsigned IOCTL_CYGETTIMEOUT; extern unsigned IOCTL_CYSETDEFTHRESH; extern unsigned IOCTL_CYSETDEFTIMEOUT; extern unsigned IOCTL_CYSETTHRESH; extern unsigned IOCTL_CYSETTIMEOUT; extern unsigned IOCTL_EQL_EMANCIPATE; extern unsigned IOCTL_EQL_ENSLAVE; extern unsigned IOCTL_EQL_GETMASTRCFG; extern unsigned IOCTL_EQL_GETSLAVECFG; extern unsigned IOCTL_EQL_SETMASTRCFG; extern unsigned IOCTL_EQL_SETSLAVECFG; extern unsigned IOCTL_EVIOCGKEYCODE_V2; extern unsigned IOCTL_EVIOCGPROP; extern unsigned IOCTL_EVIOCSKEYCODE_V2; extern unsigned IOCTL_FS_IOC_GETFLAGS; extern unsigned IOCTL_FS_IOC_GETVERSION; extern unsigned IOCTL_FS_IOC_SETFLAGS; extern unsigned IOCTL_FS_IOC_SETVERSION; extern unsigned IOCTL_GIO_CMAP; extern unsigned IOCTL_GIO_FONT; extern unsigned IOCTL_GIO_UNIMAP; extern unsigned IOCTL_GIO_UNISCRNMAP; extern unsigned IOCTL_KDADDIO; extern unsigned IOCTL_KDDELIO; extern unsigned IOCTL_KDGETKEYCODE; extern unsigned IOCTL_KDGKBDIACR; extern unsigned IOCTL_KDGKBENT; extern unsigned IOCTL_KDGKBLED; extern unsigned IOCTL_KDGKBMETA; extern unsigned IOCTL_KDGKBSENT; extern unsigned IOCTL_KDMAPDISP; extern unsigned IOCTL_KDSETKEYCODE; extern unsigned IOCTL_KDSIGACCEPT; extern unsigned IOCTL_KDSKBDIACR; extern unsigned IOCTL_KDSKBENT; extern unsigned IOCTL_KDSKBLED; extern unsigned IOCTL_KDSKBMETA; extern unsigned IOCTL_KDSKBSENT; extern unsigned IOCTL_KDUNMAPDISP; extern unsigned IOCTL_LPABORT; extern unsigned IOCTL_LPABORTOPEN; extern unsigned IOCTL_LPCAREFUL; extern unsigned IOCTL_LPCHAR; extern unsigned IOCTL_LPGETIRQ; extern unsigned IOCTL_LPGETSTATUS; extern unsigned IOCTL_LPRESET; extern unsigned IOCTL_LPSETIRQ; extern unsigned IOCTL_LPTIME; extern unsigned IOCTL_LPWAIT; extern unsigned IOCTL_MTIOCGETCONFIG; extern unsigned IOCTL_MTIOCSETCONFIG; extern unsigned IOCTL_PIO_CMAP; extern unsigned IOCTL_PIO_FONT; extern unsigned IOCTL_PIO_UNIMAP; extern unsigned IOCTL_PIO_UNIMAPCLR; extern unsigned IOCTL_PIO_UNISCRNMAP; extern unsigned IOCTL_SCSI_IOCTL_GET_IDLUN; extern unsigned IOCTL_SCSI_IOCTL_PROBE_HOST; extern unsigned IOCTL_SCSI_IOCTL_TAGGED_DISABLE; extern unsigned IOCTL_SCSI_IOCTL_TAGGED_ENABLE; extern unsigned IOCTL_SIOCAIPXITFCRT; extern unsigned IOCTL_SIOCAIPXPRISLT; extern unsigned IOCTL_SIOCAX25ADDUID; extern unsigned IOCTL_SIOCAX25DELUID; extern unsigned IOCTL_SIOCAX25GETPARMS; extern unsigned IOCTL_SIOCAX25GETUID; extern unsigned IOCTL_SIOCAX25NOUID; extern unsigned IOCTL_SIOCAX25SETPARMS; extern unsigned IOCTL_SIOCDEVPLIP; extern unsigned IOCTL_SIOCIPXCFGDATA; extern unsigned IOCTL_SIOCNRDECOBS; extern unsigned IOCTL_SIOCNRGETPARMS; extern unsigned IOCTL_SIOCNRRTCTL; extern unsigned IOCTL_SIOCNRSETPARMS; extern unsigned IOCTL_SNDCTL_DSP_GETISPACE; extern unsigned IOCTL_SNDCTL_DSP_GETOSPACE; extern unsigned IOCTL_TIOCGSERIAL; extern unsigned IOCTL_TIOCSERGETMULTI; extern unsigned IOCTL_TIOCSERSETMULTI; extern unsigned IOCTL_TIOCSSERIAL; #endif // SANITIZER_LINUX && !SANITIZER_ANDROID #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID extern unsigned IOCTL_GIO_SCRNMAP; extern unsigned IOCTL_KDDISABIO; extern unsigned IOCTL_KDENABIO; extern unsigned IOCTL_KDGETLED; extern unsigned IOCTL_KDGETMODE; extern unsigned IOCTL_KDGKBMODE; extern unsigned IOCTL_KDGKBTYPE; extern unsigned IOCTL_KDMKTONE; extern unsigned IOCTL_KDSETLED; extern unsigned IOCTL_KDSETMODE; extern unsigned IOCTL_KDSKBMODE; extern unsigned IOCTL_KIOCSOUND; extern unsigned IOCTL_PIO_SCRNMAP; #endif extern const int errno_EINVAL; extern const int errno_EOWNERDEAD; extern const int si_SEGV_MAPERR; extern const int si_SEGV_ACCERR; } // namespace __sanitizer #define CHECK_TYPE_SIZE(TYPE) \ COMPILER_CHECK(sizeof(__sanitizer_##TYPE) == sizeof(TYPE)) #define CHECK_SIZE_AND_OFFSET(CLASS, MEMBER) \ COMPILER_CHECK(sizeof(((__sanitizer_##CLASS *) NULL)->MEMBER) == \ sizeof(((CLASS *) NULL)->MEMBER)); \ COMPILER_CHECK(offsetof(__sanitizer_##CLASS, MEMBER) == \ offsetof(CLASS, MEMBER)) // For sigaction, which is a function and struct at the same time, // and thus requires explicit "struct" in sizeof() expression. #define CHECK_STRUCT_SIZE_AND_OFFSET(CLASS, MEMBER) \ COMPILER_CHECK(sizeof(((struct __sanitizer_##CLASS *) NULL)->MEMBER) == \ sizeof(((struct CLASS *) NULL)->MEMBER)); \ COMPILER_CHECK(offsetof(struct __sanitizer_##CLASS, MEMBER) == \ offsetof(struct CLASS, MEMBER)) #endif Index: head/contrib/openbsm/libbsm/bsm_wrappers.c =================================================================== --- head/contrib/openbsm/libbsm/bsm_wrappers.c (revision 318735) +++ head/contrib/openbsm/libbsm/bsm_wrappers.c (revision 318736) @@ -1,851 +1,853 @@ /*- * Copyright (c) 2004-2009 Apple Inc. * Copyright (c) 2016 Robert N. M. Watson * All rights reserved. * * Portions of this software were developed by BAE Systems, the University of * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent * Computing (TC) research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Apple Inc. ("Apple") nor the names of * its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifdef __APPLE__ #define _SYS_AUDIT_H /* Prevent include of sys/audit.h. */ #endif #include #include #ifdef __APPLE__ #include /* Our bsm/audit.h doesn't include queue.h. */ #endif #include #include #include #include #include #include #include /* These are not advertised in libbsm.h */ int audit_set_terminal_port(dev_t *p); int audit_set_terminal_host(uint32_t *m); /* * General purpose audit submission mechanism for userspace. */ int audit_submit(short au_event, au_id_t auid, char status, int reterr, const char *fmt, ...) { char text[MAX_AUDITSTRING_LEN]; token_t *token; int acond; va_list ap; pid_t pid; int error, afd, subj_ex; struct auditinfo ai; struct auditinfo_addr aia; au_tid_t atid; if (audit_get_cond(&acond) != 0) { /* * If auditon(2) returns ENOSYS, then audit has not been * compiled into the kernel, so just return. */ if (errno == ENOSYS) return (0); error = errno; syslog(LOG_AUTH | LOG_ERR, "audit: auditon failed: %s", strerror(errno)); errno = error; return (-1); } if (acond == AUC_NOAUDIT) return (0); afd = au_open(); if (afd < 0) { error = errno; syslog(LOG_AUTH | LOG_ERR, "audit: au_open failed: %s", strerror(errno)); errno = error; return (-1); } /* * Try to use getaudit_addr(2) first. If this kernel does not support * it, then fall back on to getaudit(2). */ subj_ex = 0; error = getaudit_addr(&aia, sizeof(aia)); if (error < 0 && errno == ENOSYS) { error = getaudit(&ai); if (error < 0) { error = errno; syslog(LOG_AUTH | LOG_ERR, "audit: getaudit failed: %s", strerror(errno)); errno = error; return (-1); } /* * Convert this auditinfo_t to an auditinfo_addr_t to make the * following code less complicated wrt to preselection and * subject token generation. */ aia.ai_auid = ai.ai_auid; aia.ai_mask = ai.ai_mask; aia.ai_asid = ai.ai_asid; aia.ai_termid.at_type = AU_IPv4; aia.ai_termid.at_addr[0] = ai.ai_termid.machine; aia.ai_termid.at_port = ai.ai_termid.port; } else if (error < 0) { error = errno; syslog(LOG_AUTH | LOG_ERR, "audit: getaudit_addr failed: %s", strerror(errno)); errno = error; return (-1); } /* * NB: We should be performing pre-selection here now that we have the * masks for this process. */ if (aia.ai_termid.at_type == AU_IPv6) subj_ex = 1; pid = getpid(); if (subj_ex == 0) { atid.port = aia.ai_termid.at_port; atid.machine = aia.ai_termid.at_addr[0]; token = au_to_subject32(auid, geteuid(), getegid(), getuid(), getgid(), pid, pid, &atid); } else token = au_to_subject_ex(auid, geteuid(), getegid(), getuid(), getgid(), pid, pid, &aia.ai_termid); if (token == NULL) { syslog(LOG_AUTH | LOG_ERR, "audit: unable to build subject token"); (void) au_close(afd, AU_TO_NO_WRITE, au_event); errno = EPERM; return (-1); } if (au_write(afd, token) < 0) { error = errno; syslog(LOG_AUTH | LOG_ERR, "audit: au_write failed: %s", strerror(errno)); (void) au_close(afd, AU_TO_NO_WRITE, au_event); errno = error; return (-1); } if (fmt != NULL) { va_start(ap, fmt); (void) vsnprintf(text, MAX_AUDITSTRING_LEN, fmt, ap); va_end(ap); token = au_to_text(text); if (token == NULL) { syslog(LOG_AUTH | LOG_ERR, "audit: failed to generate text token"); (void) au_close(afd, AU_TO_NO_WRITE, au_event); errno = EPERM; return (-1); } if (au_write(afd, token) < 0) { error = errno; syslog(LOG_AUTH | LOG_ERR, "audit: au_write failed: %s", strerror(errno)); (void) au_close(afd, AU_TO_NO_WRITE, au_event); errno = error; return (-1); } } token = au_to_return32(au_errno_to_bsm(status), reterr); if (token == NULL) { syslog(LOG_AUTH | LOG_ERR, "audit: unable to build return token"); (void) au_close(afd, AU_TO_NO_WRITE, au_event); errno = EPERM; return (-1); } if (au_write(afd, token) < 0) { error = errno; syslog(LOG_AUTH | LOG_ERR, "audit: au_write failed: %s", strerror(errno)); (void) au_close(afd, AU_TO_NO_WRITE, au_event); errno = error; return (-1); } if (au_close(afd, AU_TO_WRITE, au_event) < 0) { error = errno; syslog(LOG_AUTH | LOG_ERR, "audit: record not committed"); errno = error; return (-1); } return (0); } int audit_set_terminal_port(dev_t *p) { struct stat st; if (p == NULL) return (kAUBadParamErr); #ifdef NODEV *p = NODEV; #else *p = -1; #endif /* for /usr/bin/login, try fstat() first */ if (fstat(STDIN_FILENO, &st) != 0) { if (errno != EBADF) { syslog(LOG_ERR, "fstat() failed (%s)", strerror(errno)); return (kAUStatErr); } if (stat("/dev/console", &st) != 0) { syslog(LOG_ERR, "stat() failed (%s)", strerror(errno)); return (kAUStatErr); } } *p = st.st_rdev; return (kAUNoErr); } int audit_set_terminal_host(uint32_t *m) { #ifdef KERN_HOSTID int name[2] = { CTL_KERN, KERN_HOSTID }; size_t len; if (m == NULL) return (kAUBadParamErr); *m = 0; len = sizeof(*m); if (sysctl(name, 2, m, &len, NULL, 0) != 0) { syslog(LOG_ERR, "sysctl() failed (%s)", strerror(errno)); return (kAUSysctlErr); } return (kAUNoErr); #else *m = -1; return (kAUNoErr); #endif } int audit_set_terminal_id(au_tid_t *tid) { + dev_t port; int ret; if (tid == NULL) return (kAUBadParamErr); - if ((ret = audit_set_terminal_port(&tid->port)) != kAUNoErr) + if ((ret = audit_set_terminal_port(&port)) != kAUNoErr) return (ret); + tid->port = port; return (audit_set_terminal_host(&tid->machine)); } /* * This is OK for those callers who have only one token to write. If you have * multiple tokens that logically form part of the same audit record, you need * to use the existing au_open()/au_write()/au_close() API: * * aufd = au_open(); * tok = au_to_random_token_1(...); * au_write(aufd, tok); * tok = au_to_random_token_2(...); * au_write(aufd, tok); * ... * au_close(aufd, AU_TO_WRITE, AUE_your_event_type); * * Assumes, like all wrapper calls, that the caller has previously checked * that auditing is enabled via the audit_get_state() call. * * XXX: Should be more robust against bad arguments. */ int audit_write(short event_code, token_t *subject, token_t *misctok, char retval, int errcode) { int aufd; char *func = "audit_write()"; token_t *rettok; if ((aufd = au_open()) == -1) { au_free_token(subject); au_free_token(misctok); syslog(LOG_ERR, "%s: au_open() failed", func); return (kAUOpenErr); } /* Save subject. */ if (subject && au_write(aufd, subject) == -1) { au_free_token(subject); au_free_token(misctok); (void)au_close(aufd, AU_TO_NO_WRITE, event_code); syslog(LOG_ERR, "%s: write of subject failed", func); return (kAUWriteSubjectTokErr); } /* Save the event-specific token. */ if (misctok && au_write(aufd, misctok) == -1) { au_free_token(misctok); (void)au_close(aufd, AU_TO_NO_WRITE, event_code); syslog(LOG_ERR, "%s: write of caller token failed", func); return (kAUWriteCallerTokErr); } /* Tokenize and save the return value. */ if ((rettok = au_to_return32(retval, errcode)) == NULL) { (void)au_close(aufd, AU_TO_NO_WRITE, event_code); syslog(LOG_ERR, "%s: au_to_return32() failed", func); return (kAUMakeReturnTokErr); } if (au_write(aufd, rettok) == -1) { au_free_token(rettok); (void)au_close(aufd, AU_TO_NO_WRITE, event_code); syslog(LOG_ERR, "%s: write of return code failed", func); return (kAUWriteReturnTokErr); } /* * We assume the caller wouldn't have bothered with this * function if it hadn't already decided to keep the record. */ if (au_close(aufd, AU_TO_WRITE, event_code) < 0) { syslog(LOG_ERR, "%s: au_close() failed", func); return (kAUCloseErr); } return (kAUNoErr); } /* * Same caveats as audit_write(). In addition, this function explicitly * assumes success; use audit_write_failure() on error. */ int audit_write_success(short event_code, token_t *tok, au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, au_asid_t sid, au_tid_t *tid) { char *func = "audit_write_success()"; token_t *subject = NULL; /* Tokenize and save subject. */ subject = au_to_subject32(auid, euid, egid, ruid, rgid, pid, sid, tid); if (subject == NULL) { syslog(LOG_ERR, "%s: au_to_subject32() failed", func); return kAUMakeSubjectTokErr; } return (audit_write(event_code, subject, tok, 0, 0)); } /* * Same caveats as audit_write(). In addition, this function explicitly * assumes success; use audit_write_failure_self() on error. */ int audit_write_success_self(short event_code, token_t *tok) { token_t *subject; char *func = "audit_write_success_self()"; if ((subject = au_to_me()) == NULL) { syslog(LOG_ERR, "%s: au_to_me() failed", func); return (kAUMakeSubjectTokErr); } return (audit_write(event_code, subject, tok, 0, 0)); } /* * Same caveats as audit_write(). In addition, this function explicitly * assumes failure; use audit_write_success() otherwise. * * XXX This should let the caller pass an error return value rather than * hard-coding -1. */ int audit_write_failure(short event_code, char *errmsg, int errcode, au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, au_asid_t sid, au_tid_t *tid) { char *func = "audit_write_failure()"; token_t *subject, *errtok; subject = au_to_subject32(auid, euid, egid, ruid, rgid, pid, sid, tid); if (subject == NULL) { syslog(LOG_ERR, "%s: au_to_subject32() failed", func); return (kAUMakeSubjectTokErr); } /* tokenize and save the error message */ if ((errtok = au_to_text(errmsg)) == NULL) { au_free_token(subject); syslog(LOG_ERR, "%s: au_to_text() failed", func); return (kAUMakeTextTokErr); } return (audit_write(event_code, subject, errtok, -1, errcode)); } /* * Same caveats as audit_write(). In addition, this function explicitly * assumes failure; use audit_write_success_self() otherwise. * * XXX This should let the caller pass an error return value rather than * hard-coding -1. */ int audit_write_failure_self(short event_code, char *errmsg, int errret) { char *func = "audit_write_failure_self()"; token_t *subject, *errtok; if ((subject = au_to_me()) == NULL) { syslog(LOG_ERR, "%s: au_to_me() failed", func); return (kAUMakeSubjectTokErr); } /* tokenize and save the error message */ if ((errtok = au_to_text(errmsg)) == NULL) { au_free_token(subject); syslog(LOG_ERR, "%s: au_to_text() failed", func); return (kAUMakeTextTokErr); } return (audit_write(event_code, subject, errtok, -1, errret)); } /* * For auditing errors during login. Such errors are implicitly * non-attributable (i.e., not ascribable to any user). * * Assumes, like all wrapper calls, that the caller has previously checked * that auditing is enabled via the audit_get_state() call. */ int audit_write_failure_na(short event_code, char *errmsg, int errret, uid_t euid, uid_t egid, pid_t pid, au_tid_t *tid) { return (audit_write_failure(event_code, errmsg, errret, -1, euid, egid, -1, -1, pid, -1, tid)); } /* END OF au_write() WRAPPERS */ #ifdef __APPLE__ void audit_token_to_au32(audit_token_t atoken, uid_t *auidp, uid_t *euidp, gid_t *egidp, uid_t *ruidp, gid_t *rgidp, pid_t *pidp, au_asid_t *asidp, au_tid_t *tidp) { if (auidp != NULL) *auidp = (uid_t)atoken.val[0]; if (euidp != NULL) *euidp = (uid_t)atoken.val[1]; if (egidp != NULL) *egidp = (gid_t)atoken.val[2]; if (ruidp != NULL) *ruidp = (uid_t)atoken.val[3]; if (rgidp != NULL) *rgidp = (gid_t)atoken.val[4]; if (pidp != NULL) *pidp = (pid_t)atoken.val[5]; if (asidp != NULL) *asidp = (au_asid_t)atoken.val[6]; if (tidp != NULL) { audit_set_terminal_host(&tidp->machine); tidp->port = (dev_t)atoken.val[7]; } } #endif /* !__APPLE__ */ int audit_get_cond(int *cond) { int ret; ret = auditon(A_GETCOND, cond, sizeof(*cond)); #ifdef A_OLDGETCOND if ((0 != ret) && EINVAL == errno) { long lcond = *cond; ret = auditon(A_OLDGETCOND, &lcond, sizeof(lcond)); *cond = (int)lcond; } #endif return (ret); } int audit_set_cond(int *cond) { int ret; ret = auditon(A_SETCOND, cond, sizeof(*cond)); #ifdef A_OLDSETCOND if ((0 != ret) && (EINVAL == errno)) { long lcond = (long)*cond; ret = auditon(A_OLDSETCOND, &lcond, sizeof(lcond)); *cond = (int)lcond; } #endif return (ret); } int audit_get_policy(int *policy) { int ret; ret = auditon(A_GETPOLICY, policy, sizeof(*policy)); #ifdef A_OLDGETPOLICY if ((0 != ret) && (EINVAL == errno)){ long lpolicy = (long)*policy; ret = auditon(A_OLDGETPOLICY, &lpolicy, sizeof(lpolicy)); *policy = (int)lpolicy; } #endif return (ret); } int audit_set_policy(int *policy) { int ret; ret = auditon(A_SETPOLICY, policy, sizeof(*policy)); #ifdef A_OLDSETPOLICY if ((0 != ret) && (EINVAL == errno)){ long lpolicy = (long)*policy; ret = auditon(A_OLDSETPOLICY, &lpolicy, sizeof(lpolicy)); *policy = (int)lpolicy; } #endif return (ret); } int audit_get_qctrl(au_qctrl_t *qctrl, size_t sz) { int ret; if (sizeof(*qctrl) != sz) { errno = EINVAL; return (-1); } ret = auditon(A_GETQCTRL, qctrl, sizeof(*qctrl)); #ifdef A_OLDGETQCTRL if ((0 != ret) && (EINVAL == errno)){ struct old_qctrl { size_t oq_hiwater; size_t oq_lowater; size_t oq_bufsz; clock_t oq_delay; int oq_minfree; } oq; oq.oq_hiwater = (size_t)qctrl->aq_hiwater; oq.oq_lowater = (size_t)qctrl->aq_lowater; oq.oq_bufsz = (size_t)qctrl->aq_bufsz; oq.oq_delay = (clock_t)qctrl->aq_delay; oq.oq_minfree = qctrl->aq_minfree; ret = auditon(A_OLDGETQCTRL, &oq, sizeof(oq)); qctrl->aq_hiwater = (int)oq.oq_hiwater; qctrl->aq_lowater = (int)oq.oq_lowater; qctrl->aq_bufsz = (int)oq.oq_bufsz; qctrl->aq_delay = (int)oq.oq_delay; qctrl->aq_minfree = oq.oq_minfree; } #endif /* A_OLDGETQCTRL */ return (ret); } int audit_set_qctrl(au_qctrl_t *qctrl, size_t sz) { int ret; if (sizeof(*qctrl) != sz) { errno = EINVAL; return (-1); } ret = auditon(A_SETQCTRL, qctrl, sz); #ifdef A_OLDSETQCTRL if ((0 != ret) && (EINVAL == errno)) { struct old_qctrl { size_t oq_hiwater; size_t oq_lowater; size_t oq_bufsz; clock_t oq_delay; int oq_minfree; } oq; oq.oq_hiwater = (size_t)qctrl->aq_hiwater; oq.oq_lowater = (size_t)qctrl->aq_lowater; oq.oq_bufsz = (size_t)qctrl->aq_bufsz; oq.oq_delay = (clock_t)qctrl->aq_delay; oq.oq_minfree = qctrl->aq_minfree; ret = auditon(A_OLDSETQCTRL, &oq, sizeof(oq)); qctrl->aq_hiwater = (int)oq.oq_hiwater; qctrl->aq_lowater = (int)oq.oq_lowater; qctrl->aq_bufsz = (int)oq.oq_bufsz; qctrl->aq_delay = (int)oq.oq_delay; qctrl->aq_minfree = oq.oq_minfree; } #endif /* A_OLDSETQCTRL */ return (ret); } int audit_send_trigger(int *trigger) { return (auditon(A_SENDTRIGGER, trigger, sizeof(*trigger))); } int audit_get_kaudit(auditinfo_addr_t *aia, size_t sz) { if (sizeof(*aia) != sz) { errno = EINVAL; return (-1); } return (auditon(A_GETKAUDIT, aia, sz)); } int audit_set_kaudit(auditinfo_addr_t *aia, size_t sz) { if (sizeof(*aia) != sz) { errno = EINVAL; return (-1); } return (auditon(A_SETKAUDIT, aia, sz)); } int audit_get_class(au_evclass_map_t *evc_map, size_t sz) { if (sizeof(*evc_map) != sz) { errno = EINVAL; return (-1); } return (auditon(A_GETCLASS, evc_map, sz)); } int audit_set_class(au_evclass_map_t *evc_map, size_t sz) { if (sizeof(*evc_map) != sz) { errno = EINVAL; return (-1); } return (auditon(A_SETCLASS, evc_map, sz)); } int audit_get_event(au_evname_map_t *evn_map, size_t sz) { if (sizeof(*evn_map) != sz) { errno = EINVAL; return (-1); } return (auditon(A_GETEVENT, evn_map, sz)); } int audit_set_event(au_evname_map_t *evn_map, size_t sz) { if (sizeof(*evn_map) != sz) { errno = EINVAL; return (-1); } return (auditon(A_SETEVENT, evn_map, sz)); } int audit_get_kmask(au_mask_t *kmask, size_t sz) { if (sizeof(*kmask) != sz) { errno = EINVAL; return (-1); } return (auditon(A_GETKMASK, kmask, sz)); } int audit_set_kmask(au_mask_t *kmask, size_t sz) { if (sizeof(*kmask) != sz) { errno = EINVAL; return (-1); } return (auditon(A_SETKMASK, kmask, sz)); } int audit_get_fsize(au_fstat_t *fstat, size_t sz) { if (sizeof(*fstat) != sz) { errno = EINVAL; return (-1); } return (auditon(A_GETFSIZE, fstat, sz)); } int audit_set_fsize(au_fstat_t *fstat, size_t sz) { if (sizeof(*fstat) != sz) { errno = EINVAL; return (-1); } return (auditon(A_SETFSIZE, fstat, sz)); } int audit_set_pmask(auditpinfo_t *api, size_t sz) { if (sizeof(*api) != sz) { errno = EINVAL; return (-1); } return (auditon(A_SETPMASK, api, sz)); } int audit_get_pinfo(auditpinfo_t *api, size_t sz) { if (sizeof(*api) != sz) { errno = EINVAL; return (-1); } return (auditon(A_GETPINFO, api, sz)); } int audit_get_pinfo_addr(auditpinfo_addr_t *apia, size_t sz) { if (sizeof(*apia) != sz) { errno = EINVAL; return (-1); } return (auditon(A_GETPINFO_ADDR, apia, sz)); } int audit_get_sinfo_addr(auditinfo_addr_t *aia, size_t sz) { if (sizeof(*aia) != sz) { errno = EINVAL; return (-1); } return (auditon(A_GETSINFO_ADDR, aia, sz)); } int audit_get_stat(au_stat_t *stats, size_t sz) { if (sizeof(*stats) != sz) { errno = EINVAL; return (-1); } return (auditon(A_GETSTAT, stats, sz)); } int audit_set_stat(au_stat_t *stats, size_t sz) { if (sizeof(*stats) != sz) { errno = EINVAL; return (-1); } return (auditon(A_GETSTAT, stats, sz)); } int audit_get_cwd(char *path, size_t sz) { return (auditon(A_GETCWD, path, sz)); } int audit_get_car(char *path, size_t sz) { return (auditon(A_GETCAR, path, sz)); } Index: head/include/dirent.h =================================================================== --- head/include/dirent.h (revision 318735) +++ head/include/dirent.h (revision 318736) @@ -1,122 +1,141 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)dirent.h 8.2 (Berkeley) 7/28/94 * $FreeBSD$ */ #ifndef _DIRENT_H_ #define _DIRENT_H_ /* * The kernel defines the format of directory entries returned by * the getdirentries(2) system call. */ #include #include #include +#if __BSD_VISIBLE + +#ifndef _SIZE_T_DECLARED +typedef __size_t size_t; +#define _SIZE_T_DECLARED +#endif + +#ifndef _SSIZE_T_DECLARED +typedef __ssize_t ssize_t; +#define _SSIZE_T_DECLARED +#endif + +#ifndef _OFF_T_DECLARED +typedef __off_t off_t; +#define _OFF_T_DECLARED +#endif + +#endif /* __BSD_VISIBLE */ + #if __XSI_VISIBLE #ifndef _INO_T_DECLARED typedef __ino_t ino_t; #define _INO_T_DECLARED #endif /* * XXX this is probably illegal in the __XSI_VISIBLE case, but brings us closer * to the specification. */ #define d_ino d_fileno /* backward and XSI compatibility */ #endif /* __XSI_VISIBLE */ #if __BSD_VISIBLE #include /* definitions for library routines operating on directories. */ #define DIRBLKSIZ 1024 struct _dirdesc; typedef struct _dirdesc DIR; /* flags for opendir2 */ #define DTF_HIDEW 0x0001 /* hide whiteout entries */ #define DTF_NODUP 0x0002 /* don't return duplicate names */ #define DTF_REWIND 0x0004 /* rewind after reading union stack */ #define __DTF_READALL 0x0008 /* everything has been read */ #define __DTF_SKIPREAD 0x0010 /* assume internal buffer is populated */ #else /* !__BSD_VISIBLE */ typedef void * DIR; #endif /* __BSD_VISIBLE */ #ifndef _KERNEL __BEGIN_DECLS #if __POSIX_VISIBLE >= 200809 || __XSI_VISIBLE >= 700 int alphasort(const struct dirent **, const struct dirent **); int dirfd(DIR *); #endif #if __BSD_VISIBLE DIR *__opendir2(const char *, int); int fdclosedir(DIR *); -int getdents(int, char *, int); -int getdirentries(int, char *, int, long *); +ssize_t getdents(int, char *, size_t); +ssize_t getdirentries(int, char *, size_t, off_t *); #endif DIR *opendir(const char *); DIR *fdopendir(int); struct dirent * readdir(DIR *); #if __POSIX_VISIBLE >= 199506 || __XSI_VISIBLE >= 500 int readdir_r(DIR *, struct dirent *, struct dirent **); #endif void rewinddir(DIR *); #if __POSIX_VISIBLE >= 200809 || __XSI_VISIBLE >= 700 int scandir(const char *, struct dirent ***, int (*)(const struct dirent *), int (*)(const struct dirent **, const struct dirent **)); #ifdef __BLOCKS__ int scandir_b(const char *, struct dirent ***, int (^)(const struct dirent *), int (^)(const struct dirent **, const struct dirent **)); #endif #endif #if __XSI_VISIBLE void seekdir(DIR *, long); long telldir(DIR *); #endif int closedir(DIR *); __END_DECLS #endif /* !_KERNEL */ #endif /* !_DIRENT_H_ */ Index: head/lib/libarchive/Makefile =================================================================== --- head/lib/libarchive/Makefile (revision 318735) +++ head/lib/libarchive/Makefile (revision 318736) @@ -1,420 +1,420 @@ # $FreeBSD$ .include PACKAGE=lib${LIB} _LIBARCHIVEDIR= ${SRCTOP}/contrib/libarchive LIB= archive LIBADD= z bz2 lzma bsdxml CFLAGS+= -DHAVE_BZLIB_H=1 -DHAVE_LIBLZMA=1 -DHAVE_LZMA_H=1 # FreeBSD SHLIB_MAJOR value is managed as part of the FreeBSD system. # It has no real relation to the libarchive version number. -SHLIB_MAJOR= 6 +SHLIB_MAJOR= 7 CFLAGS+= -DPLATFORM_CONFIG_H=\"${.CURDIR}/config_freebsd.h\" CFLAGS+= -I${.OBJDIR} .if ${MK_OPENSSL} != "no" CFLAGS+= -DWITH_OPENSSL LIBADD+= crypto .else LIBADD+= md .endif .if ${MK_ICONV} != "no" # TODO: This can be changed back to CFLAGS once iconv works correctly # with statically linked binaries. SHARED_CFLAGS+= -DHAVE_ICONV=1 -DHAVE_ICONV_H=1 -DICONV_CONST= .endif .if ${MACHINE_ARCH:Marm*} != "" || ${MACHINE_ARCH:Mmips*} != "" || \ ${MACHINE_ARCH:Msparc64*} != "" || ${MACHINE_ARCH:Mpowerpc*} != "" NO_WCAST_ALIGN= yes .if ${MACHINE_ARCH:M*64*} == "" CFLAGS+= -DPPMD_32BIT .endif .endif NO_WCAST_ALIGN.clang= .PATH: ${_LIBARCHIVEDIR}/libarchive # Headers to be installed in /usr/include INCS= archive.h archive_entry.h # Sources to be compiled. SRCS= archive_acl.c \ archive_check_magic.c \ archive_cmdline.c \ archive_cryptor.c \ archive_disk_acl_freebsd.c \ archive_digest.c \ archive_entry.c \ archive_entry_copy_stat.c \ archive_entry_link_resolver.c \ archive_entry_sparse.c \ archive_entry_stat.c \ archive_entry_strmode.c \ archive_entry_xattr.c \ archive_getdate.c \ archive_hmac.c \ archive_match.c \ archive_options.c \ archive_pack_dev.c \ archive_pathmatch.c \ archive_ppmd7.c \ archive_random.c \ archive_rb.c \ archive_read.c \ archive_read_add_passphrase.c \ archive_read_append_filter.c \ archive_read_data_into_fd.c \ archive_read_disk_entry_from_file.c \ archive_read_disk_posix.c \ archive_read_disk_set_standard_lookup.c \ archive_read_extract.c \ archive_read_extract2.c \ archive_read_open_fd.c \ archive_read_open_file.c \ archive_read_open_filename.c \ archive_read_open_memory.c \ archive_read_set_format.c \ archive_read_set_options.c \ archive_read_support_filter_all.c \ archive_read_support_filter_bzip2.c \ archive_read_support_filter_compress.c \ archive_read_support_filter_gzip.c \ archive_read_support_filter_grzip.c \ archive_read_support_filter_lrzip.c \ archive_read_support_filter_lz4.c \ archive_read_support_filter_lzop.c \ archive_read_support_filter_none.c \ archive_read_support_filter_program.c \ archive_read_support_filter_rpm.c \ archive_read_support_filter_uu.c \ archive_read_support_filter_xz.c \ archive_read_support_format_7zip.c \ archive_read_support_format_all.c \ archive_read_support_format_ar.c \ archive_read_support_format_by_code.c \ archive_read_support_format_cab.c \ archive_read_support_format_cpio.c \ archive_read_support_format_empty.c \ archive_read_support_format_iso9660.c \ archive_read_support_format_lha.c \ archive_read_support_format_mtree.c \ archive_read_support_format_rar.c \ archive_read_support_format_raw.c \ archive_read_support_format_tar.c \ archive_read_support_format_warc.c \ archive_read_support_format_xar.c \ archive_read_support_format_zip.c \ archive_string.c \ archive_string_sprintf.c \ archive_util.c \ archive_version_details.c \ archive_virtual.c \ archive_write.c \ archive_write_add_filter.c \ archive_write_disk_set_standard_lookup.c \ archive_write_disk_posix.c \ archive_write_open_fd.c \ archive_write_open_file.c \ archive_write_open_filename.c \ archive_write_open_memory.c \ archive_write_add_filter_b64encode.c \ archive_write_add_filter_by_name.c \ archive_write_add_filter_bzip2.c \ archive_write_add_filter_compress.c \ archive_write_add_filter_grzip.c \ archive_write_add_filter_gzip.c \ archive_write_add_filter_lrzip.c \ archive_write_add_filter_lz4.c \ archive_write_add_filter_lzop.c \ archive_write_add_filter_none.c \ archive_write_add_filter_program.c \ archive_write_add_filter_uuencode.c \ archive_write_add_filter_xz.c \ archive_write_set_format.c \ archive_write_set_format_7zip.c \ archive_write_set_format_ar.c \ archive_write_set_format_by_name.c \ archive_write_set_format_cpio.c \ archive_write_set_format_cpio_newc.c \ archive_write_set_format_filter_by_ext.c \ archive_write_set_format_gnutar.c \ archive_write_set_format_iso9660.c \ archive_write_set_format_mtree.c \ archive_write_set_format_pax.c \ archive_write_set_format_raw.c \ archive_write_set_format_shar.c \ archive_write_set_format_ustar.c \ archive_write_set_format_v7tar.c \ archive_write_set_format_warc.c \ archive_write_set_format_xar.c \ archive_write_set_format_zip.c \ archive_write_set_passphrase.c \ archive_write_set_options.c \ filter_fork_posix.c # Man pages to be installed. MAN= archive_entry.3 \ archive_entry_acl.3 \ archive_entry_linkify.3 \ archive_entry_paths.3 \ archive_entry_perms.3 \ archive_entry_stat.3 \ archive_entry_time.3 \ archive_read.3 \ archive_read_data.3 \ archive_read_disk.3 \ archive_read_extract.3 \ archive_read_filter.3 \ archive_read_format.3 \ archive_read_free.3 \ archive_read_header.3 \ archive_read_new.3 \ archive_read_open.3 \ archive_read_set_options.3 \ archive_util.3 \ archive_write.3 \ archive_write_blocksize.3 \ archive_write_data.3 \ archive_write_disk.3 \ archive_write_filter.3 \ archive_write_finish_entry.3 \ archive_write_format.3 \ archive_write_free.3 \ archive_write_header.3 \ archive_write_new.3 \ archive_write_open.3 \ archive_write_set_options.3 \ cpio.5 \ libarchive.3 \ libarchive_changes.3 \ libarchive_internals.3 \ libarchive-formats.5 \ tar.5 # Symlink the man pages under each function name. MLINKS+= archive_entry.3 archive_entry_clear.3 MLINKS+= archive_entry.3 archive_entry_clone.3 MLINKS+= archive_entry.3 archive_entry_free.3 MLINKS+= archive_entry.3 archive_entry_new.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_add_entry.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_add_entry_w.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_clear.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_count.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_next.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_next_w.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_reset.3 MLINKS+= archive_entry_acl.3 archive_entry_acl_text_w.3 MLINKS+= archive_entry_linkify.3 archive_entry_linkresolver.3 MLINKS+= archive_entry_linkify.3 archive_entry_linkresolver_new.3 MLINKS+= archive_entry_linkify.3 archive_entry_linkresolver_set_strategy.3 MLINKS+= archive_entry_linkify.3 archive_entry_linkresolver_free.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_hardlink.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_hardlink_w.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_link.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_link_w.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_pathname.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_pathname_w.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_sourcepath.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_symlink.3 MLINKS+= archive_entry_paths.3 archive_entry_copy_symlink_w.3 MLINKS+= archive_entry_paths.3 archive_entry_hardlink.3 MLINKS+= archive_entry_paths.3 archive_entry_hardlink_w.3 MLINKS+= archive_entry_paths.3 archive_entry_pathname.3 MLINKS+= archive_entry_paths.3 archive_entry_pathname_w.3 MLINKS+= archive_entry_paths.3 archive_entry_set_hardlink.3 MLINKS+= archive_entry_paths.3 archive_entry_set_link.3 MLINKS+= archive_entry_paths.3 archive_entry_set_pathname.3 MLINKS+= archive_entry_paths.3 archive_entry_set_symlink.3 MLINKS+= archive_entry_paths.3 archive_entry_symlink.3 MLINKS+= archive_entry_paths.3 archive_entry_symlink_w.3 MLINKS+= archive_entry_paths.3 archive_entry_update_symlink_utf8.3 MLINKS+= archive_entry_paths.3 archive_entry_update_hardlink_utf8.3 MLINKS+= archive_entry_perms.3 archive_entry_copy_fflags_text.3 MLINKS+= archive_entry_perms.3 archive_entry_copy_fflags_text_w.3 MLINKS+= archive_entry_perms.3 archive_entry_copy_gname.3 MLINKS+= archive_entry_perms.3 archive_entry_copy_gname_w.3 MLINKS+= archive_entry_perms.3 archive_entry_copy_uname.3 MLINKS+= archive_entry_perms.3 archive_entry_copy_uname_w.3 MLINKS+= archive_entry_perms.3 archive_entry_fflags.3 MLINKS+= archive_entry_perms.3 archive_entry_fflags_text.3 MLINKS+= archive_entry_perms.3 archive_entry_gid.3 MLINKS+= archive_entry_perms.3 archive_entry_gname.3 MLINKS+= archive_entry_perms.3 archive_entry_gname_w.3 MLINKS+= archive_entry_perms.3 archive_entry_set_fflags.3 MLINKS+= archive_entry_perms.3 archive_entry_set_gid.3 MLINKS+= archive_entry_perms.3 archive_entry_set_gname.3 MLINKS+= archive_entry_perms.3 archive_entry_perm.3 MLINKS+= archive_entry_perms.3 archive_entry_set_perm.3 MLINKS+= archive_entry_perms.3 archive_entry_set_uid.3 MLINKS+= archive_entry_perms.3 archive_entry_set_uname.3 MLINKS+= archive_entry_perms.3 archive_entry_strmode.3 MLINKS+= archive_entry_perms.3 archive_entry_uid.3 MLINKS+= archive_entry_perms.3 archive_entry_uname.3 MLINKS+= archive_entry_perms.3 archive_entry_uname_w.3 MLINKS+= archive_entry_perms.3 archive_entry_update_gname_utf8.3 MLINKS+= archive_entry_perms.3 archive_entry_update_uname_utf8.3 MLINKS+= archive_entry_stat.3 archive_entry_copy_stat.3 MLINKS+= archive_entry_stat.3 archive_entry_dev.3 MLINKS+= archive_entry_stat.3 archive_entry_dev_is_set.3 MLINKS+= archive_entry_stat.3 archive_entry_devmajor.3 MLINKS+= archive_entry_stat.3 archive_entry_devminor.3 MLINKS+= archive_entry_stat.3 archive_entry_filetype.3 MLINKS+= archive_entry_stat.3 archive_entry_ino.3 MLINKS+= archive_entry_stat.3 archive_entry_ino64.3 MLINKS+= archive_entry_stat.3 archive_entry_ino_is_set.3 MLINKS+= archive_entry_stat.3 archive_entry_mode.3 MLINKS+= archive_entry_stat.3 archive_entry_nlink.3 MLINKS+= archive_entry_stat.3 archive_entry_rdev.3 MLINKS+= archive_entry_stat.3 archive_entry_rdevmajor.3 MLINKS+= archive_entry_stat.3 archive_entry_rdevminor.3 MLINKS+= archive_entry_stat.3 archive_entry_set_dev.3 MLINKS+= archive_entry_stat.3 archive_entry_set_devmajor.3 MLINKS+= archive_entry_stat.3 archive_entry_set_devminor.3 MLINKS+= archive_entry_stat.3 archive_entry_set_filetype.3 MLINKS+= archive_entry_stat.3 archive_entry_set_ino.3 MLINKS+= archive_entry_stat.3 archive_entry_set_ino64.3 MLINKS+= archive_entry_stat.3 archive_entry_set_mode.3 MLINKS+= archive_entry_stat.3 archive_entry_set_nlink.3 MLINKS+= archive_entry_stat.3 archive_entry_set_rdev.3 MLINKS+= archive_entry_stat.3 archive_entry_set_rdevmajor.3 MLINKS+= archive_entry_stat.3 archive_entry_set_rdevminor.3 MLINKS+= archive_entry_stat.3 archive_entry_set_size.3 MLINKS+= archive_entry_stat.3 archive_entry_size.3 MLINKS+= archive_entry_stat.3 archive_entry_size_is_set.3 MLINKS+= archive_entry_stat.3 archive_entry_unset_size.3 MLINKS+= archive_entry_time.3 archive_entry_atime.3 MLINKS+= archive_entry_time.3 archive_entry_atime_is_set.3 MLINKS+= archive_entry_time.3 archive_entry_atime_nsec.3 MLINKS+= archive_entry_time.3 archive_entry_birthtime.3 MLINKS+= archive_entry_time.3 archive_entry_birthtime_is_set.3 MLINKS+= archive_entry_time.3 archive_entry_birthtime_nsec.3 MLINKS+= archive_entry_time.3 archive_entry_ctime.3 MLINKS+= archive_entry_time.3 archive_entry_ctime_is_set.3 MLINKS+= archive_entry_time.3 archive_entry_ctime_nsec.3 MLINKS+= archive_entry_time.3 archive_entry_mtime.3 MLINKS+= archive_entry_time.3 archive_entry_mtime_is_set.3 MLINKS+= archive_entry_time.3 archive_entry_mtime_nsec.3 MLINKS+= archive_entry_time.3 archive_entry_set_atime.3 MLINKS+= archive_entry_time.3 archive_entry_set_birthtime.3 MLINKS+= archive_entry_time.3 archive_entry_set_ctime.3 MLINKS+= archive_entry_time.3 archive_entry_set_mtime.3 MLINKS+= archive_entry_time.3 archive_entry_unset_atime.3 MLINKS+= archive_entry_time.3 archive_entry_unset_birthtime.3 MLINKS+= archive_entry_time.3 archive_entry_unset_ctime.3 MLINKS+= archive_entry_time.3 archive_entry_unset_mtime.3 MLINKS+= archive_read_data.3 archive_read_data_block.3 MLINKS+= archive_read_data.3 archive_read_data_into_fd.3 MLINKS+= archive_read_data.3 archive_read_data_skip.3 MLINKS+= archive_read_header.3 archive_read_next_header.3 MLINKS+= archive_read_header.3 archive_read_next_header2.3 MLINKS+= archive_read_extract.3 archive_read_extract2.3 MLINKS+= archive_read_extract.3 archive_read_extract_set_progress_callback.3 MLINKS+= archive_read_extract.3 archive_read_extract_set_skip_file.3 MLINKS+= archive_read_open.3 archive_read_open2.3 MLINKS+= archive_read_open.3 archive_read_open_FILE.3 MLINKS+= archive_read_open.3 archive_read_open_fd.3 MLINKS+= archive_read_open.3 archive_read_open_file.3 MLINKS+= archive_read_open.3 archive_read_open_filename.3 MLINKS+= archive_read_open.3 archive_read_open_memory.3 MLINKS+= archive_read_free.3 archive_read_close.3 MLINKS+= archive_read_free.3 archive_read_finish.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_all.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_bzip2.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_compress.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_gzip.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_lzma.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_none.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_xz.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_program.3 MLINKS+= archive_read_filter.3 archive_read_support_filter_program_signature.3 MLINKS+= archive_read_format.3 archive_read_support_format_7zip.3 MLINKS+= archive_read_format.3 archive_read_support_format_all.3 MLINKS+= archive_read_format.3 archive_read_support_format_ar.3 MLINKS+= archive_read_format.3 archive_read_support_format_by_code.3 MLINKS+= archive_read_format.3 archive_read_support_format_cab.3 MLINKS+= archive_read_format.3 archive_read_support_format_cpio.3 MLINKS+= archive_read_format.3 archive_read_support_format_empty.3 MLINKS+= archive_read_format.3 archive_read_support_format_iso9660.3 MLINKS+= archive_read_format.3 archive_read_support_format_lha.3 MLINKS+= archive_read_format.3 archive_read_support_format_mtree.3 MLINKS+= archive_read_format.3 archive_read_support_format_rar.3 MLINKS+= archive_read_format.3 archive_read_support_format_raw.3 MLINKS+= archive_read_format.3 archive_read_support_format_tar.3 MLINKS+= archive_read_format.3 archive_read_support_format_xar.3 MLINKS+= archive_read_format.3 archive_read_support_format_zip.3 MLINKS+= archive_read_disk.3 archive_read_disk_entry_from_file.3 MLINKS+= archive_read_disk.3 archive_read_disk_gname.3 MLINKS+= archive_read_disk.3 archive_read_disk_new.3 MLINKS+= archive_read_disk.3 archive_read_disk_set_gname_lookup.3 MLINKS+= archive_read_disk.3 archive_read_disk_set_standard_lookup.3 MLINKS+= archive_read_disk.3 archive_read_disk_set_symlink_hybrid.3 MLINKS+= archive_read_disk.3 archive_read_disk_set_symlink_logical.3 MLINKS+= archive_read_disk.3 archive_read_disk_set_symlink_physical.3 MLINKS+= archive_read_disk.3 archive_read_disk_set_uname_lookup.3 MLINKS+= archive_read_disk.3 archive_read_disk_uname.3 MLINKS+= archive_read_set_options.3 archive_read_set_filter_option.3 MLINKS+= archive_read_set_options.3 archive_read_set_format_option.3 MLINKS+= archive_read_set_options.3 archive_read_set_option.3 MLINKS+= archive_util.3 archive_clear_error.3 MLINKS+= archive_util.3 archive_compression.3 MLINKS+= archive_util.3 archive_compression_name.3 MLINKS+= archive_util.3 archive_copy_error.3 MLINKS+= archive_util.3 archive_errno.3 MLINKS+= archive_util.3 archive_error_string.3 MLINKS+= archive_util.3 archive_file_count.3 MLINKS+= archive_util.3 archive_filter_code.3 MLINKS+= archive_util.3 archive_filter_count.3 MLINKS+= archive_util.3 archive_filter_name.3 MLINKS+= archive_util.3 archive_format.3 MLINKS+= archive_util.3 archive_format_name.3 MLINKS+= archive_util.3 archive_position.3 MLINKS+= archive_util.3 archive_set_error.3 MLINKS+= archive_write_blocksize.3 archive_write_get_bytes_in_last_block.3 MLINKS+= archive_write_blocksize.3 archive_write_get_bytes_per_block.3 MLINKS+= archive_write_blocksize.3 archive_write_set_bytes_in_last_block.3 MLINKS+= archive_write_blocksize.3 archive_write_set_bytes_per_block.3 MLINKS+= archive_write_disk.3 archive_write_data_block.3 MLINKS+= archive_write_disk.3 archive_write_disk_new.3 MLINKS+= archive_write_disk.3 archive_write_disk_set_group_lookup.3 MLINKS+= archive_write_disk.3 archive_write_disk_set_options.3 MLINKS+= archive_write_disk.3 archive_write_disk_set_skip_file.3 MLINKS+= archive_write_disk.3 archive_write_disk_set_standard_lookup.3 MLINKS+= archive_write_disk.3 archive_write_disk_set_user_lookup.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_bzip2.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_compress.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_gzip.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_lzip.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_lzma.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_none.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_program.3 MLINKS+= archive_write_filter.3 archive_write_add_filter_xz.3 MLINKS+= archive_write_format.3 archive_write_set_format_cpio.3 MLINKS+= archive_write_format.3 archive_write_set_format_pax.3 MLINKS+= archive_write_format.3 archive_write_set_format_pax_restricted.3 MLINKS+= archive_write_format.3 archive_write_set_format_shar.3 MLINKS+= archive_write_format.3 archive_write_set_format_shar_dump.3 MLINKS+= archive_write_format.3 archive_write_set_format_ustar.3 MLINKS+= archive_write_free.3 archive_write_close.3 MLINKS+= archive_write_free.3 archive_write_fail.3 MLINKS+= archive_write_free.3 archive_write_finish.3 MLINKS+= archive_write_open.3 archive_write_open_FILE.3 MLINKS+= archive_write_open.3 archive_write_open_fd.3 MLINKS+= archive_write_open.3 archive_write_open_file.3 MLINKS+= archive_write_open.3 archive_write_open_filename.3 MLINKS+= archive_write_open.3 archive_write_open_memory.3 MLINKS+= archive_write_set_options.3 archive_write_set_filter_option.3 MLINKS+= archive_write_set_options.3 archive_write_set_format_option.3 MLINKS+= archive_write_set_options.3 archive_write_set_option.3 MLINKS+= libarchive.3 archive.3 .if ${MK_TESTS} != "no" SUBDIR+= tests .endif .include Index: head/lib/libc/gen/Makefile.inc =================================================================== --- head/lib/libc/gen/Makefile.inc (revision 318735) +++ head/lib/libc/gen/Makefile.inc (revision 318736) @@ -1,525 +1,533 @@ # @(#)Makefile.inc 8.6 (Berkeley) 5/4/95 # $FreeBSD$ # machine-independent gen sources .PATH: ${LIBC_SRCTOP}/${LIBC_ARCH}/gen ${LIBC_SRCTOP}/gen SRCS+= __getosreldate.c \ __pthread_mutex_init_calloc_cb_stub.c \ __xuname.c \ _once_stub.c \ _pthread_stubs.c \ _rand48.c \ _spinlock_stub.c \ _thread_init.c \ alarm.c \ arc4random.c \ assert.c \ auxv.c \ basename.c \ basename_compat.c \ cap_sandboxed.c \ check_utility_compat.c \ clock.c \ clock_getcpuclockid.c \ closedir.c \ confstr.c \ crypt.c \ ctermid.c \ daemon.c \ devname.c \ dirfd.c \ dirname.c \ dirname_compat.c \ disklabel.c \ dlfcn.c \ drand48.c \ dup3.c \ elf_utils.c \ erand48.c \ err.c \ errlst.c \ errno.c \ exec.c \ fdevname.c \ feature_present.c \ fmtcheck.c \ fmtmsg.c \ fnmatch.c \ fpclassify.c \ frexp.c \ fstab.c \ ftok.c \ fts.c \ ftw.c \ getbootfile.c \ getbsize.c \ getcap.c \ getcwd.c \ getdomainname.c \ getgrent.c \ getgrouplist.c \ gethostname.c \ getloadavg.c \ getlogin.c \ getmntinfo.c \ getnetgrent.c \ getosreldate.c \ getpagesize.c \ getpagesizes.c \ getpeereid.c \ getprogname.c \ getpwent.c \ getttyent.c \ getusershell.c \ getutxent.c \ getvfsbyname.c \ glob.c \ initgroups.c \ isatty.c \ isinf.c \ isnan.c \ jrand48.c \ lcong48.c \ libc_dlopen.c \ lockf.c \ lrand48.c \ mrand48.c \ nftw.c \ nice.c \ nlist.c \ nrand48.c \ opendir.c \ pause.c \ pmadvise.c \ popen.c \ posix_spawn.c \ psignal.c \ pututxline.c \ pw_scan.c \ raise.c \ readdir.c \ readpassphrase.c \ recvmmsg.c \ rewinddir.c \ scandir.c \ seed48.c \ seekdir.c \ semctl.c \ sendmmsg.c \ setdomainname.c \ sethostname.c \ setjmperr.c \ setmode.c \ setproctitle.c \ setprogname.c \ siginterrupt.c \ siglist.c \ signal.c \ sigsetops.c \ sleep.c \ srand48.c \ statvfs.c \ stringlist.c \ strtofflags.c \ sysconf.c \ sysctl.c \ sysctlbyname.c \ sysctlnametomib.c \ syslog.c \ telldir.c \ termios.c \ time.c \ times.c \ timezone.c \ tls.c \ ttyname.c \ ttyslot.c \ ualarm.c \ ulimit.c \ uname.c \ usleep.c \ utime.c \ utxdb.c \ valloc.c \ wait.c \ wait3.c \ waitpid.c \ waitid.c \ wordexp.c .if ${MK_SYMVER} == yes -SRCS+= fts-compat.c \ +SRCS+= devname-compat11.c \ + fts-compat.c \ + fts-compat11.c \ + ftw-compat11.c \ + getmntinfo-compat11.c \ + glob-compat11.c \ + nftw-compat11.c \ + readdir-compat11.c \ + scandir-compat11.c \ unvis-compat.c .endif .PATH: ${SRCTOP}/contrib/libc-pwcache SRCS+= pwcache.c pwcache.h .PATH: ${SRCTOP}/contrib/libc-vis CFLAGS+= -I${SRCTOP}/contrib/libc-vis SRCS+= unvis.c vis.c MISRCS+=modf.c CANCELPOINTS_SRCS=sem.c sem_new.c .for src in ${CANCELPOINTS_SRCS} SRCS+=cancelpoints_${src} CLEANFILES+=cancelpoints_${src} cancelpoints_${src}: ${LIBC_SRCTOP}/gen/${src} .NOMETA ln -sf ${.ALLSRC} ${.TARGET} .endfor SYM_MAPS+=${LIBC_SRCTOP}/gen/Symbol.map # machine-dependent gen sources .sinclude "${LIBC_SRCTOP}/${LIBC_ARCH}/gen/Makefile.inc" MAN+= alarm.3 \ arc4random.3 \ basename.3 \ cap_rights_get.3 \ cap_sandboxed.3 \ check_utility_compat.3 \ clock.3 \ clock_getcpuclockid.3 \ confstr.3 \ ctermid.3 \ daemon.3 \ devname.3 \ directory.3 \ dirname.3 \ dl_iterate_phdr.3 \ dladdr.3 \ dlinfo.3 \ dllockinit.3 \ dlopen.3 \ dup3.3 \ err.3 \ exec.3 \ feature_present.3 \ fmtcheck.3 \ fmtmsg.3 \ fnmatch.3 \ fpclassify.3 \ frexp.3 \ ftok.3 \ fts.3 \ ftw.3 \ getbootfile.3 \ getbsize.3 \ getcap.3 \ getcontext.3 \ getcwd.3 \ getdiskbyname.3 \ getdomainname.3 \ getfsent.3 \ getgrent.3 \ getgrouplist.3 \ gethostname.3 \ getloadavg.3 \ getmntinfo.3 \ getnetgrent.3 \ getosreldate.3 \ getpagesize.3 \ getpagesizes.3 \ getpass.3 \ getpeereid.3 \ getprogname.3 \ getpwent.3 \ getttyent.3 \ getusershell.3 \ getutxent.3 \ getvfsbyname.3 \ glob.3 \ initgroups.3 \ isgreater.3 \ ldexp.3 \ lockf.3 \ makecontext.3 \ modf.3 \ nice.3 \ nlist.3 \ pause.3 \ popen.3 \ posix_spawn.3 \ posix_spawn_file_actions_addopen.3 \ posix_spawn_file_actions_init.3 \ posix_spawnattr_getflags.3 \ posix_spawnattr_getpgroup.3 \ posix_spawnattr_getschedparam.3 \ posix_spawnattr_getschedpolicy.3 \ posix_spawnattr_init.3 \ posix_spawnattr_getsigdefault.3 \ posix_spawnattr_getsigmask.3 \ psignal.3 \ pwcache.3 \ raise.3 \ rand48.3 \ readpassphrase.3 \ rfork_thread.3 \ scandir.3 \ sem_destroy.3 \ sem_getvalue.3 \ sem_init.3 \ sem_open.3 \ sem_post.3 \ sem_timedwait.3 \ sem_wait.3 \ setjmp.3 \ setmode.3 \ setproctitle.3 \ siginterrupt.3 \ signal.3 \ sigsetops.3 \ sleep.3 \ statvfs.3 \ stringlist.3 \ strtofflags.3 \ sysconf.3 \ sysctl.3 \ syslog.3 \ tcgetpgrp.3 \ tcgetsid.3 \ tcsendbreak.3 \ tcsetattr.3 \ tcsetpgrp.3 \ tcsetsid.3 \ time.3 \ times.3 \ timezone.3 \ ttyname.3 \ tzset.3 \ ualarm.3 \ ucontext.3 \ ulimit.3 \ uname.3 \ unvis.3 \ usleep.3 \ utime.3 \ valloc.3 \ vis.3 \ wordexp.3 MLINKS+=arc4random.3 arc4random_addrandom.3 \ arc4random.3 arc4random_stir.3 \ arc4random.3 arc4random_buf.3 \ arc4random.3 arc4random_uniform.3 MLINKS+=basename.3 basename_r.3 MLINKS+=ctermid.3 ctermid_r.3 MLINKS+=devname.3 devname_r.3 MLINKS+=devname.3 fdevname.3 MLINKS+=devname.3 fdevname_r.3 MLINKS+=directory.3 closedir.3 \ directory.3 dirfd.3 \ directory.3 fdclosedir.3 \ directory.3 fdopendir.3 \ directory.3 opendir.3 \ directory.3 readdir.3 \ directory.3 readdir_r.3 \ directory.3 rewinddir.3 \ directory.3 seekdir.3 \ directory.3 telldir.3 MLINKS+=dlopen.3 fdlopen.3 \ dlopen.3 dlclose.3 \ dlopen.3 dlerror.3 \ dlopen.3 dlfunc.3 \ dlopen.3 dlsym.3 MLINKS+=err.3 err_set_exit.3 \ err.3 err_set_file.3 \ err.3 errc.3 \ err.3 errx.3 \ err.3 verr.3 \ err.3 verrc.3 \ err.3 verrx.3 \ err.3 vwarn.3 \ err.3 vwarnc.3 \ err.3 vwarnx.3 \ err.3 warnc.3 \ err.3 warn.3 \ err.3 warnx.3 MLINKS+=exec.3 execl.3 \ exec.3 execle.3 \ exec.3 execlp.3 \ exec.3 exect.3 \ exec.3 execv.3 \ exec.3 execvP.3 \ exec.3 execvp.3 MLINKS+=fpclassify.3 finite.3 \ fpclassify.3 finitef.3 \ fpclassify.3 isfinite.3 \ fpclassify.3 isinf.3 \ fpclassify.3 isnan.3 \ fpclassify.3 isnormal.3 MLINKS+=frexp.3 frexpf.3 \ frexp.3 frexpl.3 MLINKS+=fts.3 fts_children.3 \ fts.3 fts_close.3 \ fts.3 fts_open.3 \ fts.3 fts_read.3 \ fts.3 fts_set.3 \ fts.3 fts_set_clientptr.3 \ fts.3 fts_get_clientptr.3 \ fts.3 fts_get_stream.3 MLINKS+=ftw.3 nftw.3 MLINKS+=getcap.3 cgetcap.3 \ getcap.3 cgetclose.3 \ getcap.3 cgetent.3 \ getcap.3 cgetfirst.3 \ getcap.3 cgetmatch.3 \ getcap.3 cgetnext.3 \ getcap.3 cgetnum.3 \ getcap.3 cgetset.3 \ getcap.3 cgetstr.3 \ getcap.3 cgetustr.3 MLINKS+=getcwd.3 getwd.3 MLINKS+=getcontext.3 getcontextx.3 MLINKS+=getcontext.3 setcontext.3 MLINKS+=getdomainname.3 setdomainname.3 MLINKS+=getfsent.3 endfsent.3 \ getfsent.3 getfsfile.3 \ getfsent.3 getfsspec.3 \ getfsent.3 getfstype.3 \ getfsent.3 setfsent.3 \ getfsent.3 setfstab.3 \ getfsent.3 getfstab.3 MLINKS+=getgrent.3 endgrent.3 \ getgrent.3 getgrgid.3 \ getgrent.3 getgrnam.3 \ getgrent.3 setgrent.3 \ getgrent.3 setgroupent.3 \ getgrent.3 getgrent_r.3 \ getgrent.3 getgrnam_r.3 \ getgrent.3 getgrgid_r.3 MLINKS+=gethostname.3 sethostname.3 MLINKS+=getnetgrent.3 endnetgrent.3 \ getnetgrent.3 getnetgrent_r.3 \ getnetgrent.3 innetgr.3 \ getnetgrent.3 setnetgrent.3 MLINKS+=getprogname.3 setprogname.3 MLINKS+=getpwent.3 endpwent.3 \ getpwent.3 getpwnam.3 \ getpwent.3 getpwuid.3 \ getpwent.3 setpassent.3 \ getpwent.3 setpwent.3 \ getpwent.3 setpwfile.3 \ getpwent.3 getpwent_r.3 \ getpwent.3 getpwnam_r.3 \ getpwent.3 getpwuid_r.3 MLINKS+=getttyent.3 endttyent.3 \ getttyent.3 getttynam.3 \ getttyent.3 isdialuptty.3 \ getttyent.3 isnettty.3 \ getttyent.3 setttyent.3 MLINKS+=getusershell.3 endusershell.3 \ getusershell.3 setusershell.3 MLINKS+=getutxent.3 endutxent.3 \ getutxent.3 getutxid.3 \ getutxent.3 getutxline.3 \ getutxent.3 getutxuser.3 \ getutxent.3 pututxline.3 \ getutxent.3 setutxdb.3 \ getutxent.3 setutxent.3 \ getutxent.3 utmpx.3 MLINKS+=glob.3 globfree.3 MLINKS+=isgreater.3 isgreaterequal.3 \ isgreater.3 isless.3 \ isgreater.3 islessequal.3 \ isgreater.3 islessgreater.3 \ isgreater.3 isunordered.3 MLINKS+=ldexp.3 ldexpf.3 \ ldexp.3 ldexpl.3 MLINKS+=makecontext.3 swapcontext.3 MLINKS+=modf.3 modff.3 \ modf.3 modfl.3 MLINKS+=popen.3 pclose.3 MLINKS+=posix_spawn.3 posix_spawnp.3 \ posix_spawn_file_actions_addopen.3 posix_spawn_file_actions_addclose.3 \ posix_spawn_file_actions_addopen.3 posix_spawn_file_actions_adddup2.3 \ posix_spawn_file_actions_init.3 posix_spawn_file_actions_destroy.3 \ posix_spawnattr_getflags.3 posix_spawnattr_setflags.3 \ posix_spawnattr_getpgroup.3 posix_spawnattr_setpgroup.3 \ posix_spawnattr_getschedparam.3 posix_spawnattr_setschedparam.3 \ posix_spawnattr_getschedpolicy.3 posix_spawnattr_setschedpolicy.3 \ posix_spawnattr_getsigdefault.3 posix_spawnattr_setsigdefault.3 \ posix_spawnattr_getsigmask.3 posix_spawnattr_setsigmask.3 \ posix_spawnattr_init.3 posix_spawnattr_destroy.3 MLINKS+=psignal.3 strsignal.3 \ psignal.3 sys_siglist.3 \ psignal.3 sys_signame.3 MLINKS+=pwcache.3 group_from_gid.3 \ pwcache.3 user_from_uid.3 MLINKS+=rand48.3 _rand48.3 \ rand48.3 drand48.3 \ rand48.3 erand48.3 \ rand48.3 jrand48.3 \ rand48.3 lcong48.3 \ rand48.3 lrand48.3 \ rand48.3 mrand48.3 \ rand48.3 nrand48.3 \ rand48.3 seed48.3 \ rand48.3 srand48.3 MLINKS+=recv.2 recvmmsg.2 MLINKS+=scandir.3 alphasort.3 MLINKS+=sem_open.3 sem_close.3 \ sem_open.3 sem_unlink.3 MLINKS+=sem_wait.3 sem_trywait.3 MLINKS+=sem_timedwait.3 sem_clockwait_np.3 MLINKS+=send.2 sendmmsg.2 MLINKS+=setjmp.3 _longjmp.3 \ setjmp.3 _setjmp.3 \ setjmp.3 longjmp.3 \ setjmp.3 longjmperr.3 \ setjmp.3 longjmperror.3 \ setjmp.3 siglongjmp.3 \ setjmp.3 sigsetjmp.3 MLINKS+=setmode.3 getmode.3 MLINKS+=sigsetops.3 sigaddset.3 \ sigsetops.3 sigdelset.3 \ sigsetops.3 sigemptyset.3 \ sigsetops.3 sigfillset.3 \ sigsetops.3 sigismember.3 MLINKS+=statvfs.3 fstatvfs.3 MLINKS+=stringlist.3 sl_add.3 \ stringlist.3 sl_find.3 \ stringlist.3 sl_free.3 \ stringlist.3 sl_init.3 MLINKS+=strtofflags.3 fflagstostr.3 MLINKS+=sysctl.3 sysctlbyname.3 \ sysctl.3 sysctlnametomib.3 MLINKS+=syslog.3 closelog.3 \ syslog.3 openlog.3 \ syslog.3 setlogmask.3 \ syslog.3 vsyslog.3 MLINKS+=tcsendbreak.3 tcdrain.3 \ tcsendbreak.3 tcflow.3 \ tcsendbreak.3 tcflush.3 MLINKS+=tcsetattr.3 cfgetispeed.3 \ tcsetattr.3 cfgetospeed.3 \ tcsetattr.3 cfmakeraw.3 \ tcsetattr.3 cfmakesane.3 \ tcsetattr.3 cfsetispeed.3 \ tcsetattr.3 cfsetospeed.3 \ tcsetattr.3 cfsetspeed.3 \ tcsetattr.3 tcgetattr.3 MLINKS+=ttyname.3 isatty.3 \ ttyname.3 ttyname_r.3 MLINKS+=tzset.3 tzsetwall.3 MLINKS+=unvis.3 strunvis.3 \ unvis.3 strunvisx.3 MLINKS+=vis.3 nvis.3 \ vis.3 snvis.3 \ vis.3 strenvisx.3 \ vis.3 strnunvis.3 \ vis.3 strnunvisx.3 \ vis.3 strnvis.3 \ vis.3 strnvisx.3 \ vis.3 strsenvisx.3 \ vis.3 strsnvis.3 \ vis.3 strsnvisx.3 \ vis.3 strsvis.3 \ vis.3 strsvisx.3 \ vis.3 strvis.3 \ vis.3 strvisx.3 \ vis.3 svis.3 MLINKS+=wordexp.3 wordfree.3 Index: head/lib/libc/gen/Symbol.map =================================================================== --- head/lib/libc/gen/Symbol.map (revision 318735) +++ head/lib/libc/gen/Symbol.map (revision 318736) @@ -1,547 +1,547 @@ /* * $FreeBSD$ */ FBSD_1.0 { __xuname; pthread_atfork; pthread_attr_destroy; pthread_attr_getdetachstate; pthread_attr_getguardsize; pthread_attr_getinheritsched; pthread_attr_getschedparam; pthread_attr_getschedpolicy; pthread_attr_getscope; pthread_attr_getstackaddr; pthread_attr_getstacksize; pthread_attr_init; pthread_attr_setdetachstate; pthread_attr_setguardsize; pthread_attr_setinheritsched; pthread_attr_setschedparam; pthread_attr_setschedpolicy; pthread_attr_setscope; pthread_attr_setstackaddr; pthread_attr_setstacksize; pthread_cancel; pthread_cleanup_pop; pthread_cleanup_push; pthread_cond_broadcast; pthread_cond_destroy; pthread_cond_init; pthread_cond_signal; pthread_cond_timedwait; pthread_cond_wait; pthread_detach; pthread_equal; pthread_exit; pthread_getspecific; pthread_join; pthread_key_create; pthread_key_delete; pthread_kill; pthread_main_np; pthread_mutex_destroy; pthread_mutex_init; pthread_mutex_lock; pthread_mutex_trylock; pthread_mutex_unlock; pthread_mutexattr_destroy; pthread_mutexattr_init; pthread_mutexattr_settype; pthread_once; pthread_rwlock_destroy; pthread_rwlock_init; pthread_rwlock_rdlock; pthread_rwlock_tryrdlock; pthread_rwlock_trywrlock; pthread_rwlock_unlock; pthread_rwlock_wrlock; pthread_self; pthread_setcancelstate; pthread_setcanceltype; pthread_setspecific; pthread_sigmask; pthread_testcancel; alarm; arc4random; arc4random_addrandom; arc4random_stir; __assert; check_utility_compat; clock; closedir; confstr; ctermid; ctermid_r; daemon; - devname; - devname_r; getdiskbyname; dladdr; dlclose; dlerror; dlfunc; dllockinit; dlopen; dlsym; dlvsym; dlinfo; dl_iterate_phdr; drand48; erand48; err_set_file; err_set_exit; err; verr; errc; verrc; errx; verrx; warn; vwarn; warnc; vwarnc; warnx; vwarnx; sys_errlist; sys_nerr; errno; execl; execle; execlp; execv; execvp; execvP; fmtcheck; fmtmsg; fnmatch; __fpclassifyf; __fpclassifyd; __fpclassifyl; frexp; setfstab; getfstab; getfsent; getfsspec; getfsfile; setfsent; endfsent; ftok; - ftw; - glob; - globfree; getbootfile; getbsize; cgetset; cgetcap; cgetent; cgetmatch; cgetfirst; cgetclose; cgetnext; cgetstr; cgetustr; cgetnum; getcwd; getdomainname; setgrent; setgroupent; endgrent; getgrent_r; getgrnam_r; getgrgid_r; getgrnam; getgrgid; getgrent; /* * Why are __gr_parse_entry() and __gr_match_entry() not static in * gen/getgrent.c? */ getgrouplist; gethostname; getloadavg; getlogin; getlogin_r; - getmntinfo; setnetgrent; getnetgrent; endnetgrent; innetgr; getosreldate; getpagesize; getpeereid; _getprogname; getprogname; setpwent; setpassent; endpwent; getpwent_r; getpwnam_r; getpwuid_r; getpwnam; getpwuid; getpwent; getttynam; getttyent; setttyent; endttyent; isdialuptty; isnettty; getusershell; endusershell; setusershell; getvfsbyname; __isnan; isnan; __isnanf; isnanf; __isinf; isinf; __isinff; __isinfl; isatty; initgroups; jrand48; lcong48; ldexp; lockf; lrand48; modf; mrand48; - nftw; nice; nlist; nrand48; opendir; pause; posix_madvise; popen; pclose; psignal; raise; - readdir; - readdir_r; readpassphrase; getpass; rewinddir; - scandir; - alphasort; seed48; seekdir; user_from_uid; group_from_gid; setdomainname; sethostname; longjmperror; getmode; setmode; setproctitle; setprogname; siginterrupt; sys_signame; sys_siglist; sys_nsig; signal; sigaddset; sigdelset; sigemptyset; sigfillset; sigismember; sleep; srand48; fstatvfs; statvfs; sl_init; sl_add; sl_free; sl_find; fflagstostr; strtofflags; sysconf; sysctl; sysctlbyname; sysctlnametomib; syslog; vsyslog; openlog; closelog; setlogmask; ttyname_r; ttyname; timezone; times; time; telldir; tcgetattr; tcsetattr; tcsetpgrp; tcgetpgrp; cfgetospeed; cfgetispeed; cfsetospeed; cfsetispeed; cfsetspeed; cfmakeraw; tcsendbreak; _init_tls; __tls_get_addr; tcdrain; tcflush; tcflow; ualarm; ulimit; uname; strunvis; strunvisx; usleep; utime; valloc; vis; strvis; strvisx; wait; wait3; waitpid; wordexp; wordfree; }; FBSD_1.1 { arc4random_buf; arc4random_uniform; fdevname; fdevname_r; fdopendir; feature_present; - fts_children; - fts_close; - fts_get_clientptr; - fts_get_stream; - fts_open; - fts_read; - fts_set; - fts_set_clientptr; posix_spawn; posix_spawn_file_actions_addclose; posix_spawn_file_actions_adddup2; posix_spawn_file_actions_addopen; posix_spawn_file_actions_destroy; posix_spawn_file_actions_init; posix_spawnattr_destroy; posix_spawnattr_getflags; posix_spawnattr_getpgroup; posix_spawnattr_getschedparam; posix_spawnattr_getschedpolicy; posix_spawnattr_getsigdefault; posix_spawnattr_getsigmask; posix_spawnattr_init; posix_spawnattr_setflags; posix_spawnattr_setpgroup; posix_spawnattr_setschedparam; posix_spawnattr_setschedpolicy; posix_spawnattr_setsigdefault; posix_spawnattr_setsigmask; posix_spawnp; semctl; tcgetsid; tcsetsid; __pthread_cleanup_pop_imp; __pthread_cleanup_push_imp; }; FBSD_1.2 { basename_r; cfmakesane; endutxent; getpagesizes; getutxent; getutxid; getutxline; getutxuser; pututxline; sem_close; sem_destroy; sem_getvalue; sem_init; sem_open; sem_post; sem_timedwait; sem_trywait; sem_unlink; sem_wait; setutxdb; setutxent; }; FBSD_1.3 { clock_getcpuclockid; dirfd; dup3; fdclosedir; fdlopen; __FreeBSD_libc_enter_restricted_mode; getcontextx; gid_from_group; nvis; pwcache_userdb; pwcache_groupdb; snvis; strenvisx; strnunvis; strnunvisx; strnvis; strnvisx; strsenvisx; strsnvis; strsnvisx; strsvis; strsvisx; svis; uid_from_user; unvis; waitid; }; FBSD_1.4 { getnetgrent_r; pthread_mutex_consistent; pthread_mutexattr_getrobust; pthread_mutexattr_setrobust; - scandir_b; stravis; }; FBSD_1.5 { + alphasort; basename; + devname; + devname_r; dirname; + fts_children; + fts_close; + fts_get_clientptr; + fts_get_stream; + fts_open; + fts_read; + fts_set; + fts_set_clientptr; + ftw; + getmntinfo; + glob; + globfree; + nftw; + readdir; + readdir_r; + scandir; + scandir_b; sem_clockwait_np; }; FBSDprivate_1.0 { /* needed by thread libraries */ __thr_jtable; _pthread_atfork; _pthread_attr_destroy; _pthread_attr_getdetachstate; _pthread_attr_getguardsize; _pthread_attr_getinheritsched; _pthread_attr_getschedparam; _pthread_attr_getschedpolicy; _pthread_attr_getscope; _pthread_attr_getstackaddr; _pthread_attr_getstacksize; _pthread_attr_init; _pthread_attr_setdetachstate; _pthread_attr_setguardsize; _pthread_attr_setinheritsched; _pthread_attr_setschedparam; _pthread_attr_setschedpolicy; _pthread_attr_setscope; _pthread_attr_setstackaddr; _pthread_attr_setstacksize; _pthread_cancel; _pthread_cancel_enter; _pthread_cancel_leave; _pthread_cleanup_pop; _pthread_cleanup_push; _pthread_cond_broadcast; _pthread_cond_destroy; _pthread_cond_init; _pthread_cond_signal; _pthread_cond_timedwait; _pthread_cond_wait; _pthread_detach; _pthread_equal; _pthread_exit; _pthread_getspecific; _pthread_join; _pthread_key_create; _pthread_key_delete; _pthread_kill; _pthread_main_np; _pthread_mutex_destroy; _pthread_mutex_init_calloc_cb; _pthread_mutex_init; _pthread_mutex_lock; _pthread_mutex_trylock; _pthread_mutex_unlock; _pthread_mutexattr_destroy; _pthread_mutexattr_init; _pthread_mutexattr_settype; _pthread_once; _pthread_rwlock_destroy; _pthread_rwlock_init; _pthread_rwlock_rdlock; _pthread_rwlock_tryrdlock; _pthread_rwlock_trywrlock; _pthread_rwlock_unlock; _pthread_rwlock_wrlock; _pthread_self; _pthread_setcancelstate; _pthread_setcanceltype; _pthread_setspecific; _pthread_sigmask; _pthread_testcancel; _spinlock; _spinunlock; _rtld_addr_phdr; _rtld_atfork_pre; _rtld_atfork_post; _rtld_error; /* for private use */ _rtld_get_stack_prot; _rtld_is_dlopened; _rtld_thread_init; /* for private use */ __elf_phdr_match_addr; _err; _warn; __fmtcheck; /* __pw_match_entry; */ /* __pw_parse_entry; */ __fdnlist; /* used by libkvm */ /* __aout_fdnlist; */ /* __elf_is_okay__; */ /* __elf_fdnlist; */ __opendir2; __pause; _pause; __pw_scan; /* Used by (at least) libutil */ __raise; _raise; __sleep; _sleep; _rtld_allocate_tls; _rtld_free_tls; #if defined(i386) ___libc_tls_get_addr; /* x86 only */ #endif __libc_tls_get_addr; __tcdrain; _tcdrain; __usleep; _usleep; __wait; _wait; __waitpid; _waitpid; _libc_sem_init_compat; _libc_sem_destroy_compat; _libc_sem_open_compat; _libc_sem_close_compat; _libc_sem_unlink_compat; _libc_sem_wait_compat; _libc_sem_trywait_compat; _libc_sem_timedwait_compat; _libc_sem_post_compat; _libc_sem_getvalue_compat; __libc_tcdrain; __elf_aux_vector; __pthread_map_stacks_exec; __fillcontextx; __fillcontextx2; __getcontextx_size; }; Index: head/lib/libc/gen/closedir.c =================================================================== --- head/lib/libc/gen/closedir.c (revision 318735) +++ head/lib/libc/gen/closedir.c (revision 318736) @@ -1,76 +1,77 @@ /* * Copyright (c) 1983, 1993 * Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)closedir.c 8.1 (Berkeley) 6/10/93"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); #include "namespace.h" #include #include #include #include #include #include "un-namespace.h" #include "libc_private.h" #include "gen-private.h" #include "telldir.h" /* * close a directory. */ int fdclosedir(DIR *dirp) { int fd; if (__isthreaded) _pthread_mutex_lock(&dirp->dd_lock); fd = dirp->dd_fd; dirp->dd_fd = -1; dirp->dd_loc = 0; free((void *)dirp->dd_buf); + free(dirp->dd_compat_de); _reclaim_telldir(dirp); if (__isthreaded) { _pthread_mutex_unlock(&dirp->dd_lock); _pthread_mutex_destroy(&dirp->dd_lock); } free((void *)dirp); return (fd); } int closedir(DIR *dirp) { return (_close(fdclosedir(dirp))); } Index: head/lib/libc/gen/devname-compat11.c =================================================================== --- head/lib/libc/gen/devname-compat11.c (nonexistent) +++ head/lib/libc/gen/devname-compat11.c (revision 318736) @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2011 Gleb Kurtsou + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include "gen-compat.h" + +char * +freebsd11_devname(uint32_t dev, mode_t type) +{ + + return (devname(dev, type)); +} + +char * +freebsd11_devname_r(uint32_t dev, mode_t type, char *buf, int len) +{ + + return (devname_r(dev, type, buf, len)); +} + +__sym_compat(devname, freebsd11_devname, FBSD_1.0); +__sym_compat(devname_r, freebsd11_devname_r, FBSD_1.0); Property changes on: head/lib/libc/gen/devname-compat11.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/lib/libc/gen/fts-compat.c =================================================================== --- head/lib/libc/gen/fts-compat.c (revision 318735) +++ head/lib/libc/gen/fts-compat.c (revision 318736) @@ -1,1214 +1,1219 @@ /*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $OpenBSD: fts.c,v 1.22 1999/10/03 19:22:22 millert Exp $ */ #if 0 #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)fts.c 8.6 (Berkeley) 8/14/94"; #endif /* LIBC_SCCS and not lint */ #endif #include __FBSDID("$FreeBSD$"); #include "namespace.h" #include +#define _WANT_FREEBSD11_STATFS #include +#define _WANT_FREEBSD11_STAT #include +#define _WANT_FREEBSD11_DIRENT #include #include #include #include #include #include +#include "gen-compat.h" #include "fts-compat.h" #include "un-namespace.h" #include "gen-private.h" FTSENT *__fts_children_44bsd(FTS *, int); int __fts_close_44bsd(FTS *); void *__fts_get_clientptr_44bsd(FTS *); FTS *__fts_get_stream_44bsd(FTSENT *); FTS *__fts_open_44bsd(char * const *, int, int (*)(const FTSENT * const *, const FTSENT * const *)); FTSENT *__fts_read_44bsd(FTS *); int __fts_set_44bsd(FTS *, FTSENT *, int); void __fts_set_clientptr_44bsd(FTS *, void *); static FTSENT *fts_alloc(FTS *, char *, int); static FTSENT *fts_build(FTS *, int); static void fts_lfree(FTSENT *); static void fts_load(FTS *, FTSENT *); static size_t fts_maxarglen(char * const *); static void fts_padjust(FTS *, FTSENT *); static int fts_palloc(FTS *, size_t); static FTSENT *fts_sort(FTS *, FTSENT *, int); static u_short fts_stat(FTS *, FTSENT *, int); static int fts_safe_changedir(FTS *, FTSENT *, int, char *); static int fts_ufslinks(FTS *, const FTSENT *); #define ISDOT(a) (a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2]))) #define CLR(opt) (sp->fts_options &= ~(opt)) #define ISSET(opt) (sp->fts_options & (opt)) #define SET(opt) (sp->fts_options |= (opt)) #define FCHDIR(sp, fd) (!ISSET(FTS_NOCHDIR) && fchdir(fd)) /* fts_build flags */ #define BCHILD 1 /* fts_children */ #define BNAMES 2 /* fts_children, names only */ #define BREAD 3 /* fts_read */ /* * Internal representation of an FTS, including extra implementation * details. The FTS returned from fts_open points to this structure's * ftsp_fts member (and can be cast to an _fts_private as required) */ struct _fts_private { FTS ftsp_fts; - struct statfs ftsp_statfs; - dev_t ftsp_dev; + struct freebsd11_statfs ftsp_statfs; + uint32_t ftsp_dev; int ftsp_linksreliable; }; /* * The "FTS_NOSTAT" option can avoid a lot of calls to stat(2) if it * knows that a directory could not possibly have subdirectories. This * is decided by looking at the link count: a subdirectory would * increment its parent's link count by virtue of its own ".." entry. * This assumption only holds for UFS-like filesystems that implement * links and directories this way, so we must punt for others. */ static const char *ufslike_filesystems[] = { "ufs", "zfs", "nfs", "ext2fs", 0 }; FTS * __fts_open_44bsd(char * const *argv, int options, int (*compar)(const FTSENT * const *, const FTSENT * const *)) { struct _fts_private *priv; FTS *sp; FTSENT *p, *root; int nitems; FTSENT *parent, *tmp; int len; /* Options check. */ if (options & ~FTS_OPTIONMASK) { errno = EINVAL; return (NULL); } /* Allocate/initialize the stream. */ if ((priv = calloc(1, sizeof(*priv))) == NULL) return (NULL); sp = &priv->ftsp_fts; sp->fts_compar = compar; sp->fts_options = options; /* Shush, GCC. */ tmp = NULL; /* Logical walks turn on NOCHDIR; symbolic links are too hard. */ if (ISSET(FTS_LOGICAL)) SET(FTS_NOCHDIR); /* * Start out with 1K of path space, and enough, in any case, * to hold the user's paths. */ if (fts_palloc(sp, MAX(fts_maxarglen(argv), MAXPATHLEN))) goto mem1; /* Allocate/initialize root's parent. */ if ((parent = fts_alloc(sp, "", 0)) == NULL) goto mem2; parent->fts_level = FTS_ROOTPARENTLEVEL; /* Allocate/initialize root(s). */ for (root = NULL, nitems = 0; *argv != NULL; ++argv, ++nitems) { /* Don't allow zero-length paths. */ if ((len = strlen(*argv)) == 0) { errno = ENOENT; goto mem3; } p = fts_alloc(sp, *argv, len); p->fts_level = FTS_ROOTLEVEL; p->fts_parent = parent; p->fts_accpath = p->fts_name; p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW)); /* Command-line "." and ".." are real directories. */ if (p->fts_info == FTS_DOT) p->fts_info = FTS_D; /* * If comparison routine supplied, traverse in sorted * order; otherwise traverse in the order specified. */ if (compar) { p->fts_link = root; root = p; } else { p->fts_link = NULL; if (root == NULL) tmp = root = p; else { tmp->fts_link = p; tmp = p; } } } if (compar && nitems > 1) root = fts_sort(sp, root, nitems); /* * Allocate a dummy pointer and make fts_read think that we've just * finished the node before the root(s); set p->fts_info to FTS_INIT * so that everything about the "current" node is ignored. */ if ((sp->fts_cur = fts_alloc(sp, "", 0)) == NULL) goto mem3; sp->fts_cur->fts_link = root; sp->fts_cur->fts_info = FTS_INIT; /* * If using chdir(2), grab a file descriptor pointing to dot to ensure * that we can get back here; this could be avoided for some paths, * but almost certainly not worth the effort. Slashes, symbolic links, * and ".." are all fairly nasty problems. Note, if we can't get the * descriptor we run anyway, just more slowly. */ if (!ISSET(FTS_NOCHDIR) && (sp->fts_rfd = _open(".", O_RDONLY | O_CLOEXEC, 0)) < 0) SET(FTS_NOCHDIR); return (sp); mem3: fts_lfree(root); free(parent); mem2: free(sp->fts_path); mem1: free(sp); return (NULL); } static void fts_load(FTS *sp, FTSENT *p) { int len; char *cp; /* * Load the stream structure for the next traversal. Since we don't * actually enter the directory until after the preorder visit, set * the fts_accpath field specially so the chdir gets done to the right * place and the user can access the first node. From fts_open it's * known that the path will fit. */ len = p->fts_pathlen = p->fts_namelen; memmove(sp->fts_path, p->fts_name, len + 1); if ((cp = strrchr(p->fts_name, '/')) && (cp != p->fts_name || cp[1])) { len = strlen(++cp); memmove(p->fts_name, cp, len + 1); p->fts_namelen = len; } p->fts_accpath = p->fts_path = sp->fts_path; sp->fts_dev = p->fts_dev; } int __fts_close_44bsd(FTS *sp) { FTSENT *freep, *p; int saved_errno; /* * This still works if we haven't read anything -- the dummy structure * points to the root list, so we step through to the end of the root * list which has a valid parent pointer. */ if (sp->fts_cur) { for (p = sp->fts_cur; p->fts_level >= FTS_ROOTLEVEL;) { freep = p; p = p->fts_link != NULL ? p->fts_link : p->fts_parent; free(freep); } free(p); } /* Free up child linked list, sort array, path buffer. */ if (sp->fts_child) fts_lfree(sp->fts_child); if (sp->fts_array) free(sp->fts_array); free(sp->fts_path); /* Return to original directory, save errno if necessary. */ if (!ISSET(FTS_NOCHDIR)) { saved_errno = fchdir(sp->fts_rfd) ? errno : 0; (void)_close(sp->fts_rfd); /* Set errno and return. */ if (saved_errno != 0) { /* Free up the stream pointer. */ free(sp); errno = saved_errno; return (-1); } } /* Free up the stream pointer. */ free(sp); return (0); } /* * Special case of "/" at the end of the path so that slashes aren't * appended which would cause paths to be written as "....//foo". */ #define NAPPEND(p) \ (p->fts_path[p->fts_pathlen - 1] == '/' \ ? p->fts_pathlen - 1 : p->fts_pathlen) FTSENT * __fts_read_44bsd(FTS *sp) { FTSENT *p, *tmp; int instr; char *t; int saved_errno; /* If finished or unrecoverable error, return NULL. */ if (sp->fts_cur == NULL || ISSET(FTS_STOP)) return (NULL); /* Set current node pointer. */ p = sp->fts_cur; /* Save and zero out user instructions. */ instr = p->fts_instr; p->fts_instr = FTS_NOINSTR; /* Any type of file may be re-visited; re-stat and re-turn. */ if (instr == FTS_AGAIN) { p->fts_info = fts_stat(sp, p, 0); return (p); } /* * Following a symlink -- SLNONE test allows application to see * SLNONE and recover. If indirecting through a symlink, have * keep a pointer to current location. If unable to get that * pointer, follow fails. */ if (instr == FTS_FOLLOW && (p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE)) { p->fts_info = fts_stat(sp, p, 1); if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) { if ((p->fts_symfd = _open(".", O_RDONLY | O_CLOEXEC, 0)) < 0) { p->fts_errno = errno; p->fts_info = FTS_ERR; } else p->fts_flags |= FTS_SYMFOLLOW; } return (p); } /* Directory in pre-order. */ if (p->fts_info == FTS_D) { /* If skipped or crossed mount point, do post-order visit. */ if (instr == FTS_SKIP || (ISSET(FTS_XDEV) && p->fts_dev != sp->fts_dev)) { if (p->fts_flags & FTS_SYMFOLLOW) (void)_close(p->fts_symfd); if (sp->fts_child) { fts_lfree(sp->fts_child); sp->fts_child = NULL; } p->fts_info = FTS_DP; return (p); } /* Rebuild if only read the names and now traversing. */ if (sp->fts_child != NULL && ISSET(FTS_NAMEONLY)) { CLR(FTS_NAMEONLY); fts_lfree(sp->fts_child); sp->fts_child = NULL; } /* * Cd to the subdirectory. * * If have already read and now fail to chdir, whack the list * to make the names come out right, and set the parent errno * so the application will eventually get an error condition. * Set the FTS_DONTCHDIR flag so that when we logically change * directories back to the parent we don't do a chdir. * * If haven't read do so. If the read fails, fts_build sets * FTS_STOP or the fts_info field of the node. */ if (sp->fts_child != NULL) { if (fts_safe_changedir(sp, p, -1, p->fts_accpath)) { p->fts_errno = errno; p->fts_flags |= FTS_DONTCHDIR; for (p = sp->fts_child; p != NULL; p = p->fts_link) p->fts_accpath = p->fts_parent->fts_accpath; } } else if ((sp->fts_child = fts_build(sp, BREAD)) == NULL) { if (ISSET(FTS_STOP)) return (NULL); return (p); } p = sp->fts_child; sp->fts_child = NULL; goto name; } /* Move to the next node on this level. */ next: tmp = p; if ((p = p->fts_link) != NULL) { free(tmp); /* * If reached the top, return to the original directory (or * the root of the tree), and load the paths for the next root. */ if (p->fts_level == FTS_ROOTLEVEL) { if (FCHDIR(sp, sp->fts_rfd)) { SET(FTS_STOP); return (NULL); } fts_load(sp, p); return (sp->fts_cur = p); } /* * User may have called fts_set on the node. If skipped, * ignore. If followed, get a file descriptor so we can * get back if necessary. */ if (p->fts_instr == FTS_SKIP) goto next; if (p->fts_instr == FTS_FOLLOW) { p->fts_info = fts_stat(sp, p, 1); if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) { if ((p->fts_symfd = _open(".", O_RDONLY | O_CLOEXEC, 0)) < 0) { p->fts_errno = errno; p->fts_info = FTS_ERR; } else p->fts_flags |= FTS_SYMFOLLOW; } p->fts_instr = FTS_NOINSTR; } name: t = sp->fts_path + NAPPEND(p->fts_parent); *t++ = '/'; memmove(t, p->fts_name, p->fts_namelen + 1); return (sp->fts_cur = p); } /* Move up to the parent node. */ p = tmp->fts_parent; free(tmp); if (p->fts_level == FTS_ROOTPARENTLEVEL) { /* * Done; free everything up and set errno to 0 so the user * can distinguish between error and EOF. */ free(p); errno = 0; return (sp->fts_cur = NULL); } /* NUL terminate the pathname. */ sp->fts_path[p->fts_pathlen] = '\0'; /* * Return to the parent directory. If at a root node or came through * a symlink, go back through the file descriptor. Otherwise, cd up * one directory. */ if (p->fts_level == FTS_ROOTLEVEL) { if (FCHDIR(sp, sp->fts_rfd)) { SET(FTS_STOP); return (NULL); } } else if (p->fts_flags & FTS_SYMFOLLOW) { if (FCHDIR(sp, p->fts_symfd)) { saved_errno = errno; (void)_close(p->fts_symfd); errno = saved_errno; SET(FTS_STOP); return (NULL); } (void)_close(p->fts_symfd); } else if (!(p->fts_flags & FTS_DONTCHDIR) && fts_safe_changedir(sp, p->fts_parent, -1, "..")) { SET(FTS_STOP); return (NULL); } p->fts_info = p->fts_errno ? FTS_ERR : FTS_DP; return (sp->fts_cur = p); } /* * Fts_set takes the stream as an argument although it's not used in this * implementation; it would be necessary if anyone wanted to add global * semantics to fts using fts_set. An error return is allowed for similar * reasons. */ /* ARGSUSED */ int __fts_set_44bsd(FTS *sp, FTSENT *p, int instr) { if (instr != 0 && instr != FTS_AGAIN && instr != FTS_FOLLOW && instr != FTS_NOINSTR && instr != FTS_SKIP) { errno = EINVAL; return (1); } p->fts_instr = instr; return (0); } FTSENT * __fts_children_44bsd(FTS *sp, int instr) { FTSENT *p; int fd; if (instr != 0 && instr != FTS_NAMEONLY) { errno = EINVAL; return (NULL); } /* Set current node pointer. */ p = sp->fts_cur; /* * Errno set to 0 so user can distinguish empty directory from * an error. */ errno = 0; /* Fatal errors stop here. */ if (ISSET(FTS_STOP)) return (NULL); /* Return logical hierarchy of user's arguments. */ if (p->fts_info == FTS_INIT) return (p->fts_link); /* * If not a directory being visited in pre-order, stop here. Could * allow FTS_DNR, assuming the user has fixed the problem, but the * same effect is available with FTS_AGAIN. */ if (p->fts_info != FTS_D /* && p->fts_info != FTS_DNR */) return (NULL); /* Free up any previous child list. */ if (sp->fts_child != NULL) fts_lfree(sp->fts_child); if (instr == FTS_NAMEONLY) { SET(FTS_NAMEONLY); instr = BNAMES; } else instr = BCHILD; /* * If using chdir on a relative path and called BEFORE fts_read does * its chdir to the root of a traversal, we can lose -- we need to * chdir into the subdirectory, and we don't know where the current * directory is, so we can't get back so that the upcoming chdir by * fts_read will work. */ if (p->fts_level != FTS_ROOTLEVEL || p->fts_accpath[0] == '/' || ISSET(FTS_NOCHDIR)) return (sp->fts_child = fts_build(sp, instr)); if ((fd = _open(".", O_RDONLY | O_CLOEXEC, 0)) < 0) return (NULL); sp->fts_child = fts_build(sp, instr); if (fchdir(fd)) { (void)_close(fd); return (NULL); } (void)_close(fd); return (sp->fts_child); } #ifndef fts_get_clientptr #error "fts_get_clientptr not defined" #endif void * (__fts_get_clientptr_44bsd)(FTS *sp) { return (fts_get_clientptr(sp)); } #ifndef fts_get_stream #error "fts_get_stream not defined" #endif FTS * (__fts_get_stream_44bsd)(FTSENT *p) { return (fts_get_stream(p)); } void __fts_set_clientptr_44bsd(FTS *sp, void *clientptr) { sp->fts_clientptr = clientptr; } /* * This is the tricky part -- do not casually change *anything* in here. The * idea is to build the linked list of entries that are used by fts_children * and fts_read. There are lots of special cases. * * The real slowdown in walking the tree is the stat calls. If FTS_NOSTAT is * set and it's a physical walk (so that symbolic links can't be directories), * we can do things quickly. First, if it's a 4.4BSD file system, the type * of the file is in the directory entry. Otherwise, we assume that the number * of subdirectories in a node is equal to the number of links to the parent. * The former skips all stat calls. The latter skips stat calls in any leaf * directories and for any files after the subdirectories in the directory have * been found, cutting the stat calls by about 2/3. */ static FTSENT * fts_build(FTS *sp, int type) { - struct dirent *dp; + struct freebsd11_dirent *dp; FTSENT *p, *head; int nitems; FTSENT *cur, *tail; DIR *dirp; void *oldaddr; size_t dnamlen; int cderrno, descend, len, level, maxlen, nlinks, oflag, saved_errno, nostat, doadjust; char *cp; /* Set current node pointer. */ cur = sp->fts_cur; /* * Open the directory for reading. If this fails, we're done. * If being called from fts_read, set the fts_info field. */ #ifdef FTS_WHITEOUT if (ISSET(FTS_WHITEOUT)) oflag = DTF_NODUP | DTF_REWIND; else oflag = DTF_HIDEW | DTF_NODUP | DTF_REWIND; #else #define __opendir2(path, flag) opendir(path) #endif if ((dirp = __opendir2(cur->fts_accpath, oflag)) == NULL) { if (type == BREAD) { cur->fts_info = FTS_DNR; cur->fts_errno = errno; } return (NULL); } /* * Nlinks is the number of possible entries of type directory in the * directory if we're cheating on stat calls, 0 if we're not doing * any stat calls at all, -1 if we're doing stats on everything. */ if (type == BNAMES) { nlinks = 0; /* Be quiet about nostat, GCC. */ nostat = 0; } else if (ISSET(FTS_NOSTAT) && ISSET(FTS_PHYSICAL)) { if (fts_ufslinks(sp, cur)) nlinks = cur->fts_nlink - (ISSET(FTS_SEEDOT) ? 0 : 2); else nlinks = -1; nostat = 1; } else { nlinks = -1; nostat = 0; } #ifdef notdef (void)printf("nlinks == %d (cur: %d)\n", nlinks, cur->fts_nlink); (void)printf("NOSTAT %d PHYSICAL %d SEEDOT %d\n", ISSET(FTS_NOSTAT), ISSET(FTS_PHYSICAL), ISSET(FTS_SEEDOT)); #endif /* * If we're going to need to stat anything or we want to descend * and stay in the directory, chdir. If this fails we keep going, * but set a flag so we don't chdir after the post-order visit. * We won't be able to stat anything, but we can still return the * names themselves. Note, that since fts_read won't be able to * chdir into the directory, it will have to return different path * names than before, i.e. "a/b" instead of "b". Since the node * has already been visited in pre-order, have to wait until the * post-order visit to return the error. There is a special case * here, if there was nothing to stat then it's not an error to * not be able to stat. This is all fairly nasty. If a program * needed sorted entries or stat information, they had better be * checking FTS_NS on the returned nodes. */ cderrno = 0; if (nlinks || type == BREAD) { if (fts_safe_changedir(sp, cur, _dirfd(dirp), NULL)) { if (nlinks && type == BREAD) cur->fts_errno = errno; cur->fts_flags |= FTS_DONTCHDIR; descend = 0; cderrno = errno; } else descend = 1; } else descend = 0; /* * Figure out the max file name length that can be stored in the * current path -- the inner loop allocates more path as necessary. * We really wouldn't have to do the maxlen calculations here, we * could do them in fts_read before returning the path, but it's a * lot easier here since the length is part of the dirent structure. * * If not changing directories set a pointer so that can just append * each new name into the path. */ len = NAPPEND(cur); if (ISSET(FTS_NOCHDIR)) { cp = sp->fts_path + len; *cp++ = '/'; } else { /* GCC, you're too verbose. */ cp = NULL; } len++; maxlen = sp->fts_pathlen - len; level = cur->fts_level + 1; /* Read the directory, attaching each entry to the `link' pointer. */ doadjust = 0; - for (head = tail = NULL, nitems = 0; dirp && (dp = readdir(dirp));) { + for (head = tail = NULL, nitems = 0; + dirp && (dp = freebsd11_readdir(dirp));) { dnamlen = dp->d_namlen; if (!ISSET(FTS_SEEDOT) && ISDOT(dp->d_name)) continue; if ((p = fts_alloc(sp, dp->d_name, (int)dnamlen)) == NULL) goto mem1; if (dnamlen >= maxlen) { /* include space for NUL */ oldaddr = sp->fts_path; if (fts_palloc(sp, dnamlen + len + 1)) { /* * No more memory for path or structures. Save * errno, free up the current structure and the * structures already allocated. */ mem1: saved_errno = errno; if (p) free(p); fts_lfree(head); (void)closedir(dirp); cur->fts_info = FTS_ERR; SET(FTS_STOP); errno = saved_errno; return (NULL); } /* Did realloc() change the pointer? */ if (oldaddr != sp->fts_path) { doadjust = 1; if (ISSET(FTS_NOCHDIR)) cp = sp->fts_path + len; } maxlen = sp->fts_pathlen - len; } if (len + dnamlen >= USHRT_MAX) { /* * In an FTSENT, fts_pathlen is a u_short so it is * possible to wraparound here. If we do, free up * the current structure and the structures already * allocated, then error out with ENAMETOOLONG. */ free(p); fts_lfree(head); (void)closedir(dirp); cur->fts_info = FTS_ERR; SET(FTS_STOP); errno = ENAMETOOLONG; return (NULL); } p->fts_level = level; p->fts_parent = sp->fts_cur; p->fts_pathlen = len + dnamlen; #ifdef FTS_WHITEOUT if (dp->d_type == DT_WHT) p->fts_flags |= FTS_ISW; #endif if (cderrno) { if (nlinks) { p->fts_info = FTS_NS; p->fts_errno = cderrno; } else p->fts_info = FTS_NSOK; p->fts_accpath = cur->fts_accpath; } else if (nlinks == 0 #ifdef DT_DIR || (nostat && dp->d_type != DT_DIR && dp->d_type != DT_UNKNOWN) #endif ) { p->fts_accpath = ISSET(FTS_NOCHDIR) ? p->fts_path : p->fts_name; p->fts_info = FTS_NSOK; } else { /* Build a file name for fts_stat to stat. */ if (ISSET(FTS_NOCHDIR)) { p->fts_accpath = p->fts_path; memmove(cp, p->fts_name, p->fts_namelen + 1); } else p->fts_accpath = p->fts_name; /* Stat it. */ p->fts_info = fts_stat(sp, p, 0); /* Decrement link count if applicable. */ if (nlinks > 0 && (p->fts_info == FTS_D || p->fts_info == FTS_DC || p->fts_info == FTS_DOT)) --nlinks; } /* We walk in directory order so "ls -f" doesn't get upset. */ p->fts_link = NULL; if (head == NULL) head = tail = p; else { tail->fts_link = p; tail = p; } ++nitems; } if (dirp) (void)closedir(dirp); /* * If realloc() changed the address of the path, adjust the * addresses for the rest of the tree and the dir list. */ if (doadjust) fts_padjust(sp, head); /* * If not changing directories, reset the path back to original * state. */ if (ISSET(FTS_NOCHDIR)) { if (len == sp->fts_pathlen || nitems == 0) --cp; *cp = '\0'; } /* * If descended after called from fts_children or after called from * fts_read and nothing found, get back. At the root level we use * the saved fd; if one of fts_open()'s arguments is a relative path * to an empty directory, we wind up here with no other way back. If * can't get back, we're done. */ if (descend && (type == BCHILD || !nitems) && (cur->fts_level == FTS_ROOTLEVEL ? FCHDIR(sp, sp->fts_rfd) : fts_safe_changedir(sp, cur->fts_parent, -1, ".."))) { cur->fts_info = FTS_ERR; SET(FTS_STOP); return (NULL); } /* If didn't find anything, return NULL. */ if (!nitems) { if (type == BREAD) cur->fts_info = FTS_DP; return (NULL); } /* Sort the entries. */ if (sp->fts_compar && nitems > 1) head = fts_sort(sp, head, nitems); return (head); } static u_short fts_stat(FTS *sp, FTSENT *p, int follow) { FTSENT *t; - dev_t dev; - ino_t ino; - struct stat *sbp, sb; + uint32_t dev; + uint32_t ino; + struct freebsd11_stat *sbp, sb; int saved_errno; /* If user needs stat info, stat buffer already allocated. */ sbp = ISSET(FTS_NOSTAT) ? &sb : p->fts_statp; #ifdef FTS_WHITEOUT /* Check for whiteout. */ if (p->fts_flags & FTS_ISW) { if (sbp != &sb) { memset(sbp, '\0', sizeof(*sbp)); sbp->st_mode = S_IFWHT; } return (FTS_W); } #endif /* * If doing a logical walk, or application requested FTS_FOLLOW, do * a stat(2). If that fails, check for a non-existent symlink. If * fail, set the errno from the stat call. */ if (ISSET(FTS_LOGICAL) || follow) { - if (stat(p->fts_accpath, sbp)) { + if (freebsd11_stat(p->fts_accpath, sbp)) { saved_errno = errno; - if (!lstat(p->fts_accpath, sbp)) { + if (!freebsd11_lstat(p->fts_accpath, sbp)) { errno = 0; return (FTS_SLNONE); } p->fts_errno = saved_errno; goto err; } - } else if (lstat(p->fts_accpath, sbp)) { + } else if (freebsd11_lstat(p->fts_accpath, sbp)) { p->fts_errno = errno; err: memset(sbp, 0, sizeof(struct stat)); return (FTS_NS); } if (S_ISDIR(sbp->st_mode)) { /* * Set the device/inode. Used to find cycles and check for * crossing mount points. Also remember the link count, used * in fts_build to limit the number of stat calls. It is * understood that these fields are only referenced if fts_info * is set to FTS_D. */ dev = p->fts_dev = sbp->st_dev; ino = p->fts_ino = sbp->st_ino; p->fts_nlink = sbp->st_nlink; if (ISDOT(p->fts_name)) return (FTS_DOT); /* * Cycle detection is done by brute force when the directory * is first encountered. If the tree gets deep enough or the * number of symbolic links to directories is high enough, * something faster might be worthwhile. */ for (t = p->fts_parent; t->fts_level >= FTS_ROOTLEVEL; t = t->fts_parent) if (ino == t->fts_ino && dev == t->fts_dev) { p->fts_cycle = t; return (FTS_DC); } return (FTS_D); } if (S_ISLNK(sbp->st_mode)) return (FTS_SL); if (S_ISREG(sbp->st_mode)) return (FTS_F); return (FTS_DEFAULT); } /* * The comparison function takes pointers to pointers to FTSENT structures. * Qsort wants a comparison function that takes pointers to void. * (Both with appropriate levels of const-poisoning, of course!) * Use a trampoline function to deal with the difference. */ static int fts_compar(const void *a, const void *b) { FTS *parent; parent = (*(const FTSENT * const *)a)->fts_fts; return (*parent->fts_compar)(a, b); } static FTSENT * fts_sort(FTS *sp, FTSENT *head, int nitems) { FTSENT **ap, *p; /* * Construct an array of pointers to the structures and call qsort(3). * Reassemble the array in the order returned by qsort. If unable to * sort for memory reasons, return the directory entries in their * current order. Allocate enough space for the current needs plus * 40 so don't realloc one entry at a time. */ if (nitems > sp->fts_nitems) { sp->fts_nitems = nitems + 40; if ((sp->fts_array = reallocf(sp->fts_array, sp->fts_nitems * sizeof(FTSENT *))) == NULL) { sp->fts_nitems = 0; return (head); } } for (ap = sp->fts_array, p = head; p; p = p->fts_link) *ap++ = p; qsort(sp->fts_array, nitems, sizeof(FTSENT *), fts_compar); for (head = *(ap = sp->fts_array); --nitems; ++ap) ap[0]->fts_link = ap[1]; ap[0]->fts_link = NULL; return (head); } static FTSENT * fts_alloc(FTS *sp, char *name, int namelen) { FTSENT *p; size_t len; struct ftsent_withstat { FTSENT ent; - struct stat statbuf; + struct freebsd11_stat statbuf; }; /* * The file name is a variable length array and no stat structure is * necessary if the user has set the nostat bit. Allocate the FTSENT * structure, the file name and the stat structure in one chunk, but * be careful that the stat structure is reasonably aligned. */ if (ISSET(FTS_NOSTAT)) len = sizeof(FTSENT) + namelen + 1; else len = sizeof(struct ftsent_withstat) + namelen + 1; if ((p = malloc(len)) == NULL) return (NULL); if (ISSET(FTS_NOSTAT)) { p->fts_name = (char *)(p + 1); p->fts_statp = NULL; } else { p->fts_name = (char *)((struct ftsent_withstat *)p + 1); p->fts_statp = &((struct ftsent_withstat *)p)->statbuf; } /* Copy the name and guarantee NUL termination. */ memcpy(p->fts_name, name, namelen); p->fts_name[namelen] = '\0'; p->fts_namelen = namelen; p->fts_path = sp->fts_path; p->fts_errno = 0; p->fts_flags = 0; p->fts_instr = FTS_NOINSTR; p->fts_number = 0; p->fts_pointer = NULL; p->fts_fts = sp; return (p); } static void fts_lfree(FTSENT *head) { FTSENT *p; /* Free a linked list of structures. */ while ((p = head)) { head = head->fts_link; free(p); } } /* * Allow essentially unlimited paths; find, rm, ls should all work on any tree. * Most systems will allow creation of paths much longer than MAXPATHLEN, even * though the kernel won't resolve them. Add the size (not just what's needed) * plus 256 bytes so don't realloc the path 2 bytes at a time. */ static int fts_palloc(FTS *sp, size_t more) { sp->fts_pathlen += more + 256; /* * Check for possible wraparound. In an FTS, fts_pathlen is * a signed int but in an FTSENT it is an unsigned short. * We limit fts_pathlen to USHRT_MAX to be safe in both cases. */ if (sp->fts_pathlen < 0 || sp->fts_pathlen >= USHRT_MAX) { if (sp->fts_path) free(sp->fts_path); sp->fts_path = NULL; errno = ENAMETOOLONG; return (1); } sp->fts_path = reallocf(sp->fts_path, sp->fts_pathlen); return (sp->fts_path == NULL); } /* * When the path is realloc'd, have to fix all of the pointers in structures * already returned. */ static void fts_padjust(FTS *sp, FTSENT *head) { FTSENT *p; char *addr = sp->fts_path; #define ADJUST(p) do { \ if ((p)->fts_accpath != (p)->fts_name) { \ (p)->fts_accpath = \ (char *)addr + ((p)->fts_accpath - (p)->fts_path); \ } \ (p)->fts_path = addr; \ } while (0) /* Adjust the current set of children. */ for (p = sp->fts_child; p; p = p->fts_link) ADJUST(p); /* Adjust the rest of the tree, including the current level. */ for (p = head; p->fts_level >= FTS_ROOTLEVEL;) { ADJUST(p); p = p->fts_link ? p->fts_link : p->fts_parent; } } static size_t fts_maxarglen(char * const *argv) { size_t len, max; for (max = 0; *argv; ++argv) if ((len = strlen(*argv)) > max) max = len; return (max + 1); } /* * Change to dir specified by fd or p->fts_accpath without getting * tricked by someone changing the world out from underneath us. * Assumes p->fts_dev and p->fts_ino are filled in. */ static int fts_safe_changedir(FTS *sp, FTSENT *p, int fd, char *path) { int ret, oerrno, newfd; - struct stat sb; + struct freebsd11_stat sb; newfd = fd; if (ISSET(FTS_NOCHDIR)) return (0); if (fd < 0 && (newfd = _open(path, O_RDONLY | O_CLOEXEC, 0)) < 0) return (-1); - if (_fstat(newfd, &sb)) { + if (freebsd11_fstat(newfd, &sb)) { ret = -1; goto bail; } if (p->fts_dev != sb.st_dev || p->fts_ino != sb.st_ino) { errno = ENOENT; /* disinformation */ ret = -1; goto bail; } ret = fchdir(newfd); bail: oerrno = errno; if (fd < 0) (void)_close(newfd); errno = oerrno; return (ret); } /* * Check if the filesystem for "ent" has UFS-style links. */ static int fts_ufslinks(FTS *sp, const FTSENT *ent) { struct _fts_private *priv; const char **cpp; priv = (struct _fts_private *)sp; /* * If this node's device is different from the previous, grab * the filesystem information, and decide on the reliability * of the link information from this filesystem for stat(2) * avoidance. */ if (priv->ftsp_dev != ent->fts_dev) { - if (statfs(ent->fts_path, &priv->ftsp_statfs) != -1) { + if (freebsd11_statfs(ent->fts_path, &priv->ftsp_statfs) != -1) { priv->ftsp_dev = ent->fts_dev; priv->ftsp_linksreliable = 0; for (cpp = ufslike_filesystems; *cpp; cpp++) { if (strcmp(priv->ftsp_statfs.f_fstypename, *cpp) == 0) { priv->ftsp_linksreliable = 1; break; } } } else { priv->ftsp_linksreliable = 0; } } return (priv->ftsp_linksreliable); } __sym_compat(fts_open, __fts_open_44bsd, FBSD_1.0); __sym_compat(fts_close, __fts_close_44bsd, FBSD_1.0); __sym_compat(fts_read, __fts_read_44bsd, FBSD_1.0); __sym_compat(fts_set, __fts_set_44bsd, FBSD_1.0); __sym_compat(fts_children, __fts_children_44bsd, FBSD_1.0); __sym_compat(fts_get_clientptr, __fts_get_clientptr_44bsd, FBSD_1.0); __sym_compat(fts_get_stream, __fts_get_stream_44bsd, FBSD_1.0); __sym_compat(fts_set_clientptr, __fts_set_clientptr_44bsd, FBSD_1.0); Index: head/lib/libc/gen/fts-compat.h =================================================================== --- head/lib/libc/gen/fts-compat.h (revision 318735) +++ head/lib/libc/gen/fts-compat.h (revision 318736) @@ -1,128 +1,128 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fts.h 8.3 (Berkeley) 8/14/94 * $FreeBSD$ */ #ifndef _FTS_H_ #define _FTS_H_ typedef struct { struct _ftsent *fts_cur; /* current node */ struct _ftsent *fts_child; /* linked list of children */ struct _ftsent **fts_array; /* sort array */ - dev_t fts_dev; /* starting device # */ + uint32_t fts_dev; /* starting device # */ char *fts_path; /* path for this descent */ int fts_rfd; /* fd for root */ int fts_pathlen; /* sizeof(path) */ int fts_nitems; /* elements in the sort array */ int (*fts_compar) /* compare function */ (const struct _ftsent * const *, const struct _ftsent * const *); #define FTS_COMFOLLOW 0x001 /* follow command line symlinks */ #define FTS_LOGICAL 0x002 /* logical walk */ #define FTS_NOCHDIR 0x004 /* don't change directories */ #define FTS_NOSTAT 0x008 /* don't get stat info */ #define FTS_PHYSICAL 0x010 /* physical walk */ #define FTS_SEEDOT 0x020 /* return dot and dot-dot */ #define FTS_XDEV 0x040 /* don't cross devices */ #define FTS_WHITEOUT 0x080 /* return whiteout information */ #define FTS_OPTIONMASK 0x0ff /* valid user option mask */ #define FTS_NAMEONLY 0x100 /* (private) child names only */ #define FTS_STOP 0x200 /* (private) unrecoverable error */ int fts_options; /* fts_open options, global flags */ void *fts_clientptr; /* thunk for sort function */ } FTS; typedef struct _ftsent { struct _ftsent *fts_cycle; /* cycle node */ struct _ftsent *fts_parent; /* parent directory */ struct _ftsent *fts_link; /* next file in directory */ union { struct { long __fts_number; /* local numeric value */ void *__fts_pointer; /* local address value */ } __struct_ftsent; int64_t __fts_bignum; } __union_ftsent; #define fts_number __union_ftsent.__struct_ftsent.__fts_number #define fts_pointer __union_ftsent.__struct_ftsent.__fts_pointer #define fts_bignum __union_ftsent.__fts_bignum char *fts_accpath; /* access path */ char *fts_path; /* root path */ int fts_errno; /* errno for this node */ int fts_symfd; /* fd for symlink */ u_short fts_pathlen; /* strlen(fts_path) */ u_short fts_namelen; /* strlen(fts_name) */ - ino_t fts_ino; /* inode */ - dev_t fts_dev; /* device */ - nlink_t fts_nlink; /* link count */ + uint32_t fts_ino; /* inode */ + uint32_t fts_dev; /* device */ + uint16_t fts_nlink; /* link count */ #define FTS_ROOTPARENTLEVEL -1 #define FTS_ROOTLEVEL 0 short fts_level; /* depth (-1 to N) */ #define FTS_D 1 /* preorder directory */ #define FTS_DC 2 /* directory that causes cycles */ #define FTS_DEFAULT 3 /* none of the above */ #define FTS_DNR 4 /* unreadable directory */ #define FTS_DOT 5 /* dot or dot-dot */ #define FTS_DP 6 /* postorder directory */ #define FTS_ERR 7 /* error; errno is set */ #define FTS_F 8 /* regular file */ #define FTS_INIT 9 /* initialized only */ #define FTS_NS 10 /* stat(2) failed */ #define FTS_NSOK 11 /* no stat(2) requested */ #define FTS_SL 12 /* symbolic link */ #define FTS_SLNONE 13 /* symbolic link without target */ #define FTS_W 14 /* whiteout object */ u_short fts_info; /* user flags for FTSENT structure */ #define FTS_DONTCHDIR 0x01 /* don't chdir .. to the parent */ #define FTS_SYMFOLLOW 0x02 /* followed a symlink to get here */ #define FTS_ISW 0x04 /* this is a whiteout object */ u_short fts_flags; /* private flags for FTSENT structure */ #define FTS_AGAIN 1 /* read node again */ #define FTS_FOLLOW 2 /* follow symbolic link */ #define FTS_NOINSTR 3 /* no instructions */ #define FTS_SKIP 4 /* discard node */ u_short fts_instr; /* fts_set() instructions */ - struct stat *fts_statp; /* stat(2) information */ + struct freebsd11_stat *fts_statp; /* stat(2) information */ char *fts_name; /* file name */ FTS *fts_fts; /* back pointer to main FTS */ } FTSENT; #define fts_get_clientptr(fts) ((fts)->fts_clientptr) #define fts_get_stream(ftsent) ((ftsent)->fts_fts) #endif /* !_FTS_H_ */ Index: head/lib/libc/gen/fts-compat11.c =================================================================== --- head/lib/libc/gen/fts-compat11.c (nonexistent) +++ head/lib/libc/gen/fts-compat11.c (revision 318736) @@ -0,0 +1,1199 @@ +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: $OpenBSD: fts.c,v 1.22 1999/10/03 19:22:22 millert Exp $ + */ + +#if 0 +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)fts.c 8.6 (Berkeley) 8/14/94"; +#endif /* LIBC_SCCS and not lint */ +#endif + +#include +__FBSDID("$FreeBSD$"); + +#include "namespace.h" +#include +#define _WANT_FREEBSD11_STATFS +#include +#define _WANT_FREEBSD11_STAT +#include + +#define _WANT_FREEBSD11_DIRENT +#include +#include +#include +#include +#include +#include +#include +#include "gen-compat.h" +#include "fts-compat11.h" +#include "un-namespace.h" + +#include "gen-private.h" + +static FTSENT11 *fts_alloc(FTS11 *, char *, size_t); +static FTSENT11 *fts_build(FTS11 *, int); +static void fts_lfree(FTSENT11 *); +static void fts_load(FTS11 *, FTSENT11 *); +static size_t fts_maxarglen(char * const *); +static void fts_padjust(FTS11 *, FTSENT11 *); +static int fts_palloc(FTS11 *, size_t); +static FTSENT11 *fts_sort(FTS11 *, FTSENT11 *, size_t); +static int fts_stat(FTS11 *, FTSENT11 *, int, int); +static int fts_safe_changedir(FTS11 *, FTSENT11 *, int, char *); +static int fts_ufslinks(FTS11 *, const FTSENT11 *); + +#define ISDOT(a) (a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2]))) + +#define CLR(opt) (sp->fts_options &= ~(opt)) +#define ISSET(opt) (sp->fts_options & (opt)) +#define SET(opt) (sp->fts_options |= (opt)) + +#define FCHDIR(sp, fd) (!ISSET(FTS_NOCHDIR) && fchdir(fd)) + +/* fts_build flags */ +#define BCHILD 1 /* fts_children */ +#define BNAMES 2 /* fts_children, names only */ +#define BREAD 3 /* fts_read */ + +/* + * Internal representation of an FTS, including extra implementation + * details. The FTS returned from fts_open points to this structure's + * ftsp_fts member (and can be cast to an _fts_private as required) + */ +struct _fts_private11 { + FTS11 ftsp_fts; + struct freebsd11_statfs ftsp_statfs; + uint32_t ftsp_dev; + int ftsp_linksreliable; +}; + +/* + * The "FTS_NOSTAT" option can avoid a lot of calls to stat(2) if it + * knows that a directory could not possibly have subdirectories. This + * is decided by looking at the link count: a subdirectory would + * increment its parent's link count by virtue of its own ".." entry. + * This assumption only holds for UFS-like filesystems that implement + * links and directories this way, so we must punt for others. + */ + +static const char *ufslike_filesystems[] = { + "ufs", + "zfs", + "nfs", + "ext2fs", + 0 +}; + +FTS11 * +freebsd11_fts_open(char * const *argv, int options, + int (*compar)(const FTSENT11 * const *, const FTSENT11 * const *)) +{ + struct _fts_private11 *priv; + FTS11 *sp; + FTSENT11 *p, *root; + FTSENT11 *parent, *tmp; + size_t len, nitems; + + /* Options check. */ + if (options & ~FTS_OPTIONMASK) { + errno = EINVAL; + return (NULL); + } + + /* fts_open() requires at least one path */ + if (*argv == NULL) { + errno = EINVAL; + return (NULL); + } + + /* Allocate/initialize the stream. */ + if ((priv = calloc(1, sizeof(*priv))) == NULL) + return (NULL); + sp = &priv->ftsp_fts; + sp->fts_compar = compar; + sp->fts_options = options; + + /* Shush, GCC. */ + tmp = NULL; + + /* Logical walks turn on NOCHDIR; symbolic links are too hard. */ + if (ISSET(FTS_LOGICAL)) + SET(FTS_NOCHDIR); + + /* + * Start out with 1K of path space, and enough, in any case, + * to hold the user's paths. + */ + if (fts_palloc(sp, MAX(fts_maxarglen(argv), MAXPATHLEN))) + goto mem1; + + /* Allocate/initialize root's parent. */ + if ((parent = fts_alloc(sp, "", 0)) == NULL) + goto mem2; + parent->fts_level = FTS_ROOTPARENTLEVEL; + + /* Allocate/initialize root(s). */ + for (root = NULL, nitems = 0; *argv != NULL; ++argv, ++nitems) { + len = strlen(*argv); + + p = fts_alloc(sp, *argv, len); + p->fts_level = FTS_ROOTLEVEL; + p->fts_parent = parent; + p->fts_accpath = p->fts_name; + p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW), -1); + + /* Command-line "." and ".." are real directories. */ + if (p->fts_info == FTS_DOT) + p->fts_info = FTS_D; + + /* + * If comparison routine supplied, traverse in sorted + * order; otherwise traverse in the order specified. + */ + if (compar) { + p->fts_link = root; + root = p; + } else { + p->fts_link = NULL; + if (root == NULL) + tmp = root = p; + else { + tmp->fts_link = p; + tmp = p; + } + } + } + if (compar && nitems > 1) + root = fts_sort(sp, root, nitems); + + /* + * Allocate a dummy pointer and make fts_read think that we've just + * finished the node before the root(s); set p->fts_info to FTS_INIT + * so that everything about the "current" node is ignored. + */ + if ((sp->fts_cur = fts_alloc(sp, "", 0)) == NULL) + goto mem3; + sp->fts_cur->fts_link = root; + sp->fts_cur->fts_info = FTS_INIT; + + /* + * If using chdir(2), grab a file descriptor pointing to dot to ensure + * that we can get back here; this could be avoided for some paths, + * but almost certainly not worth the effort. Slashes, symbolic links, + * and ".." are all fairly nasty problems. Note, if we can't get the + * descriptor we run anyway, just more slowly. + */ + if (!ISSET(FTS_NOCHDIR) && + (sp->fts_rfd = _open(".", O_RDONLY | O_CLOEXEC, 0)) < 0) + SET(FTS_NOCHDIR); + + return (sp); + +mem3: fts_lfree(root); + free(parent); +mem2: free(sp->fts_path); +mem1: free(sp); + return (NULL); +} + +static void +fts_load(FTS11 *sp, FTSENT11 *p) +{ + size_t len; + char *cp; + + /* + * Load the stream structure for the next traversal. Since we don't + * actually enter the directory until after the preorder visit, set + * the fts_accpath field specially so the chdir gets done to the right + * place and the user can access the first node. From fts_open it's + * known that the path will fit. + */ + len = p->fts_pathlen = p->fts_namelen; + memmove(sp->fts_path, p->fts_name, len + 1); + if ((cp = strrchr(p->fts_name, '/')) && (cp != p->fts_name || cp[1])) { + len = strlen(++cp); + memmove(p->fts_name, cp, len + 1); + p->fts_namelen = len; + } + p->fts_accpath = p->fts_path = sp->fts_path; + sp->fts_dev = p->fts_dev; +} + +int +freebsd11_fts_close(FTS11 *sp) +{ + FTSENT11 *freep, *p; + int saved_errno; + + /* + * This still works if we haven't read anything -- the dummy structure + * points to the root list, so we step through to the end of the root + * list which has a valid parent pointer. + */ + if (sp->fts_cur) { + for (p = sp->fts_cur; p->fts_level >= FTS_ROOTLEVEL;) { + freep = p; + p = p->fts_link != NULL ? p->fts_link : p->fts_parent; + free(freep); + } + free(p); + } + + /* Free up child linked list, sort array, path buffer. */ + if (sp->fts_child) + fts_lfree(sp->fts_child); + if (sp->fts_array) + free(sp->fts_array); + free(sp->fts_path); + + /* Return to original directory, save errno if necessary. */ + if (!ISSET(FTS_NOCHDIR)) { + saved_errno = fchdir(sp->fts_rfd) ? errno : 0; + (void)_close(sp->fts_rfd); + + /* Set errno and return. */ + if (saved_errno != 0) { + /* Free up the stream pointer. */ + free(sp); + errno = saved_errno; + return (-1); + } + } + + /* Free up the stream pointer. */ + free(sp); + return (0); +} + +/* + * Special case of "/" at the end of the path so that slashes aren't + * appended which would cause paths to be written as "....//foo". + */ +#define NAPPEND(p) \ + (p->fts_path[p->fts_pathlen - 1] == '/' \ + ? p->fts_pathlen - 1 : p->fts_pathlen) + +FTSENT11 * +freebsd11_fts_read(FTS11 *sp) +{ + FTSENT11 *p, *tmp; + int instr; + char *t; + int saved_errno; + + /* If finished or unrecoverable error, return NULL. */ + if (sp->fts_cur == NULL || ISSET(FTS_STOP)) + return (NULL); + + /* Set current node pointer. */ + p = sp->fts_cur; + + /* Save and zero out user instructions. */ + instr = p->fts_instr; + p->fts_instr = FTS_NOINSTR; + + /* Any type of file may be re-visited; re-stat and re-turn. */ + if (instr == FTS_AGAIN) { + p->fts_info = fts_stat(sp, p, 0, -1); + return (p); + } + + /* + * Following a symlink -- SLNONE test allows application to see + * SLNONE and recover. If indirecting through a symlink, have + * keep a pointer to current location. If unable to get that + * pointer, follow fails. + */ + if (instr == FTS_FOLLOW && + (p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE)) { + p->fts_info = fts_stat(sp, p, 1, -1); + if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) { + if ((p->fts_symfd = _open(".", O_RDONLY | O_CLOEXEC, + 0)) < 0) { + p->fts_errno = errno; + p->fts_info = FTS_ERR; + } else + p->fts_flags |= FTS_SYMFOLLOW; + } + return (p); + } + + /* Directory in pre-order. */ + if (p->fts_info == FTS_D) { + /* If skipped or crossed mount point, do post-order visit. */ + if (instr == FTS_SKIP || + (ISSET(FTS_XDEV) && p->fts_dev != sp->fts_dev)) { + if (p->fts_flags & FTS_SYMFOLLOW) + (void)_close(p->fts_symfd); + if (sp->fts_child) { + fts_lfree(sp->fts_child); + sp->fts_child = NULL; + } + p->fts_info = FTS_DP; + return (p); + } + + /* Rebuild if only read the names and now traversing. */ + if (sp->fts_child != NULL && ISSET(FTS_NAMEONLY)) { + CLR(FTS_NAMEONLY); + fts_lfree(sp->fts_child); + sp->fts_child = NULL; + } + + /* + * Cd to the subdirectory. + * + * If have already read and now fail to chdir, whack the list + * to make the names come out right, and set the parent errno + * so the application will eventually get an error condition. + * Set the FTS_DONTCHDIR flag so that when we logically change + * directories back to the parent we don't do a chdir. + * + * If haven't read do so. If the read fails, fts_build sets + * FTS_STOP or the fts_info field of the node. + */ + if (sp->fts_child != NULL) { + if (fts_safe_changedir(sp, p, -1, p->fts_accpath)) { + p->fts_errno = errno; + p->fts_flags |= FTS_DONTCHDIR; + for (p = sp->fts_child; p != NULL; + p = p->fts_link) + p->fts_accpath = + p->fts_parent->fts_accpath; + } + } else if ((sp->fts_child = fts_build(sp, BREAD)) == NULL) { + if (ISSET(FTS_STOP)) + return (NULL); + return (p); + } + p = sp->fts_child; + sp->fts_child = NULL; + goto name; + } + + /* Move to the next node on this level. */ +next: tmp = p; + if ((p = p->fts_link) != NULL) { + /* + * If reached the top, return to the original directory (or + * the root of the tree), and load the paths for the next root. + */ + if (p->fts_level == FTS_ROOTLEVEL) { + if (FCHDIR(sp, sp->fts_rfd)) { + SET(FTS_STOP); + return (NULL); + } + free(tmp); + fts_load(sp, p); + return (sp->fts_cur = p); + } + + /* + * User may have called fts_set on the node. If skipped, + * ignore. If followed, get a file descriptor so we can + * get back if necessary. + */ + if (p->fts_instr == FTS_SKIP) { + free(tmp); + goto next; + } + if (p->fts_instr == FTS_FOLLOW) { + p->fts_info = fts_stat(sp, p, 1, -1); + if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) { + if ((p->fts_symfd = + _open(".", O_RDONLY | O_CLOEXEC, 0)) < 0) { + p->fts_errno = errno; + p->fts_info = FTS_ERR; + } else + p->fts_flags |= FTS_SYMFOLLOW; + } + p->fts_instr = FTS_NOINSTR; + } + + free(tmp); + +name: t = sp->fts_path + NAPPEND(p->fts_parent); + *t++ = '/'; + memmove(t, p->fts_name, p->fts_namelen + 1); + return (sp->fts_cur = p); + } + + /* Move up to the parent node. */ + p = tmp->fts_parent; + + if (p->fts_level == FTS_ROOTPARENTLEVEL) { + /* + * Done; free everything up and set errno to 0 so the user + * can distinguish between error and EOF. + */ + free(tmp); + free(p); + errno = 0; + return (sp->fts_cur = NULL); + } + + /* NUL terminate the pathname. */ + sp->fts_path[p->fts_pathlen] = '\0'; + + /* + * Return to the parent directory. If at a root node or came through + * a symlink, go back through the file descriptor. Otherwise, cd up + * one directory. + */ + if (p->fts_level == FTS_ROOTLEVEL) { + if (FCHDIR(sp, sp->fts_rfd)) { + SET(FTS_STOP); + return (NULL); + } + } else if (p->fts_flags & FTS_SYMFOLLOW) { + if (FCHDIR(sp, p->fts_symfd)) { + saved_errno = errno; + (void)_close(p->fts_symfd); + errno = saved_errno; + SET(FTS_STOP); + return (NULL); + } + (void)_close(p->fts_symfd); + } else if (!(p->fts_flags & FTS_DONTCHDIR) && + fts_safe_changedir(sp, p->fts_parent, -1, "..")) { + SET(FTS_STOP); + return (NULL); + } + free(tmp); + p->fts_info = p->fts_errno ? FTS_ERR : FTS_DP; + return (sp->fts_cur = p); +} + +/* + * Fts_set takes the stream as an argument although it's not used in this + * implementation; it would be necessary if anyone wanted to add global + * semantics to fts using fts_set. An error return is allowed for similar + * reasons. + */ +/* ARGSUSED */ +int +freebsd11_fts_set(FTS11 *sp, FTSENT11 *p, int instr) +{ + if (instr != 0 && instr != FTS_AGAIN && instr != FTS_FOLLOW && + instr != FTS_NOINSTR && instr != FTS_SKIP) { + errno = EINVAL; + return (1); + } + p->fts_instr = instr; + return (0); +} + +FTSENT11 * +freebsd11_fts_children(FTS11 *sp, int instr) +{ + FTSENT11 *p; + int fd, rc, serrno; + + if (instr != 0 && instr != FTS_NAMEONLY) { + errno = EINVAL; + return (NULL); + } + + /* Set current node pointer. */ + p = sp->fts_cur; + + /* + * Errno set to 0 so user can distinguish empty directory from + * an error. + */ + errno = 0; + + /* Fatal errors stop here. */ + if (ISSET(FTS_STOP)) + return (NULL); + + /* Return logical hierarchy of user's arguments. */ + if (p->fts_info == FTS_INIT) + return (p->fts_link); + + /* + * If not a directory being visited in pre-order, stop here. Could + * allow FTS_DNR, assuming the user has fixed the problem, but the + * same effect is available with FTS_AGAIN. + */ + if (p->fts_info != FTS_D /* && p->fts_info != FTS_DNR */) + return (NULL); + + /* Free up any previous child list. */ + if (sp->fts_child != NULL) + fts_lfree(sp->fts_child); + + if (instr == FTS_NAMEONLY) { + SET(FTS_NAMEONLY); + instr = BNAMES; + } else + instr = BCHILD; + + /* + * If using chdir on a relative path and called BEFORE fts_read does + * its chdir to the root of a traversal, we can lose -- we need to + * chdir into the subdirectory, and we don't know where the current + * directory is, so we can't get back so that the upcoming chdir by + * fts_read will work. + */ + if (p->fts_level != FTS_ROOTLEVEL || p->fts_accpath[0] == '/' || + ISSET(FTS_NOCHDIR)) + return (sp->fts_child = fts_build(sp, instr)); + + if ((fd = _open(".", O_RDONLY | O_CLOEXEC, 0)) < 0) + return (NULL); + sp->fts_child = fts_build(sp, instr); + serrno = (sp->fts_child == NULL) ? errno : 0; + rc = fchdir(fd); + if (rc < 0 && serrno == 0) + serrno = errno; + (void)_close(fd); + errno = serrno; + if (rc < 0) + return (NULL); + return (sp->fts_child); +} + +#ifndef freebsd11_fts_get_clientptr +#error "freebsd11_fts_get_clientptr not defined" +#endif + +void * +(freebsd11_fts_get_clientptr)(FTS11 *sp) +{ + + return (freebsd11_fts_get_clientptr(sp)); +} + +#ifndef freebsd11_fts_get_stream +#error "freebsd11_fts_get_stream not defined" +#endif + +FTS11 * +(freebsd11_fts_get_stream)(FTSENT11 *p) +{ + return (freebsd11_fts_get_stream(p)); +} + +void +freebsd11_fts_set_clientptr(FTS11 *sp, void *clientptr) +{ + + sp->fts_clientptr = clientptr; +} + +/* + * This is the tricky part -- do not casually change *anything* in here. The + * idea is to build the linked list of entries that are used by fts_children + * and fts_read. There are lots of special cases. + * + * The real slowdown in walking the tree is the stat calls. If FTS_NOSTAT is + * set and it's a physical walk (so that symbolic links can't be directories), + * we can do things quickly. First, if it's a 4.4BSD file system, the type + * of the file is in the directory entry. Otherwise, we assume that the number + * of subdirectories in a node is equal to the number of links to the parent. + * The former skips all stat calls. The latter skips stat calls in any leaf + * directories and for any files after the subdirectories in the directory have + * been found, cutting the stat calls by about 2/3. + */ +static FTSENT11 * +fts_build(FTS11 *sp, int type) +{ + struct freebsd11_dirent *dp; + FTSENT11 *p, *head; + FTSENT11 *cur, *tail; + DIR *dirp; + void *oldaddr; + char *cp; + int cderrno, descend, oflag, saved_errno, nostat, doadjust; + long level; + long nlinks; /* has to be signed because -1 is a magic value */ + size_t dnamlen, len, maxlen, nitems; + + /* Set current node pointer. */ + cur = sp->fts_cur; + + /* + * Open the directory for reading. If this fails, we're done. + * If being called from fts_read, set the fts_info field. + */ +#ifdef FTS_WHITEOUT + if (ISSET(FTS_WHITEOUT)) + oflag = DTF_NODUP | DTF_REWIND; + else + oflag = DTF_HIDEW | DTF_NODUP | DTF_REWIND; +#else +#define __opendir2(path, flag) opendir(path) +#endif + if ((dirp = __opendir2(cur->fts_accpath, oflag)) == NULL) { + if (type == BREAD) { + cur->fts_info = FTS_DNR; + cur->fts_errno = errno; + } + return (NULL); + } + + /* + * Nlinks is the number of possible entries of type directory in the + * directory if we're cheating on stat calls, 0 if we're not doing + * any stat calls at all, -1 if we're doing stats on everything. + */ + if (type == BNAMES) { + nlinks = 0; + /* Be quiet about nostat, GCC. */ + nostat = 0; + } else if (ISSET(FTS_NOSTAT) && ISSET(FTS_PHYSICAL)) { + if (fts_ufslinks(sp, cur)) + nlinks = cur->fts_nlink - (ISSET(FTS_SEEDOT) ? 0 : 2); + else + nlinks = -1; + nostat = 1; + } else { + nlinks = -1; + nostat = 0; + } + +#ifdef notdef + (void)printf("nlinks == %d (cur: %d)\n", nlinks, cur->fts_nlink); + (void)printf("NOSTAT %d PHYSICAL %d SEEDOT %d\n", + ISSET(FTS_NOSTAT), ISSET(FTS_PHYSICAL), ISSET(FTS_SEEDOT)); +#endif + /* + * If we're going to need to stat anything or we want to descend + * and stay in the directory, chdir. If this fails we keep going, + * but set a flag so we don't chdir after the post-order visit. + * We won't be able to stat anything, but we can still return the + * names themselves. Note, that since fts_read won't be able to + * chdir into the directory, it will have to return different path + * names than before, i.e. "a/b" instead of "b". Since the node + * has already been visited in pre-order, have to wait until the + * post-order visit to return the error. There is a special case + * here, if there was nothing to stat then it's not an error to + * not be able to stat. This is all fairly nasty. If a program + * needed sorted entries or stat information, they had better be + * checking FTS_NS on the returned nodes. + */ + cderrno = 0; + if (nlinks || type == BREAD) { + if (fts_safe_changedir(sp, cur, _dirfd(dirp), NULL)) { + if (nlinks && type == BREAD) + cur->fts_errno = errno; + cur->fts_flags |= FTS_DONTCHDIR; + descend = 0; + cderrno = errno; + } else + descend = 1; + } else + descend = 0; + + /* + * Figure out the max file name length that can be stored in the + * current path -- the inner loop allocates more path as necessary. + * We really wouldn't have to do the maxlen calculations here, we + * could do them in fts_read before returning the path, but it's a + * lot easier here since the length is part of the dirent structure. + * + * If not changing directories set a pointer so that can just append + * each new name into the path. + */ + len = NAPPEND(cur); + if (ISSET(FTS_NOCHDIR)) { + cp = sp->fts_path + len; + *cp++ = '/'; + } else { + /* GCC, you're too verbose. */ + cp = NULL; + } + len++; + maxlen = sp->fts_pathlen - len; + + level = cur->fts_level + 1; + + /* Read the directory, attaching each entry to the `link' pointer. */ + doadjust = 0; + for (head = tail = NULL, nitems = 0; + dirp && (dp = freebsd11_readdir(dirp));) { + dnamlen = dp->d_namlen; + if (!ISSET(FTS_SEEDOT) && ISDOT(dp->d_name)) + continue; + + if ((p = fts_alloc(sp, dp->d_name, dnamlen)) == NULL) + goto mem1; + if (dnamlen >= maxlen) { /* include space for NUL */ + oldaddr = sp->fts_path; + if (fts_palloc(sp, dnamlen + len + 1)) { + /* + * No more memory for path or structures. Save + * errno, free up the current structure and the + * structures already allocated. + */ +mem1: saved_errno = errno; + if (p) + free(p); + fts_lfree(head); + (void)closedir(dirp); + cur->fts_info = FTS_ERR; + SET(FTS_STOP); + errno = saved_errno; + return (NULL); + } + /* Did realloc() change the pointer? */ + if (oldaddr != sp->fts_path) { + doadjust = 1; + if (ISSET(FTS_NOCHDIR)) + cp = sp->fts_path + len; + } + maxlen = sp->fts_pathlen - len; + } + + p->fts_level = level; + p->fts_parent = sp->fts_cur; + p->fts_pathlen = len + dnamlen; + +#ifdef FTS_WHITEOUT + if (dp->d_type == DT_WHT) + p->fts_flags |= FTS_ISW; +#endif + + if (cderrno) { + if (nlinks) { + p->fts_info = FTS_NS; + p->fts_errno = cderrno; + } else + p->fts_info = FTS_NSOK; + p->fts_accpath = cur->fts_accpath; + } else if (nlinks == 0 +#ifdef DT_DIR + || (nostat && + dp->d_type != DT_DIR && dp->d_type != DT_UNKNOWN) +#endif + ) { + p->fts_accpath = + ISSET(FTS_NOCHDIR) ? p->fts_path : p->fts_name; + p->fts_info = FTS_NSOK; + } else { + /* Build a file name for fts_stat to stat. */ + if (ISSET(FTS_NOCHDIR)) { + p->fts_accpath = p->fts_path; + memmove(cp, p->fts_name, p->fts_namelen + 1); + p->fts_info = fts_stat(sp, p, 0, _dirfd(dirp)); + } else { + p->fts_accpath = p->fts_name; + p->fts_info = fts_stat(sp, p, 0, -1); + } + + /* Decrement link count if applicable. */ + if (nlinks > 0 && (p->fts_info == FTS_D || + p->fts_info == FTS_DC || p->fts_info == FTS_DOT)) + --nlinks; + } + + /* We walk in directory order so "ls -f" doesn't get upset. */ + p->fts_link = NULL; + if (head == NULL) + head = tail = p; + else { + tail->fts_link = p; + tail = p; + } + ++nitems; + } + if (dirp) + (void)closedir(dirp); + + /* + * If realloc() changed the address of the path, adjust the + * addresses for the rest of the tree and the dir list. + */ + if (doadjust) + fts_padjust(sp, head); + + /* + * If not changing directories, reset the path back to original + * state. + */ + if (ISSET(FTS_NOCHDIR)) + sp->fts_path[cur->fts_pathlen] = '\0'; + + /* + * If descended after called from fts_children or after called from + * fts_read and nothing found, get back. At the root level we use + * the saved fd; if one of fts_open()'s arguments is a relative path + * to an empty directory, we wind up here with no other way back. If + * can't get back, we're done. + */ + if (descend && (type == BCHILD || !nitems) && + (cur->fts_level == FTS_ROOTLEVEL ? + FCHDIR(sp, sp->fts_rfd) : + fts_safe_changedir(sp, cur->fts_parent, -1, ".."))) { + fts_lfree(head); + cur->fts_info = FTS_ERR; + SET(FTS_STOP); + return (NULL); + } + + /* If didn't find anything, return NULL. */ + if (!nitems) { + if (type == BREAD) + cur->fts_info = FTS_DP; + return (NULL); + } + + /* Sort the entries. */ + if (sp->fts_compar && nitems > 1) + head = fts_sort(sp, head, nitems); + return (head); +} + +static int +fts_stat(FTS11 *sp, FTSENT11 *p, int follow, int dfd) +{ + FTSENT11 *t; + uint32_t dev; + uint32_t ino; + struct freebsd11_stat *sbp, sb; + int saved_errno; + const char *path; + + if (dfd == -1) + path = p->fts_accpath, dfd = AT_FDCWD; + else + path = p->fts_name; + + /* If user needs stat info, stat buffer already allocated. */ + sbp = ISSET(FTS_NOSTAT) ? &sb : p->fts_statp; + +#ifdef FTS_WHITEOUT + /* Check for whiteout. */ + if (p->fts_flags & FTS_ISW) { + if (sbp != &sb) { + memset(sbp, '\0', sizeof(*sbp)); + sbp->st_mode = S_IFWHT; + } + return (FTS_W); + } +#endif + + /* + * If doing a logical walk, or application requested FTS_FOLLOW, do + * a stat(2). If that fails, check for a non-existent symlink. If + * fail, set the errno from the stat call. + */ + if (ISSET(FTS_LOGICAL) || follow) { + if (freebsd11_fstatat(dfd, path, sbp, 0)) { + saved_errno = errno; + if (freebsd11_fstatat(dfd, path, sbp, + AT_SYMLINK_NOFOLLOW)) { + p->fts_errno = saved_errno; + goto err; + } + errno = 0; + if (S_ISLNK(sbp->st_mode)) + return (FTS_SLNONE); + } + } else if (freebsd11_fstatat(dfd, path, sbp, AT_SYMLINK_NOFOLLOW)) { + p->fts_errno = errno; +err: memset(sbp, 0, sizeof(*sbp)); + return (FTS_NS); + } + + if (S_ISDIR(sbp->st_mode)) { + /* + * Set the device/inode. Used to find cycles and check for + * crossing mount points. Also remember the link count, used + * in fts_build to limit the number of stat calls. It is + * understood that these fields are only referenced if fts_info + * is set to FTS_D. + */ + dev = p->fts_dev = sbp->st_dev; + ino = p->fts_ino = sbp->st_ino; + p->fts_nlink = sbp->st_nlink; + + if (ISDOT(p->fts_name)) + return (FTS_DOT); + + /* + * Cycle detection is done by brute force when the directory + * is first encountered. If the tree gets deep enough or the + * number of symbolic links to directories is high enough, + * something faster might be worthwhile. + */ + for (t = p->fts_parent; + t->fts_level >= FTS_ROOTLEVEL; t = t->fts_parent) + if (ino == t->fts_ino && dev == t->fts_dev) { + p->fts_cycle = t; + return (FTS_DC); + } + return (FTS_D); + } + if (S_ISLNK(sbp->st_mode)) + return (FTS_SL); + if (S_ISREG(sbp->st_mode)) + return (FTS_F); + return (FTS_DEFAULT); +} + +/* + * The comparison function takes pointers to pointers to FTSENT structures. + * Qsort wants a comparison function that takes pointers to void. + * (Both with appropriate levels of const-poisoning, of course!) + * Use a trampoline function to deal with the difference. + */ +static int +fts_compar(const void *a, const void *b) +{ + FTS11 *parent; + + parent = (*(const FTSENT11 * const *)a)->fts_fts; + return (*parent->fts_compar)(a, b); +} + +static FTSENT11 * +fts_sort(FTS11 *sp, FTSENT11 *head, size_t nitems) +{ + FTSENT11 **ap, *p; + + /* + * Construct an array of pointers to the structures and call qsort(3). + * Reassemble the array in the order returned by qsort. If unable to + * sort for memory reasons, return the directory entries in their + * current order. Allocate enough space for the current needs plus + * 40 so don't realloc one entry at a time. + */ + if (nitems > sp->fts_nitems) { + sp->fts_nitems = nitems + 40; + if ((sp->fts_array = reallocf(sp->fts_array, + sp->fts_nitems * sizeof(FTSENT11 *))) == NULL) { + sp->fts_nitems = 0; + return (head); + } + } + for (ap = sp->fts_array, p = head; p; p = p->fts_link) + *ap++ = p; + qsort(sp->fts_array, nitems, sizeof(FTSENT11 *), fts_compar); + for (head = *(ap = sp->fts_array); --nitems; ++ap) + ap[0]->fts_link = ap[1]; + ap[0]->fts_link = NULL; + return (head); +} + +static FTSENT11 * +fts_alloc(FTS11 *sp, char *name, size_t namelen) +{ + FTSENT11 *p; + size_t len; + + struct ftsent11_withstat { + FTSENT11 ent; + struct freebsd11_stat statbuf; + }; + + /* + * The file name is a variable length array and no stat structure is + * necessary if the user has set the nostat bit. Allocate the FTSENT + * structure, the file name and the stat structure in one chunk, but + * be careful that the stat structure is reasonably aligned. + */ + if (ISSET(FTS_NOSTAT)) + len = sizeof(FTSENT11) + namelen + 1; + else + len = sizeof(struct ftsent11_withstat) + namelen + 1; + + if ((p = malloc(len)) == NULL) + return (NULL); + + if (ISSET(FTS_NOSTAT)) { + p->fts_name = (char *)(p + 1); + p->fts_statp = NULL; + } else { + p->fts_name = (char *)((struct ftsent11_withstat *)p + 1); + p->fts_statp = &((struct ftsent11_withstat *)p)->statbuf; + } + + /* Copy the name and guarantee NUL termination. */ + memcpy(p->fts_name, name, namelen); + p->fts_name[namelen] = '\0'; + p->fts_namelen = namelen; + p->fts_path = sp->fts_path; + p->fts_errno = 0; + p->fts_flags = 0; + p->fts_instr = FTS_NOINSTR; + p->fts_number = 0; + p->fts_pointer = NULL; + p->fts_fts = sp; + return (p); +} + +static void +fts_lfree(FTSENT11 *head) +{ + FTSENT11 *p; + + /* Free a linked list of structures. */ + while ((p = head)) { + head = head->fts_link; + free(p); + } +} + +/* + * Allow essentially unlimited paths; find, rm, ls should all work on any tree. + * Most systems will allow creation of paths much longer than MAXPATHLEN, even + * though the kernel won't resolve them. Add the size (not just what's needed) + * plus 256 bytes so don't realloc the path 2 bytes at a time. + */ +static int +fts_palloc(FTS11 *sp, size_t more) +{ + + sp->fts_pathlen += more + 256; + sp->fts_path = reallocf(sp->fts_path, sp->fts_pathlen); + return (sp->fts_path == NULL); +} + +/* + * When the path is realloc'd, have to fix all of the pointers in structures + * already returned. + */ +static void +fts_padjust(FTS11 *sp, FTSENT11 *head) +{ + FTSENT11 *p; + char *addr = sp->fts_path; + +#define ADJUST(p) do { \ + if ((p)->fts_accpath != (p)->fts_name) { \ + (p)->fts_accpath = \ + (char *)addr + ((p)->fts_accpath - (p)->fts_path); \ + } \ + (p)->fts_path = addr; \ +} while (0) + /* Adjust the current set of children. */ + for (p = sp->fts_child; p; p = p->fts_link) + ADJUST(p); + + /* Adjust the rest of the tree, including the current level. */ + for (p = head; p->fts_level >= FTS_ROOTLEVEL;) { + ADJUST(p); + p = p->fts_link ? p->fts_link : p->fts_parent; + } +} + +static size_t +fts_maxarglen(char * const *argv) +{ + size_t len, max; + + for (max = 0; *argv; ++argv) + if ((len = strlen(*argv)) > max) + max = len; + return (max + 1); +} + +/* + * Change to dir specified by fd or p->fts_accpath without getting + * tricked by someone changing the world out from underneath us. + * Assumes p->fts_dev and p->fts_ino are filled in. + */ +static int +fts_safe_changedir(FTS11 *sp, FTSENT11 *p, int fd, char *path) +{ + int ret, oerrno, newfd; + struct freebsd11_stat sb; + + newfd = fd; + if (ISSET(FTS_NOCHDIR)) + return (0); + if (fd < 0 && (newfd = _open(path, O_RDONLY | O_DIRECTORY | + O_CLOEXEC, 0)) < 0) + return (-1); + if (freebsd11_fstat(newfd, &sb)) { + ret = -1; + goto bail; + } + if (p->fts_dev != sb.st_dev || p->fts_ino != sb.st_ino) { + errno = ENOENT; /* disinformation */ + ret = -1; + goto bail; + } + ret = fchdir(newfd); +bail: + oerrno = errno; + if (fd < 0) + (void)_close(newfd); + errno = oerrno; + return (ret); +} + +/* + * Check if the filesystem for "ent" has UFS-style links. + */ +static int +fts_ufslinks(FTS11 *sp, const FTSENT11 *ent) +{ + struct _fts_private11 *priv; + const char **cpp; + + priv = (struct _fts_private11 *)sp; + /* + * If this node's device is different from the previous, grab + * the filesystem information, and decide on the reliability + * of the link information from this filesystem for stat(2) + * avoidance. + */ + if (priv->ftsp_dev != ent->fts_dev) { + if (freebsd11_statfs(ent->fts_path, &priv->ftsp_statfs) != -1) { + priv->ftsp_dev = ent->fts_dev; + priv->ftsp_linksreliable = 0; + for (cpp = ufslike_filesystems; *cpp; cpp++) { + if (strcmp(priv->ftsp_statfs.f_fstypename, + *cpp) == 0) { + priv->ftsp_linksreliable = 1; + break; + } + } + } else { + priv->ftsp_linksreliable = 0; + } + } + return (priv->ftsp_linksreliable); +} + +__sym_compat(fts_open, freebsd11_fts_open, FBSD_1.1); +__sym_compat(fts_close, freebsd11_fts_close, FBSD_1.1); +__sym_compat(fts_read, freebsd11_fts_read, FBSD_1.1); +__sym_compat(fts_set, freebsd11_fts_set, FBSD_1.1); +__sym_compat(fts_children, freebsd11_fts_children, FBSD_1.1); +__sym_compat(fts_get_clientptr, freebsd11_fts_get_clientptr, FBSD_1.1); +__sym_compat(fts_get_stream, freebsd11_fts_get_stream, FBSD_1.1); +__sym_compat(fts_set_clientptr, freebsd11_fts_set_clientptr, FBSD_1.1); Property changes on: head/lib/libc/gen/fts-compat11.c ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: head/lib/libc/gen/fts-compat11.h =================================================================== --- head/lib/libc/gen/fts-compat11.h (nonexistent) +++ head/lib/libc/gen/fts-compat11.h (revision 318736) @@ -0,0 +1,95 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fts.h 8.3 (Berkeley) 8/14/94 + * $FreeBSD$ + */ + +#ifndef _FTS_COPMAT11_H_ +#define _FTS_COPMAT11_H_ + +typedef struct { + struct _ftsent11 *fts_cur; /* current node */ + struct _ftsent11 *fts_child; /* linked list of children */ + struct _ftsent11 **fts_array; /* sort array */ + uint32_t fts_dev; /* starting device # */ + char *fts_path; /* path for this descent */ + int fts_rfd; /* fd for root */ + __size_t fts_pathlen; /* sizeof(path) */ + __size_t fts_nitems; /* elements in the sort array */ + int (*fts_compar) /* compare function */ + (const struct _ftsent11 * const *, + const struct _ftsent11 * const *); + int fts_options; /* fts_open options, global flags */ + void *fts_clientptr; /* thunk for sort function */ +} FTS11; + +typedef struct _ftsent11 { + struct _ftsent11 *fts_cycle; /* cycle node */ + struct _ftsent11 *fts_parent; /* parent directory */ + struct _ftsent11 *fts_link; /* next file in directory */ + long long fts_number; /* local numeric value */ + void *fts_pointer; /* local address value */ + char *fts_accpath; /* access path */ + char *fts_path; /* root path */ + int fts_errno; /* errno for this node */ + int fts_symfd; /* fd for symlink */ + __size_t fts_pathlen; /* strlen(fts_path) */ + __size_t fts_namelen; /* strlen(fts_name) */ + + uint32_t fts_ino; /* inode */ + uint32_t fts_dev; /* device */ + uint16_t fts_nlink; /* link count */ + + long fts_level; /* depth (-1 to N) */ + + int fts_info; /* user status for FTSENT structure */ + + unsigned fts_flags; /* private flags for FTSENT structure */ + + int fts_instr; /* fts_set() instructions */ + + struct freebsd11_stat *fts_statp; /* stat(2) information */ + char *fts_name; /* file name */ + FTS11 *fts_fts; /* back pointer to main FTS */ +} FTSENT11; + +FTSENT11 *freebsd11_fts_children(FTS11 *, int); +int freebsd11_fts_close(FTS11 *); +void *freebsd11_fts_get_clientptr(FTS11 *); +#define freebsd11_fts_get_clientptr(fts) ((fts)->fts_clientptr) +FTS11 *freebsd11_fts_get_stream(FTSENT11 *); +#define freebsd11_fts_get_stream(ftsent) ((ftsent)->fts_fts) +FTS11 *freebsd11_fts_open(char * const *, int, + int (*)(const FTSENT11 * const *, + const FTSENT11 * const *)); +FTSENT11 *freebsd11_fts_read(FTS11 *); +int freebsd11_fts_set(FTS11 *, FTSENT11 *, int); +void freebsd11_fts_set_clientptr(FTS11 *, void *); + +#endif /* !_FTS_COMPAT11_H_ */ Property changes on: head/lib/libc/gen/fts-compat11.h ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: head/lib/libc/gen/ftw-compat11.c =================================================================== --- head/lib/libc/gen/ftw-compat11.c (nonexistent) +++ head/lib/libc/gen/ftw-compat11.c (revision 318736) @@ -0,0 +1,98 @@ +/* $OpenBSD: ftw.c,v 1.5 2005/08/08 08:05:34 espie Exp $ */ + +/* + * Copyright (c) 2003, 2004 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + * + * from: $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include "fts-compat11.h" + +int +freebsd11_ftw(const char *path, + int (*fn)(const char *, const struct freebsd11_stat *, int), int nfds) +{ + char * const paths[2] = { (char *)path, NULL }; + FTSENT11 *cur; + FTS11 *ftsp; + int error = 0, fnflag, sverrno; + + /* XXX - nfds is currently unused */ + if (nfds < 1) { + errno = EINVAL; + return (-1); + } + + ftsp = freebsd11_fts_open(paths, + FTS_LOGICAL | FTS_COMFOLLOW | FTS_NOCHDIR, NULL); + if (ftsp == NULL) + return (-1); + while ((cur = freebsd11_fts_read(ftsp)) != NULL) { + switch (cur->fts_info) { + case FTS_D: + fnflag = FTW_D; + break; + case FTS_DNR: + fnflag = FTW_DNR; + break; + case FTS_DP: + /* we only visit in preorder */ + continue; + case FTS_F: + case FTS_DEFAULT: + fnflag = FTW_F; + break; + case FTS_NS: + case FTS_NSOK: + case FTS_SLNONE: + fnflag = FTW_NS; + break; + case FTS_SL: + fnflag = FTW_SL; + break; + case FTS_DC: + errno = ELOOP; + /* FALLTHROUGH */ + default: + error = -1; + goto done; + } + error = fn(cur->fts_path, cur->fts_statp, fnflag); + if (error != 0) + break; + } +done: + sverrno = errno; + if (freebsd11_fts_close(ftsp) != 0 && error == 0) + error = -1; + else + errno = sverrno; + return (error); +} + +__sym_compat(ftw, freebsd11_ftw, FBSD_1.0); Property changes on: head/lib/libc/gen/ftw-compat11.c ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: head/lib/libc/gen/gen-compat.h =================================================================== --- head/lib/libc/gen/gen-compat.h (nonexistent) +++ head/lib/libc/gen/gen-compat.h (revision 318736) @@ -0,0 +1,57 @@ +/*- + * Copyright (c) 2012 Gleb Kurtsou + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _GEN_COMPAT_H_ +#define _GEN_COMPAT_H_ + +#include + +#define FREEBSD11_DIRSIZ(dp) \ + (sizeof(struct freebsd11_dirent) - sizeof((dp)->d_name) + \ + (((dp)->d_namlen + 1 + 3) &~ 3)) + +struct freebsd11_dirent; +struct freebsd11_stat; +struct freebsd11_statfs; + +struct freebsd11_dirent *freebsd11_readdir(DIR *); +int freebsd11_readdir_r(DIR *, struct freebsd11_dirent *, + struct freebsd11_dirent **); +int freebsd11_stat(const char *, struct freebsd11_stat *); +int freebsd11_lstat(const char *, struct freebsd11_stat *); +int freebsd11_fstat(int, struct freebsd11_stat *); +int freebsd11_fstatat(int, const char *, struct freebsd11_stat *, int); + +int freebsd11_statfs(const char *, struct freebsd11_statfs *); +int freebsd11_getfsstat(struct freebsd11_statfs *, long, int); +int freebsd11_getmntinfo(struct freebsd11_statfs **, int); + +char *freebsd11_devname(__uint32_t dev, __mode_t type); +char *freebsd11_devname_r(__uint32_t dev, __mode_t type, char *buf, int len); + +#endif /* _GEN_COMPAT_H_ */ Property changes on: head/lib/libc/gen/gen-compat.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/lib/libc/gen/gen-private.h =================================================================== --- head/lib/libc/gen/gen-private.h (revision 318735) +++ head/lib/libc/gen/gen-private.h (revision 318736) @@ -1,58 +1,62 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _GEN_PRIVATE_H_ #define _GEN_PRIVATE_H_ struct _telldir; /* see telldir.h */ struct pthread_mutex; /* * Structure describing an open directory. * * NOTE. Change structure layout with care, at least dd_fd field has to * remain unchanged to guarantee backward compatibility. */ struct _dirdesc { int dd_fd; /* file descriptor associated with directory */ long dd_loc; /* offset in current buffer */ long dd_size; /* amount of data returned by getdirentries */ char *dd_buf; /* data buffer */ int dd_len; /* size of data buffer */ - long dd_seek; /* magic cookie returned by getdirentries */ + off_t dd_seek; /* magic cookie returned by getdirentries */ int dd_flags; /* flags for readdir */ struct pthread_mutex *dd_lock; /* lock */ struct _telldir *dd_td; /* telldir position recording */ + void *dd_compat_de; /* compat dirent */ }; #define _dirfd(dirp) ((dirp)->dd_fd) + +struct dirent; +int __readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result); #endif /* !_GEN_PRIVATE_H_ */ Index: head/lib/libc/gen/getmntinfo-compat11.c =================================================================== --- head/lib/libc/gen/getmntinfo-compat11.c (nonexistent) +++ head/lib/libc/gen/getmntinfo-compat11.c (revision 318736) @@ -0,0 +1,72 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)getmntinfo.c 8.1 (Berkeley) 6/4/93"; +#endif /* LIBC_SCCS and not lint */ +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#define _WANT_FREEBSD11_STATFS +#include +#include +#include "gen-compat.h" + +/* + * Return information about mounted filesystems. + */ +int +freebsd11_getmntinfo(struct freebsd11_statfs **mntbufp, int flags) +{ + static struct freebsd11_statfs *mntbuf; + static int mntsize; + static long bufsize; + + if (mntsize <= 0 && + (mntsize = freebsd11_getfsstat(0, 0, MNT_NOWAIT)) < 0) + return (0); + if (bufsize > 0 && + (mntsize = freebsd11_getfsstat(mntbuf, bufsize, flags)) < 0) + return (0); + while (bufsize <= mntsize * sizeof(struct freebsd11_statfs)) { + if (mntbuf) + free(mntbuf); + bufsize = (mntsize + 1) * sizeof(struct freebsd11_statfs); + if ((mntbuf = (struct freebsd11_statfs *)malloc(bufsize)) == 0) + return (0); + if ((mntsize = freebsd11_getfsstat(mntbuf, bufsize, flags)) < 0) + return (0); + } + *mntbufp = mntbuf; + return (mntsize); +} + +__sym_compat(getmntinfo, freebsd11_getmntinfo, FBSD_1.0); Property changes on: head/lib/libc/gen/getmntinfo-compat11.c ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: head/lib/libc/gen/glob-compat11.c =================================================================== --- head/lib/libc/gen/glob-compat11.c (nonexistent) +++ head/lib/libc/gen/glob-compat11.c (revision 318736) @@ -0,0 +1,1093 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Guido van Rossum. + * + * Copyright (c) 2011 The FreeBSD Foundation + * All rights reserved. + * Portions of this software were developed by David Chisnall + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: $FreeBSD$ + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)glob.c 8.3 (Berkeley) 10/13/93"; +#endif /* LIBC_SCCS and not lint */ +#include +__FBSDID("$FreeBSD$"); + +#include +#define _WANT_FREEBSD11_STAT +#include + +#include +#define _WANT_FREEBSD11_DIRENT +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "collate.h" +#include "gen-compat.h" +#include "glob-compat11.h" + +/* + * glob(3) expansion limits. Stop the expansion if any of these limits + * is reached. This caps the runtime in the face of DoS attacks. See + * also CVE-2010-2632 + */ +#define GLOB_LIMIT_BRACE 128 /* number of brace calls */ +#define GLOB_LIMIT_PATH 65536 /* number of path elements */ +#define GLOB_LIMIT_READDIR 16384 /* number of readdirs */ +#define GLOB_LIMIT_STAT 1024 /* number of stat system calls */ +#define GLOB_LIMIT_STRING ARG_MAX /* maximum total size for paths */ + +struct glob_limit { + size_t l_brace_cnt; + size_t l_path_lim; + size_t l_readdir_cnt; + size_t l_stat_cnt; + size_t l_string_cnt; +}; + +#define DOT L'.' +#define EOS L'\0' +#define LBRACKET L'[' +#define NOT L'!' +#define QUESTION L'?' +#define QUOTE L'\\' +#define RANGE L'-' +#define RBRACKET L']' +#define SEP L'/' +#define STAR L'*' +#define TILDE L'~' +#define LBRACE L'{' +#define RBRACE L'}' +#define COMMA L',' + +#define M_QUOTE 0x8000000000ULL +#define M_PROTECT 0x4000000000ULL +#define M_MASK 0xffffffffffULL +#define M_CHAR 0x00ffffffffULL + +typedef uint_fast64_t Char; + +#define CHAR(c) ((Char)((c)&M_CHAR)) +#define META(c) ((Char)((c)|M_QUOTE)) +#define UNPROT(c) ((c) & ~M_PROTECT) +#define M_ALL META(L'*') +#define M_END META(L']') +#define M_NOT META(L'!') +#define M_ONE META(L'?') +#define M_RNG META(L'-') +#define M_SET META(L'[') +#define ismeta(c) (((c)&M_QUOTE) != 0) +#ifdef DEBUG +#define isprot(c) (((c)&M_PROTECT) != 0) +#endif + +static int compare(const void *, const void *); +static int g_Ctoc(const Char *, char *, size_t); +static int g_lstat(Char *, struct freebsd11_stat *, glob11_t *); +static DIR *g_opendir(Char *, glob11_t *); +static const Char *g_strchr(const Char *, wchar_t); +#ifdef notdef +static Char *g_strcat(Char *, const Char *); +#endif +static int g_stat(Char *, struct freebsd11_stat *, glob11_t *); +static int glob0(const Char *, glob11_t *, struct glob_limit *, + const char *); +static int glob1(Char *, glob11_t *, struct glob_limit *); +static int glob2(Char *, Char *, Char *, Char *, glob11_t *, + struct glob_limit *); +static int glob3(Char *, Char *, Char *, Char *, Char *, glob11_t *, + struct glob_limit *); +static int globextend(const Char *, glob11_t *, struct glob_limit *, + const char *); +static const Char * + globtilde(const Char *, Char *, size_t, glob11_t *); +static int globexp0(const Char *, glob11_t *, struct glob_limit *, + const char *); +static int globexp1(const Char *, glob11_t *, struct glob_limit *); +static int globexp2(const Char *, const Char *, glob11_t *, + struct glob_limit *); +static int globfinal(glob11_t *, struct glob_limit *, size_t, + const char *); +static int match(Char *, Char *, Char *); +static int err_nomatch(glob11_t *, struct glob_limit *, const char *); +static int err_aborted(glob11_t *, int, char *); +#ifdef DEBUG +static void qprintf(const char *, Char *); +#endif + +int +freebsd11_glob(const char * __restrict pattern, int flags, + int (*errfunc)(const char *, int), glob11_t * __restrict pglob) +{ + struct glob_limit limit = { 0, 0, 0, 0, 0 }; + const char *patnext; + Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot; + mbstate_t mbs; + wchar_t wc; + size_t clen; + int too_long; + + patnext = pattern; + if (!(flags & GLOB_APPEND)) { + pglob->gl_pathc = 0; + pglob->gl_pathv = NULL; + if (!(flags & GLOB_DOOFFS)) + pglob->gl_offs = 0; + } + if (flags & GLOB_LIMIT) { + limit.l_path_lim = pglob->gl_matchc; + if (limit.l_path_lim == 0) + limit.l_path_lim = GLOB_LIMIT_PATH; + } + pglob->gl_flags = flags & ~GLOB_MAGCHAR; + pglob->gl_errfunc = errfunc; + pglob->gl_matchc = 0; + + bufnext = patbuf; + bufend = bufnext + MAXPATHLEN - 1; + too_long = 1; + if (flags & GLOB_NOESCAPE) { + memset(&mbs, 0, sizeof(mbs)); + while (bufnext <= bufend) { + clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs); + if (clen == (size_t)-1 || clen == (size_t)-2) + return (err_nomatch(pglob, &limit, pattern)); + else if (clen == 0) { + too_long = 0; + break; + } + *bufnext++ = wc; + patnext += clen; + } + } else { + /* Protect the quoted characters. */ + memset(&mbs, 0, sizeof(mbs)); + while (bufnext <= bufend) { + if (*patnext == '\\') { + if (*++patnext == '\0') { + *bufnext++ = QUOTE; + continue; + } + prot = M_PROTECT; + } else + prot = 0; + clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs); + if (clen == (size_t)-1 || clen == (size_t)-2) + return (err_nomatch(pglob, &limit, pattern)); + else if (clen == 0) { + too_long = 0; + break; + } + *bufnext++ = wc | prot; + patnext += clen; + } + } + if (too_long) + return (err_nomatch(pglob, &limit, pattern)); + *bufnext = EOS; + + if (flags & GLOB_BRACE) + return (globexp0(patbuf, pglob, &limit, pattern)); + else + return (glob0(patbuf, pglob, &limit, pattern)); +} + +static int +globexp0(const Char *pattern, glob11_t *pglob, struct glob_limit *limit, + const char *origpat) { + int rv; + size_t oldpathc; + + /* Protect a single {}, for find(1), like csh */ + if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) { + if ((pglob->gl_flags & GLOB_LIMIT) && + limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) { + errno = E2BIG; + return (GLOB_NOSPACE); + } + return (glob0(pattern, pglob, limit, origpat)); + } + + oldpathc = pglob->gl_pathc; + + if ((rv = globexp1(pattern, pglob, limit)) != 0) + return rv; + + return (globfinal(pglob, limit, oldpathc, origpat)); +} + +/* + * Expand recursively a glob {} pattern. When there is no more expansion + * invoke the standard globbing routine to glob the rest of the magic + * characters + */ +static int +globexp1(const Char *pattern, glob11_t *pglob, struct glob_limit *limit) +{ + const Char* ptr; + + if ((ptr = g_strchr(pattern, LBRACE)) != NULL) { + if ((pglob->gl_flags & GLOB_LIMIT) && + limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) { + errno = E2BIG; + return (GLOB_NOSPACE); + } + return (globexp2(ptr, pattern, pglob, limit)); + } + + return (glob0(pattern, pglob, limit, NULL)); +} + + +/* + * Recursive brace globbing helper. Tries to expand a single brace. + * If it succeeds then it invokes globexp1 with the new pattern. + * If it fails then it tries to glob the rest of the pattern and returns. + */ +static int +globexp2(const Char *ptr, const Char *pattern, glob11_t *pglob, + struct glob_limit *limit) +{ + int i, rv; + Char *lm, *ls; + const Char *pe, *pm, *pm1, *pl; + Char patbuf[MAXPATHLEN]; + + /* copy part up to the brace */ + for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++) + continue; + *lm = EOS; + ls = lm; + + /* Find the balanced brace */ + for (i = 0, pe = ++ptr; *pe != EOS; pe++) + if (*pe == LBRACKET) { + /* Ignore everything between [] */ + for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++) + continue; + if (*pe == EOS) { + /* + * We could not find a matching RBRACKET. + * Ignore and just look for RBRACE + */ + pe = pm; + } + } + else if (*pe == LBRACE) + i++; + else if (*pe == RBRACE) { + if (i == 0) + break; + i--; + } + + /* Non matching braces; just glob the pattern */ + if (i != 0 || *pe == EOS) + return (glob0(pattern, pglob, limit, NULL)); + + for (i = 0, pl = pm = ptr; pm <= pe; pm++) + switch (*pm) { + case LBRACKET: + /* Ignore everything between [] */ + for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++) + continue; + if (*pm == EOS) { + /* + * We could not find a matching RBRACKET. + * Ignore and just look for RBRACE + */ + pm = pm1; + } + break; + + case LBRACE: + i++; + break; + + case RBRACE: + if (i) { + i--; + break; + } + /* FALLTHROUGH */ + case COMMA: + if (i && *pm == COMMA) + break; + else { + /* Append the current string */ + for (lm = ls; (pl < pm); *lm++ = *pl++) + continue; + /* + * Append the rest of the pattern after the + * closing brace + */ + for (pl = pe + 1; (*lm++ = *pl++) != EOS;) + continue; + + /* Expand the current pattern */ +#ifdef DEBUG + qprintf("globexp2:", patbuf); +#endif + rv = globexp1(patbuf, pglob, limit); + if (rv) + return (rv); + + /* move after the comma, to the next string */ + pl = pm + 1; + } + break; + + default: + break; + } + return (0); +} + + + +/* + * expand tilde from the passwd file. + */ +static const Char * +globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob11_t *pglob) +{ + struct passwd *pwd; + char *h, *sc; + const Char *p; + Char *b, *eb; + wchar_t wc; + wchar_t wbuf[MAXPATHLEN]; + wchar_t *wbufend, *dc; + size_t clen; + mbstate_t mbs; + int too_long; + + if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE)) + return (pattern); + + /* + * Copy up to the end of the string or / + */ + eb = &patbuf[patbuf_len - 1]; + for (p = pattern + 1, b = patbuf; + b < eb && *p != EOS && UNPROT(*p) != SEP; *b++ = *p++) + continue; + + if (*p != EOS && UNPROT(*p) != SEP) + return (NULL); + + *b = EOS; + h = NULL; + + if (patbuf[0] == EOS) { + /* + * handle a plain ~ or ~/ by expanding $HOME first (iff + * we're not running setuid or setgid) and then trying + * the password file + */ + if (issetugid() != 0 || + (h = getenv("HOME")) == NULL) { + if (((h = getlogin()) != NULL && + (pwd = getpwnam(h)) != NULL) || + (pwd = getpwuid(getuid())) != NULL) + h = pwd->pw_dir; + else + return (pattern); + } + } + else { + /* + * Expand a ~user + */ + if (g_Ctoc(patbuf, (char *)wbuf, sizeof(wbuf))) + return (NULL); + if ((pwd = getpwnam((char *)wbuf)) == NULL) + return (pattern); + else + h = pwd->pw_dir; + } + + /* Copy the home directory */ + dc = wbuf; + sc = h; + wbufend = wbuf + MAXPATHLEN - 1; + too_long = 1; + memset(&mbs, 0, sizeof(mbs)); + while (dc <= wbufend) { + clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs); + if (clen == (size_t)-1 || clen == (size_t)-2) { + /* XXX See initial comment #2. */ + wc = (unsigned char)*sc; + clen = 1; + memset(&mbs, 0, sizeof(mbs)); + } + if ((*dc++ = wc) == EOS) { + too_long = 0; + break; + } + sc += clen; + } + if (too_long) + return (NULL); + + dc = wbuf; + for (b = patbuf; b < eb && *dc != EOS; *b++ = *dc++ | M_PROTECT) + continue; + if (*dc != EOS) + return (NULL); + + /* Append the rest of the pattern */ + if (*p != EOS) { + too_long = 1; + while (b <= eb) { + if ((*b++ = *p++) == EOS) { + too_long = 0; + break; + } + } + if (too_long) + return (NULL); + } else + *b = EOS; + + return (patbuf); +} + + +/* + * The main glob() routine: compiles the pattern (optionally processing + * quotes), calls glob1() to do the real pattern matching, and finally + * sorts the list (unless unsorted operation is requested). Returns 0 + * if things went well, nonzero if errors occurred. + */ +static int +glob0(const Char *pattern, glob11_t *pglob, struct glob_limit *limit, + const char *origpat) { + const Char *qpatnext; + int err; + size_t oldpathc; + Char *bufnext, c, patbuf[MAXPATHLEN]; + + qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob); + if (qpatnext == NULL) { + errno = E2BIG; + return (GLOB_NOSPACE); + } + oldpathc = pglob->gl_pathc; + bufnext = patbuf; + + /* We don't need to check for buffer overflow any more. */ + while ((c = *qpatnext++) != EOS) { + switch (c) { + case LBRACKET: + c = *qpatnext; + if (c == NOT) + ++qpatnext; + if (*qpatnext == EOS || + g_strchr(qpatnext+1, RBRACKET) == NULL) { + *bufnext++ = LBRACKET; + if (c == NOT) + --qpatnext; + break; + } + *bufnext++ = M_SET; + if (c == NOT) + *bufnext++ = M_NOT; + c = *qpatnext++; + do { + *bufnext++ = CHAR(c); + if (*qpatnext == RANGE && + (c = qpatnext[1]) != RBRACKET) { + *bufnext++ = M_RNG; + *bufnext++ = CHAR(c); + qpatnext += 2; + } + } while ((c = *qpatnext++) != RBRACKET); + pglob->gl_flags |= GLOB_MAGCHAR; + *bufnext++ = M_END; + break; + case QUESTION: + pglob->gl_flags |= GLOB_MAGCHAR; + *bufnext++ = M_ONE; + break; + case STAR: + pglob->gl_flags |= GLOB_MAGCHAR; + /* collapse adjacent stars to one, + * to avoid exponential behavior + */ + if (bufnext == patbuf || bufnext[-1] != M_ALL) + *bufnext++ = M_ALL; + break; + default: + *bufnext++ = CHAR(c); + break; + } + } + *bufnext = EOS; +#ifdef DEBUG + qprintf("glob0:", patbuf); +#endif + + if ((err = glob1(patbuf, pglob, limit)) != 0) + return(err); + + if (origpat != NULL) + return (globfinal(pglob, limit, oldpathc, origpat)); + + return (0); +} + +static int +globfinal(glob11_t *pglob, struct glob_limit *limit, size_t oldpathc, + const char *origpat) { + if (pglob->gl_pathc == oldpathc) + return (err_nomatch(pglob, limit, origpat)); + + if (!(pglob->gl_flags & GLOB_NOSORT)) + qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, + pglob->gl_pathc - oldpathc, sizeof(char *), compare); + + return (0); +} + +static int +compare(const void *p, const void *q) +{ + return (strcoll(*(char **)p, *(char **)q)); +} + +static int +glob1(Char *pattern, glob11_t *pglob, struct glob_limit *limit) +{ + Char pathbuf[MAXPATHLEN]; + + /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ + if (*pattern == EOS) + return (0); + return (glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1, + pattern, pglob, limit)); +} + +/* + * The functions glob2 and glob3 are mutually recursive; there is one level + * of recursion for each segment in the pattern that contains one or more + * meta characters. + */ +static int +glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, + glob11_t *pglob, struct glob_limit *limit) +{ + struct freebsd11_stat sb; + Char *p, *q; + int anymeta; + + /* + * Loop over pattern segments until end of pattern or until + * segment with meta character found. + */ + for (anymeta = 0;;) { + if (*pattern == EOS) { /* End of pattern? */ + *pathend = EOS; + if (g_lstat(pathbuf, &sb, pglob)) + return (0); + + if ((pglob->gl_flags & GLOB_LIMIT) && + limit->l_stat_cnt++ >= GLOB_LIMIT_STAT) { + errno = E2BIG; + return (GLOB_NOSPACE); + } + if ((pglob->gl_flags & GLOB_MARK) && + UNPROT(pathend[-1]) != SEP && + (S_ISDIR(sb.st_mode) || + (S_ISLNK(sb.st_mode) && + g_stat(pathbuf, &sb, pglob) == 0 && + S_ISDIR(sb.st_mode)))) { + if (pathend + 1 > pathend_last) { + errno = E2BIG; + return (GLOB_NOSPACE); + } + *pathend++ = SEP; + *pathend = EOS; + } + ++pglob->gl_matchc; + return (globextend(pathbuf, pglob, limit, NULL)); + } + + /* Find end of next segment, copy tentatively to pathend. */ + q = pathend; + p = pattern; + while (*p != EOS && UNPROT(*p) != SEP) { + if (ismeta(*p)) + anymeta = 1; + if (q + 1 > pathend_last) { + errno = E2BIG; + return (GLOB_NOSPACE); + } + *q++ = *p++; + } + + if (!anymeta) { /* No expansion, do next segment. */ + pathend = q; + pattern = p; + while (UNPROT(*pattern) == SEP) { + if (pathend + 1 > pathend_last) { + errno = E2BIG; + return (GLOB_NOSPACE); + } + *pathend++ = *pattern++; + } + } else /* Need expansion, recurse. */ + return (glob3(pathbuf, pathend, pathend_last, pattern, + p, pglob, limit)); + } + /* NOTREACHED */ +} + +static int +glob3(Char *pathbuf, Char *pathend, Char *pathend_last, + Char *pattern, Char *restpattern, + glob11_t *pglob, struct glob_limit *limit) +{ + struct freebsd11_dirent *dp; + DIR *dirp; + int err, too_long, saverrno, saverrno2; + char buf[MAXPATHLEN + MB_LEN_MAX - 1]; + + struct freebsd11_dirent *(*readdirfunc)(DIR *); + + if (pathend > pathend_last) { + errno = E2BIG; + return (GLOB_NOSPACE); + } + *pathend = EOS; + if (pglob->gl_errfunc != NULL && + g_Ctoc(pathbuf, buf, sizeof(buf))) { + errno = E2BIG; + return (GLOB_NOSPACE); + } + + saverrno = errno; + errno = 0; + if ((dirp = g_opendir(pathbuf, pglob)) == NULL) { + if (errno == ENOENT || errno == ENOTDIR) + return (0); + err = err_aborted(pglob, errno, buf); + if (errno == 0) + errno = saverrno; + return (err); + } + + err = 0; + + /* pglob->gl_readdir takes a void *, fix this manually */ + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + readdirfunc = + (struct freebsd11_dirent *(*)(DIR *))pglob->gl_readdir; + else + readdirfunc = freebsd11_readdir; + + errno = 0; + /* Search directory for matching names. */ + while ((dp = (*readdirfunc)(dirp)) != NULL) { + char *sc; + Char *dc; + wchar_t wc; + size_t clen; + mbstate_t mbs; + + if ((pglob->gl_flags & GLOB_LIMIT) && + limit->l_readdir_cnt++ >= GLOB_LIMIT_READDIR) { + errno = E2BIG; + err = GLOB_NOSPACE; + break; + } + + /* Initial DOT must be matched literally. */ + if (dp->d_name[0] == '.' && UNPROT(*pattern) != DOT) { + errno = 0; + continue; + } + memset(&mbs, 0, sizeof(mbs)); + dc = pathend; + sc = dp->d_name; + too_long = 1; + while (dc <= pathend_last) { + clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs); + if (clen == (size_t)-1 || clen == (size_t)-2) { + /* XXX See initial comment #2. */ + wc = (unsigned char)*sc; + clen = 1; + memset(&mbs, 0, sizeof(mbs)); + } + if ((*dc++ = wc) == EOS) { + too_long = 0; + break; + } + sc += clen; + } + if (too_long && (err = err_aborted(pglob, ENAMETOOLONG, + buf))) { + errno = ENAMETOOLONG; + break; + } + if (too_long || !match(pathend, pattern, restpattern)) { + *pathend = EOS; + errno = 0; + continue; + } + if (errno == 0) + errno = saverrno; + err = glob2(pathbuf, --dc, pathend_last, restpattern, + pglob, limit); + if (err) + break; + errno = 0; + } + + saverrno2 = errno; + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + (*pglob->gl_closedir)(dirp); + else + closedir(dirp); + errno = saverrno2; + + if (err) + return (err); + + if (dp == NULL && errno != 0 && + (err = err_aborted(pglob, errno, buf))) + return (err); + + if (errno == 0) + errno = saverrno; + return (0); +} + + +/* + * Extend the gl_pathv member of a glob11_t structure to accommodate a new item, + * add the new item, and update gl_pathc. + * + * This assumes the BSD realloc, which only copies the block when its size + * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic + * behavior. + * + * Return 0 if new item added, error code if memory couldn't be allocated. + * + * Invariant of the glob11_t structure: + * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and + * gl_pathv points to (gl_offs + gl_pathc + 1) items. + */ +static int +globextend(const Char *path, glob11_t *pglob, struct glob_limit *limit, + const char *origpat) +{ + char **pathv; + size_t i, newn, len; + char *copy; + const Char *p; + + if ((pglob->gl_flags & GLOB_LIMIT) && + pglob->gl_matchc > limit->l_path_lim) { + errno = E2BIG; + return (GLOB_NOSPACE); + } + + newn = 2 + pglob->gl_pathc + pglob->gl_offs; + /* reallocarray(NULL, newn, size) is equivalent to malloc(newn*size). */ + pathv = reallocarray(pglob->gl_pathv, newn, sizeof(*pathv)); + if (pathv == NULL) + return (GLOB_NOSPACE); + + if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { + /* first time around -- clear initial gl_offs items */ + pathv += pglob->gl_offs; + for (i = pglob->gl_offs + 1; --i > 0; ) + *--pathv = NULL; + } + pglob->gl_pathv = pathv; + + if (origpat != NULL) + copy = strdup(origpat); + else { + for (p = path; *p++ != EOS;) + continue; + len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */ + if ((copy = malloc(len)) != NULL) { + if (g_Ctoc(path, copy, len)) { + free(copy); + errno = E2BIG; + return (GLOB_NOSPACE); + } + } + } + if (copy != NULL) { + limit->l_string_cnt += strlen(copy) + 1; + if ((pglob->gl_flags & GLOB_LIMIT) && + limit->l_string_cnt >= GLOB_LIMIT_STRING) { + free(copy); + errno = E2BIG; + return (GLOB_NOSPACE); + } + pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; + } + pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; + return (copy == NULL ? GLOB_NOSPACE : 0); +} + +/* + * pattern matching function for filenames. + */ +static int +match(Char *name, Char *pat, Char *patend) +{ + int ok, negate_range; + Char c, k, *nextp, *nextn; + struct xlocale_collate *table = + (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; + + nextn = NULL; + nextp = NULL; + + while (1) { + while (pat < patend) { + c = *pat++; + switch (c & M_MASK) { + case M_ALL: + if (pat == patend) + return (1); + if (*name == EOS) + return (0); + nextn = name + 1; + nextp = pat - 1; + break; + case M_ONE: + if (*name++ == EOS) + goto fail; + break; + case M_SET: + ok = 0; + if ((k = *name++) == EOS) + goto fail; + negate_range = ((*pat & M_MASK) == M_NOT); + if (negate_range != 0) + ++pat; + while (((c = *pat++) & M_MASK) != M_END) + if ((*pat & M_MASK) == M_RNG) { + if (table->__collate_load_error ? + CHAR(c) <= CHAR(k) && + CHAR(k) <= CHAR(pat[1]) : + __wcollate_range_cmp(CHAR(c), + CHAR(k)) <= 0 && + __wcollate_range_cmp(CHAR(k), + CHAR(pat[1])) <= 0) + ok = 1; + pat += 2; + } else if (c == k) + ok = 1; + if (ok == negate_range) + goto fail; + break; + default: + if (*name++ != c) + goto fail; + break; + } + } + if (*name == EOS) + return (1); + + fail: + if (nextn == NULL) + break; + pat = nextp; + name = nextn; + } + return (0); +} + +/* Free allocated data belonging to a glob11_t structure. */ +void +freebsd11_globfree(glob11_t *pglob) +{ + size_t i; + char **pp; + + if (pglob->gl_pathv != NULL) { + pp = pglob->gl_pathv + pglob->gl_offs; + for (i = pglob->gl_pathc; i--; ++pp) + if (*pp) + free(*pp); + free(pglob->gl_pathv); + pglob->gl_pathv = NULL; + } +} + +static DIR * +g_opendir(Char *str, glob11_t *pglob) +{ + char buf[MAXPATHLEN + MB_LEN_MAX - 1]; + + if (*str == EOS) + strcpy(buf, "."); + else { + if (g_Ctoc(str, buf, sizeof(buf))) { + errno = ENAMETOOLONG; + return (NULL); + } + } + + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + return ((*pglob->gl_opendir)(buf)); + + return (opendir(buf)); +} + +static int +g_lstat(Char *fn, struct freebsd11_stat *sb, glob11_t *pglob) +{ + char buf[MAXPATHLEN + MB_LEN_MAX - 1]; + + if (g_Ctoc(fn, buf, sizeof(buf))) { + errno = ENAMETOOLONG; + return (-1); + } + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + return((*pglob->gl_lstat)(buf, sb)); + return (freebsd11_lstat(buf, sb)); +} + +static int +g_stat(Char *fn, struct freebsd11_stat *sb, glob11_t *pglob) +{ + char buf[MAXPATHLEN + MB_LEN_MAX - 1]; + + if (g_Ctoc(fn, buf, sizeof(buf))) { + errno = ENAMETOOLONG; + return (-1); + } + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + return ((*pglob->gl_stat)(buf, sb)); + return (freebsd11_stat(buf, sb)); +} + +static const Char * +g_strchr(const Char *str, wchar_t ch) +{ + + do { + if (*str == ch) + return (str); + } while (*str++); + return (NULL); +} + +static int +g_Ctoc(const Char *str, char *buf, size_t len) +{ + mbstate_t mbs; + size_t clen; + + memset(&mbs, 0, sizeof(mbs)); + while (len >= MB_CUR_MAX) { + clen = wcrtomb(buf, CHAR(*str), &mbs); + if (clen == (size_t)-1) { + /* XXX See initial comment #2. */ + *buf = (char)CHAR(*str); + clen = 1; + memset(&mbs, 0, sizeof(mbs)); + } + if (CHAR(*str) == EOS) + return (0); + str++; + buf += clen; + len -= clen; + } + return (1); +} + +static int +err_nomatch(glob11_t *pglob, struct glob_limit *limit, const char *origpat) { + /* + * If there was no match we are going to append the origpat + * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified + * and the origpat did not contain any magic characters + * GLOB_NOMAGIC is there just for compatibility with csh. + */ + if ((pglob->gl_flags & GLOB_NOCHECK) || + ((pglob->gl_flags & GLOB_NOMAGIC) && + !(pglob->gl_flags & GLOB_MAGCHAR))) + return (globextend(NULL, pglob, limit, origpat)); + return (GLOB_NOMATCH); +} + +static int +err_aborted(glob11_t *pglob, int err, char *buf) { + if ((pglob->gl_errfunc != NULL && pglob->gl_errfunc(buf, err)) || + (pglob->gl_flags & GLOB_ERR)) + return (GLOB_ABORTED); + return (0); +} + +#ifdef DEBUG +static void +qprintf(const char *str, Char *s) +{ + Char *p; + + (void)printf("%s\n", str); + if (s != NULL) { + for (p = s; *p != EOS; p++) + (void)printf("%c", (char)CHAR(*p)); + (void)printf("\n"); + for (p = s; *p != EOS; p++) + (void)printf("%c", (isprot(*p) ? '\\' : ' ')); + (void)printf("\n"); + for (p = s; *p != EOS; p++) + (void)printf("%c", (ismeta(*p) ? '_' : ' ')); + (void)printf("\n"); + } +} +#endif + +__sym_compat(glob, freebsd11_glob, FBSD_1.0); +__sym_compat(globfree, freebsd11_globfree, FBSD_1.0); Property changes on: head/lib/libc/gen/glob-compat11.c ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: head/lib/libc/gen/glob-compat11.h =================================================================== --- head/lib/libc/gen/glob-compat11.h (nonexistent) +++ head/lib/libc/gen/glob-compat11.h (revision 318736) @@ -0,0 +1,72 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Guido van Rossum. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)glob.h 8.1 (Berkeley) 6/2/93 + * from: $FreeBSD$ + * $FreeBSD$ + */ + +#ifndef _GLOB_COMPAT11_H_ +#define _GLOB_COMPAT11_H_ + +#include +#include +#include + +struct freebsd11_stat; +typedef struct { + size_t gl_pathc; /* Count of total paths so far. */ + size_t gl_matchc; /* Count of paths matching pattern. */ + size_t gl_offs; /* Reserved at beginning of gl_pathv. */ + int gl_flags; /* Copy of flags parameter to glob. */ + char **gl_pathv; /* List of paths matching pattern. */ + /* Copy of errfunc parameter to glob. */ + int (*gl_errfunc)(const char *, int); + + /* + * Alternate filesystem access methods for glob; replacement + * versions of closedir(3), readdir(3), opendir(3), stat(2) + * and lstat(2). + */ + void (*gl_closedir)(void *); + struct freebsd11_dirent *(*gl_readdir)(void *); + void *(*gl_opendir)(const char *); + int (*gl_lstat)(const char *, struct freebsd11_stat *); + int (*gl_stat)(const char *, struct freebsd11_stat *); +} glob11_t; + +__BEGIN_DECLS +int freebsd11_glob(const char * __restrict, int, + int (*)(const char *, int), glob11_t * __restrict); +void freebsd11_globfree(glob11_t *); +__END_DECLS + +#endif /* !_GLOB_COMPAT11_H_ */ Property changes on: head/lib/libc/gen/glob-compat11.h ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: head/lib/libc/gen/nftw-compat11.c =================================================================== --- head/lib/libc/gen/nftw-compat11.c (nonexistent) +++ head/lib/libc/gen/nftw-compat11.c (revision 318736) @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2003, 2004 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + * + * from: $OpenBSD: nftw.c,v 1.7 2006/03/31 19:41:44 millert Exp $ + * from: $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include "fts-compat11.h" + +int +freebsd11_nftw(const char *path, + int (*fn)(const char *, const struct freebsd11_stat *, int, struct FTW *), + int nfds, int ftwflags) +{ + char * const paths[2] = { (char *)path, NULL }; + struct FTW ftw; + FTSENT11 *cur; + FTS11 *ftsp; + int error = 0, ftsflags, fnflag, postorder, sverrno; + + /* XXX - nfds is currently unused */ + if (nfds < 1) { + errno = EINVAL; + return (-1); + } + + ftsflags = FTS_COMFOLLOW; + if (!(ftwflags & FTW_CHDIR)) + ftsflags |= FTS_NOCHDIR; + if (ftwflags & FTW_MOUNT) + ftsflags |= FTS_XDEV; + if (ftwflags & FTW_PHYS) + ftsflags |= FTS_PHYSICAL; + else + ftsflags |= FTS_LOGICAL; + postorder = (ftwflags & FTW_DEPTH) != 0; + ftsp = freebsd11_fts_open(paths, ftsflags, NULL); + if (ftsp == NULL) + return (-1); + while ((cur = freebsd11_fts_read(ftsp)) != NULL) { + switch (cur->fts_info) { + case FTS_D: + if (postorder) + continue; + fnflag = FTW_D; + break; + case FTS_DC: + continue; + case FTS_DNR: + fnflag = FTW_DNR; + break; + case FTS_DP: + if (!postorder) + continue; + fnflag = FTW_DP; + break; + case FTS_F: + case FTS_DEFAULT: + fnflag = FTW_F; + break; + case FTS_NS: + case FTS_NSOK: + fnflag = FTW_NS; + break; + case FTS_SL: + fnflag = FTW_SL; + break; + case FTS_SLNONE: + fnflag = FTW_SLN; + break; + default: + error = -1; + goto done; + } + ftw.base = cur->fts_pathlen - cur->fts_namelen; + ftw.level = cur->fts_level; + error = fn(cur->fts_path, cur->fts_statp, fnflag, &ftw); + if (error != 0) + break; + } +done: + sverrno = errno; + if (freebsd11_fts_close(ftsp) != 0 && error == 0) + error = -1; + else + errno = sverrno; + return (error); +} + +__sym_compat(nftw, freebsd11_nftw, FBSD_1.0); Property changes on: head/lib/libc/gen/nftw-compat11.c ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: head/lib/libc/gen/opendir.c =================================================================== --- head/lib/libc/gen/opendir.c (revision 318735) +++ head/lib/libc/gen/opendir.c (revision 318736) @@ -1,360 +1,361 @@ /*- * Copyright (c) 1983, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)opendir.c 8.8 (Berkeley) 5/1/95"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); #include "namespace.h" #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "gen-private.h" #include "telldir.h" static DIR * __opendir_common(int, int, bool); /* * Open a directory. */ DIR * opendir(const char *name) { return (__opendir2(name, DTF_HIDEW|DTF_NODUP)); } /* * Open a directory with existing file descriptor. */ DIR * fdopendir(int fd) { if (_fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) return (NULL); return (__opendir_common(fd, DTF_HIDEW|DTF_NODUP, true)); } DIR * __opendir2(const char *name, int flags) { int fd; DIR *dir; int saved_errno; if ((flags & (__DTF_READALL | __DTF_SKIPREAD)) != 0) return (NULL); if ((fd = _open(name, O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_CLOEXEC)) == -1) return (NULL); dir = __opendir_common(fd, flags, false); if (dir == NULL) { saved_errno = errno; _close(fd); errno = saved_errno; } return (dir); } static int opendir_compar(const void *p1, const void *p2) { return (strcmp((*(const struct dirent **)p1)->d_name, (*(const struct dirent **)p2)->d_name)); } /* * For a directory at the top of a unionfs stack, the entire directory's * contents are read and cached locally until the next call to rewinddir(). * For the fdopendir() case, the initial seek position must be preserved. * For rewinddir(), the full directory should always be re-read from the * beginning. * * If an error occurs, the existing buffer and state of 'dirp' is left * unchanged. */ bool _filldir(DIR *dirp, bool use_current_pos) { struct dirent **dpv; char *buf, *ddptr, *ddeptr; off_t pos; int fd2, incr, len, n, saved_errno, space; len = 0; space = 0; buf = NULL; ddptr = NULL; /* * Use the system page size if that is a multiple of DIRBLKSIZ. * Hopefully this can be a big win someday by allowing page * trades to user space to be done by _getdirentries(). */ incr = getpagesize(); if ((incr % DIRBLKSIZ) != 0) incr = DIRBLKSIZ; /* * The strategy here is to read all the directory * entries into a buffer, sort the buffer, and * remove duplicate entries by setting the inode * number to zero. * * We reopen the directory because _getdirentries() * on a MNT_UNION mount modifies the open directory, * making it refer to the lower directory after the * upper directory's entries are exhausted. * This would otherwise break software that uses * the directory descriptor for fchdir or *at * functions, such as fts.c. */ if ((fd2 = _openat(dirp->dd_fd, ".", O_RDONLY | O_CLOEXEC)) == -1) return (false); if (use_current_pos) { pos = lseek(dirp->dd_fd, 0, SEEK_CUR); if (pos == -1 || lseek(fd2, pos, SEEK_SET) == -1) { saved_errno = errno; _close(fd2); errno = saved_errno; return (false); } } do { /* * Always make at least DIRBLKSIZ bytes * available to _getdirentries */ if (space < DIRBLKSIZ) { space += incr; len += incr; buf = reallocf(buf, len); if (buf == NULL) { saved_errno = errno; _close(fd2); errno = saved_errno; return (false); } ddptr = buf + (len - space); } n = _getdirentries(fd2, ddptr, space, &dirp->dd_seek); if (n > 0) { ddptr += n; space -= n; } if (n < 0) { saved_errno = errno; _close(fd2); errno = saved_errno; return (false); } } while (n > 0); _close(fd2); ddeptr = ddptr; /* * There is now a buffer full of (possibly) duplicate * names. */ dirp->dd_buf = buf; /* * Go round this loop twice... * * Scan through the buffer, counting entries. * On the second pass, save pointers to each one. * Then sort the pointers and remove duplicate names. */ for (dpv = NULL;;) { n = 0; ddptr = buf; while (ddptr < ddeptr) { struct dirent *dp; dp = (struct dirent *) ddptr; if ((long)dp & 03L) break; if ((dp->d_reclen <= 0) || (dp->d_reclen > (ddeptr + 1 - ddptr))) break; ddptr += dp->d_reclen; if (dp->d_fileno) { if (dpv) dpv[n] = dp; n++; } } if (dpv) { struct dirent *xp; /* * This sort must be stable. */ mergesort(dpv, n, sizeof(*dpv), opendir_compar); dpv[n] = NULL; xp = NULL; /* * Scan through the buffer in sort order, * zapping the inode number of any * duplicate names. */ for (n = 0; dpv[n]; n++) { struct dirent *dp = dpv[n]; if ((xp == NULL) || strcmp(dp->d_name, xp->d_name)) { xp = dp; } else { dp->d_fileno = 0; } if (dp->d_type == DT_WHT && (dirp->dd_flags & DTF_HIDEW)) dp->d_fileno = 0; } free(dpv); break; } else { dpv = malloc((n+1) * sizeof(struct dirent *)); if (dpv == NULL) break; } } dirp->dd_len = len; dirp->dd_size = ddptr - dirp->dd_buf; return (true); } /* * Common routine for opendir(3), __opendir2(3) and fdopendir(3). */ static DIR * __opendir_common(int fd, int flags, bool use_current_pos) { DIR *dirp; int incr; int saved_errno; int unionstack; if ((dirp = malloc(sizeof(DIR) + sizeof(struct _telldir))) == NULL) return (NULL); dirp->dd_buf = NULL; dirp->dd_fd = fd; dirp->dd_flags = flags; dirp->dd_loc = 0; dirp->dd_lock = NULL; dirp->dd_td = (struct _telldir *)((char *)dirp + sizeof(DIR)); LIST_INIT(&dirp->dd_td->td_locq); dirp->dd_td->td_loccnt = 0; + dirp->dd_compat_de = NULL; /* * Use the system page size if that is a multiple of DIRBLKSIZ. * Hopefully this can be a big win someday by allowing page * trades to user space to be done by _getdirentries(). */ incr = getpagesize(); if ((incr % DIRBLKSIZ) != 0) incr = DIRBLKSIZ; /* * Determine whether this directory is the top of a union stack. */ if (flags & DTF_NODUP) { struct statfs sfb; if (_fstatfs(fd, &sfb) < 0) goto fail; unionstack = !strcmp(sfb.f_fstypename, "unionfs") || (sfb.f_flags & MNT_UNION); } else { unionstack = 0; } if (unionstack) { if (!_filldir(dirp, use_current_pos)) goto fail; dirp->dd_flags |= __DTF_READALL; } else { dirp->dd_len = incr; dirp->dd_buf = malloc(dirp->dd_len); if (dirp->dd_buf == NULL) goto fail; if (use_current_pos) { /* * Read the first batch of directory entries * to prime dd_seek. This also checks if the * fd passed to fdopendir() is a directory. */ dirp->dd_size = _getdirentries(dirp->dd_fd, dirp->dd_buf, dirp->dd_len, &dirp->dd_seek); if (dirp->dd_size < 0) { if (errno == EINVAL) errno = ENOTDIR; goto fail; } dirp->dd_flags |= __DTF_SKIPREAD; } else { dirp->dd_size = 0; dirp->dd_seek = 0; } } return (dirp); fail: saved_errno = errno; free(dirp->dd_buf); free(dirp); errno = saved_errno; return (NULL); } Index: head/lib/libc/gen/readdir-compat11.c =================================================================== --- head/lib/libc/gen/readdir-compat11.c (nonexistent) +++ head/lib/libc/gen/readdir-compat11.c (revision 318736) @@ -0,0 +1,120 @@ +/* + * Copyright (c) 1983, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: + * $FreeBSD$ + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)readdir.c 8.3 (Berkeley) 9/29/94"; +#endif /* LIBC_SCCS and not lint */ +#include +__FBSDID("$FreeBSD$"); + +#include "namespace.h" +#include +#define _WANT_FREEBSD11_DIRENT +#include +#include +#include +#include +#include +#include +#include +#include "un-namespace.h" + +#include "libc_private.h" +#include "gen-private.h" +#include "telldir.h" + +#include "gen-compat.h" + +static bool +freebsd11_cvtdirent(struct freebsd11_dirent *dstdp, struct dirent *srcdp) +{ + + if (srcdp->d_namlen >= sizeof(dstdp->d_name)) + return (false); + dstdp->d_type = srcdp->d_type; + dstdp->d_namlen = srcdp->d_namlen; + dstdp->d_fileno = srcdp->d_fileno; /* truncate */ + dstdp->d_reclen = FREEBSD11_DIRSIZ(dstdp); + bcopy(srcdp->d_name, dstdp->d_name, dstdp->d_namlen); + bzero(dstdp->d_name + dstdp->d_namlen, + dstdp->d_reclen - offsetof(struct freebsd11_dirent, d_name) - + dstdp->d_namlen); + return (true); +} + +struct freebsd11_dirent * +freebsd11_readdir(DIR *dirp) +{ + struct freebsd11_dirent *dstdp; + struct dirent *dp; + + if (__isthreaded) + _pthread_mutex_lock(&dirp->dd_lock); + dp = _readdir_unlocked(dirp, RDU_SKIP); + if (dp != NULL) { + if (dirp->dd_compat_de == NULL) + dirp->dd_compat_de = malloc(sizeof(struct + freebsd11_dirent)); + if (freebsd11_cvtdirent(dirp->dd_compat_de, dp)) + dstdp = dirp->dd_compat_de; + else + dstdp = NULL; + } else + dstdp = NULL; + if (__isthreaded) + _pthread_mutex_unlock(&dirp->dd_lock); + + return (dstdp); +} + +int +freebsd11_readdir_r(DIR *dirp, struct freebsd11_dirent *entry, + struct freebsd11_dirent **result) +{ + struct dirent xentry, *xresult; + int error; + + error = __readdir_r(dirp, &xentry, &xresult); + if (error != 0) + return (error); + if (xresult != NULL) { + if (freebsd11_cvtdirent(entry, &xentry)) + *result = entry; + else /* should not happen due to RDU_SHORT */ + *result = NULL; + } else + *result = NULL; + return (0); +} + +__sym_compat(readdir, freebsd11_readdir, FBSD_1.0); +__sym_compat(readdir_r, freebsd11_readdir_r, FBSD_1.0); Property changes on: head/lib/libc/gen/readdir-compat11.c ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: head/lib/libc/gen/readdir.c =================================================================== --- head/lib/libc/gen/readdir.c (revision 318735) +++ head/lib/libc/gen/readdir.c (revision 318736) @@ -1,135 +1,137 @@ /* * Copyright (c) 1983, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)readdir.c 8.3 (Berkeley) 9/29/94"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); #include "namespace.h" #include #include #include #include #include #include "un-namespace.h" #include "libc_private.h" #include "gen-private.h" #include "telldir.h" /* * get next entry in a directory. */ struct dirent * -_readdir_unlocked(DIR *dirp, int skip) +_readdir_unlocked(DIR *dirp, int flags) { struct dirent *dp; long initial_seek; long initial_loc = 0; for (;;) { if (dirp->dd_loc >= dirp->dd_size) { if (dirp->dd_flags & __DTF_READALL) return (NULL); initial_loc = dirp->dd_loc; dirp->dd_flags &= ~__DTF_SKIPREAD; dirp->dd_loc = 0; } if (dirp->dd_loc == 0 && !(dirp->dd_flags & (__DTF_READALL | __DTF_SKIPREAD))) { initial_seek = dirp->dd_seek; dirp->dd_size = _getdirentries(dirp->dd_fd, dirp->dd_buf, dirp->dd_len, &dirp->dd_seek); if (dirp->dd_size <= 0) return (NULL); _fixtelldir(dirp, initial_seek, initial_loc); } dirp->dd_flags &= ~__DTF_SKIPREAD; dp = (struct dirent *)(dirp->dd_buf + dirp->dd_loc); if ((long)dp & 03L) /* bogus pointer check */ return (NULL); if (dp->d_reclen <= 0 || dp->d_reclen > dirp->dd_len + 1 - dirp->dd_loc) return (NULL); dirp->dd_loc += dp->d_reclen; - if (dp->d_ino == 0 && skip) + if (dp->d_ino == 0 && (flags & RDU_SKIP) != 0) continue; if (dp->d_type == DT_WHT && (dirp->dd_flags & DTF_HIDEW)) continue; + if (dp->d_namlen >= sizeof(dp->d_name) && + (flags & RDU_SHORT) != 0) + continue; return (dp); } } struct dirent * readdir(DIR *dirp) { - struct dirent *dp; + struct dirent *dp; - if (__isthreaded) { + if (__isthreaded) _pthread_mutex_lock(&dirp->dd_lock); - dp = _readdir_unlocked(dirp, 1); + dp = _readdir_unlocked(dirp, RDU_SKIP); + if (__isthreaded) _pthread_mutex_unlock(&dirp->dd_lock); - } - else - dp = _readdir_unlocked(dirp, 1); return (dp); } int -readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result) +__readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result) { struct dirent *dp; int saved_errno; saved_errno = errno; errno = 0; - if (__isthreaded) { + if (__isthreaded) _pthread_mutex_lock(&dirp->dd_lock); - if ((dp = _readdir_unlocked(dirp, 1)) != NULL) - memcpy(entry, dp, _GENERIC_DIRSIZ(dp)); - _pthread_mutex_unlock(&dirp->dd_lock); - } - else if ((dp = _readdir_unlocked(dirp, 1)) != NULL) + dp = _readdir_unlocked(dirp, RDU_SKIP | RDU_SHORT); + if (dp != NULL) memcpy(entry, dp, _GENERIC_DIRSIZ(dp)); + if (__isthreaded) + _pthread_mutex_unlock(&dirp->dd_lock); if (errno != 0) { if (dp == NULL) return (errno); } else errno = saved_errno; if (dp != NULL) *result = entry; else *result = NULL; return (0); } + +__strong_reference(__readdir_r, readdir_r); Index: head/lib/libc/gen/scandir-compat11.c =================================================================== --- head/lib/libc/gen/scandir-compat11.c (nonexistent) +++ head/lib/libc/gen/scandir-compat11.c (revision 318736) @@ -0,0 +1,174 @@ +/* + * Copyright (c) 1983, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: + * $FreeBSD$ + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)scandir.c 8.3 (Berkeley) 1/2/94"; +#endif /* LIBC_SCCS and not lint */ +#include +__FBSDID("$FreeBSD$"); + +/* + * Scan the directory dirname calling select to make a list of selected + * directory entries then sort using qsort and compare routine dcomp. + * Returns the number of entries and a pointer to a list of pointers to + * struct dirent (through namelist). Returns -1 if there were any errors. + */ + +#include "namespace.h" +#define _WANT_FREEBSD11_DIRENT +#include +#include +#include +#include "un-namespace.h" + +#include "gen-compat.h" + +#ifdef I_AM_SCANDIR_B +#include "block_abi.h" +#define SELECT(x) CALL_BLOCK(select, x) +#ifndef __BLOCKS__ +void +qsort_b(void *, size_t, size_t, void*); +#endif +#else +#define SELECT(x) select(x) +#endif + +static int freebsd11_alphasort_thunk(void *thunk, const void *p1, + const void *p2); + +int +#ifdef I_AM_SCANDIR_B +freebsd11_scandir_b(const char *dirname, struct freebsd11_dirent ***namelist, + DECLARE_BLOCK(int, select, const struct freebsd11_dirent *), + DECLARE_BLOCK(int, dcomp, const struct freebsd11_dirent **, + const struct freebsd11_dirent **)) +#else +freebsd11_scandir(const char *dirname, struct freebsd11_dirent ***namelist, + int (*select)(const struct freebsd11_dirent *), + int (*dcomp)(const struct freebsd11_dirent **, + const struct freebsd11_dirent **)) +#endif +{ + struct freebsd11_dirent *d, *p, **names = NULL; + size_t arraysz, numitems; + DIR *dirp; + + if ((dirp = opendir(dirname)) == NULL) + return(-1); + + numitems = 0; + arraysz = 32; /* initial estimate of the array size */ + names = (struct freebsd11_dirent **)malloc( + arraysz * sizeof(struct freebsd11_dirent *)); + if (names == NULL) + goto fail; + + while ((d = freebsd11_readdir(dirp)) != NULL) { + if (select != NULL && !SELECT(d)) + continue; /* just selected names */ + /* + * Make a minimum size copy of the data + */ + p = (struct freebsd11_dirent *)malloc(FREEBSD11_DIRSIZ(d)); + if (p == NULL) + goto fail; + p->d_fileno = d->d_fileno; + p->d_type = d->d_type; + p->d_reclen = d->d_reclen; + p->d_namlen = d->d_namlen; + bcopy(d->d_name, p->d_name, p->d_namlen + 1); + /* + * Check to make sure the array has space left and + * realloc the maximum size. + */ + if (numitems >= arraysz) { + struct freebsd11_dirent **names2; + + names2 = reallocarray(names, arraysz, + 2 * sizeof(struct freebsd11_dirent *)); + if (names2 == NULL) { + free(p); + goto fail; + } + names = names2; + arraysz *= 2; + } + names[numitems++] = p; + } + closedir(dirp); + if (numitems && dcomp != NULL) +#ifdef I_AM_SCANDIR_B + qsort_b(names, numitems, sizeof(struct freebsd11_dirent *), + (void*)dcomp); +#else + qsort_r(names, numitems, sizeof(struct freebsd11_dirent *), + &dcomp, freebsd11_alphasort_thunk); +#endif + *namelist = names; + return (numitems); + +fail: + while (numitems > 0) + free(names[--numitems]); + free(names); + closedir(dirp); + return (-1); +} + +/* + * Alphabetic order comparison routine for those who want it. + * POSIX 2008 requires that alphasort() uses strcoll(). + */ +int +freebsd11_alphasort(const struct freebsd11_dirent **d1, + const struct freebsd11_dirent **d2) +{ + + return (strcoll((*d1)->d_name, (*d2)->d_name)); +} + +static int +freebsd11_alphasort_thunk(void *thunk, const void *p1, const void *p2) +{ + int (*dc)(const struct freebsd11_dirent **, const struct + freebsd11_dirent **); + + dc = *(int (**)(const struct freebsd11_dirent **, + const struct freebsd11_dirent **))thunk; + return (dc((const struct freebsd11_dirent **)p1, + (const struct freebsd11_dirent **)p2)); +} + +__sym_compat(alphasort, freebsd11_alphasort, FBSD_1.0); +__sym_compat(scandir, freebsd11_scandir, FBSD_1.0); +__sym_compat(scandir_b, freebsd11_scandir_b, FBSD_1.4); Property changes on: head/lib/libc/gen/scandir-compat11.c ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: head/lib/libc/gen/scandir.c =================================================================== --- head/lib/libc/gen/scandir.c (revision 318735) +++ head/lib/libc/gen/scandir.c (revision 318736) @@ -1,166 +1,155 @@ /* * Copyright (c) 1983, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)scandir.c 8.3 (Berkeley) 1/2/94"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); /* * Scan the directory dirname calling select to make a list of selected * directory entries then sort using qsort and compare routine dcomp. * Returns the number of entries and a pointer to a list of pointers to * struct dirent (through namelist). Returns -1 if there were any errors. */ #include "namespace.h" #include #include #include #include "un-namespace.h" #ifdef I_AM_SCANDIR_B #include "block_abi.h" #define SELECT(x) CALL_BLOCK(select, x) #ifndef __BLOCKS__ void qsort_b(void *, size_t, size_t, void*); #endif #else #define SELECT(x) select(x) #endif static int alphasort_thunk(void *thunk, const void *p1, const void *p2); -/* - * The DIRSIZ macro is the minimum record length which will hold the directory - * entry. This requires the amount of space in struct dirent without the - * d_name field, plus enough space for the name and a terminating nul byte - * (dp->d_namlen + 1), rounded up to a 4 byte boundary. - */ -#undef DIRSIZ -#define DIRSIZ(dp) \ - ((sizeof(struct dirent) - sizeof(dp)->d_name) + \ - (((dp)->d_namlen + 1 + 3) &~ 3)) - int #ifdef I_AM_SCANDIR_B scandir_b(const char *dirname, struct dirent ***namelist, DECLARE_BLOCK(int, select, const struct dirent *), DECLARE_BLOCK(int, dcomp, const struct dirent **, const struct dirent **)) #else scandir(const char *dirname, struct dirent ***namelist, int (*select)(const struct dirent *), int (*dcomp)(const struct dirent **, const struct dirent **)) #endif { struct dirent *d, *p, **names = NULL; size_t arraysz, numitems; DIR *dirp; if ((dirp = opendir(dirname)) == NULL) return(-1); numitems = 0; arraysz = 32; /* initial estimate of the array size */ names = (struct dirent **)malloc(arraysz * sizeof(struct dirent *)); if (names == NULL) goto fail; while ((d = readdir(dirp)) != NULL) { if (select != NULL && !SELECT(d)) continue; /* just selected names */ /* * Make a minimum size copy of the data */ - p = (struct dirent *)malloc(DIRSIZ(d)); + p = (struct dirent *)malloc(_GENERIC_DIRSIZ(d)); if (p == NULL) goto fail; p->d_fileno = d->d_fileno; p->d_type = d->d_type; p->d_reclen = d->d_reclen; p->d_namlen = d->d_namlen; bcopy(d->d_name, p->d_name, p->d_namlen + 1); /* * Check to make sure the array has space left and * realloc the maximum size. */ if (numitems >= arraysz) { struct dirent **names2; names2 = reallocarray(names, arraysz, 2 * sizeof(struct dirent *)); if (names2 == NULL) { free(p); goto fail; } names = names2; arraysz *= 2; } names[numitems++] = p; } closedir(dirp); if (numitems && dcomp != NULL) #ifdef I_AM_SCANDIR_B qsort_b(names, numitems, sizeof(struct dirent *), (void*)dcomp); #else qsort_r(names, numitems, sizeof(struct dirent *), &dcomp, alphasort_thunk); #endif *namelist = names; return (numitems); fail: while (numitems > 0) free(names[--numitems]); free(names); closedir(dirp); return (-1); } /* * Alphabetic order comparison routine for those who want it. * POSIX 2008 requires that alphasort() uses strcoll(). */ int alphasort(const struct dirent **d1, const struct dirent **d2) { return (strcoll((*d1)->d_name, (*d2)->d_name)); } static int alphasort_thunk(void *thunk, const void *p1, const void *p2) { int (*dc)(const struct dirent **, const struct dirent **); dc = *(int (**)(const struct dirent **, const struct dirent **))thunk; return (dc((const struct dirent **)p1, (const struct dirent **)p2)); } Index: head/lib/libc/gen/telldir.h =================================================================== --- head/lib/libc/gen/telldir.h (revision 318735) +++ head/lib/libc/gen/telldir.h (revision 318736) @@ -1,69 +1,72 @@ /* * Copyright (c) 1983, 1993 * The Regents of the University of California. All rights reserved. * * Copyright (c) 2000 * Daniel Eischen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _TELLDIR_H_ #define _TELLDIR_H_ #include #include /* * One of these structures is malloced to describe the current directory * position each time telldir is called. It records the current magic * cookie returned by getdirentries and the offset within the buffer * associated with that return value. */ struct ddloc { LIST_ENTRY(ddloc) loc_lqe; /* entry in list */ long loc_index; /* key associated with structure */ - long loc_seek; /* magic cookie returned by getdirentries */ + off_t loc_seek; /* magic cookie returned by getdirentries */ long loc_loc; /* offset of entry in buffer */ }; /* * One of these structures is malloced for each DIR to record telldir * positions. */ struct _telldir { LIST_HEAD(, ddloc) td_locq; /* list of locations */ long td_loccnt; /* index of entry for sequential readdir's */ }; bool _filldir(DIR *, bool); struct dirent *_readdir_unlocked(DIR *, int); void _reclaim_telldir(DIR *); void _seekdir(DIR *, long); void _fixtelldir(DIR *dirp, long oldseek, long oldloc); + +#define RDU_SKIP 0x0001 +#define RDU_SHORT 0x0002 #endif Index: head/lib/libc/include/compat.h =================================================================== --- head/lib/libc/include/compat.h (revision 318735) +++ head/lib/libc/include/compat.h (revision 318736) @@ -1,57 +1,78 @@ /*- * Copyright (c) 2009 Hudson River Trading LLC * Written by: John H. Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * This file defines compatibility symbol versions for old system calls. It * is included in all generated system call files. */ #ifndef __LIBC_COMPAT_H__ #define __LIBC_COMPAT_H__ #define __sym_compat(sym,impl,verid) \ .symver impl, sym@verid #ifndef NO_COMPAT7 __sym_compat(__semctl, freebsd7___semctl, FBSD_1.0); __sym_compat(msgctl, freebsd7_msgctl, FBSD_1.0); __sym_compat(shmctl, freebsd7_shmctl, FBSD_1.0); #endif +__sym_compat(nfstat, freebsd11_nfstat, FBSD_1.0); +__sym_compat(nlstat, freebsd11_nlstat, FBSD_1.0); +__sym_compat(nstat, freebsd11_nstat, FBSD_1.0); + +__sym_compat(fhstat, freebsd11_fhstat, FBSD_1.0); +__sym_compat(fstat, freebsd11_fstat, FBSD_1.0); +__sym_compat(fstatat, freebsd11_fstatat, FBSD_1.1); +__sym_compat(lstat, freebsd11_lstat, FBSD_1.0); +__sym_compat(stat, freebsd11_stat, FBSD_1.0); + +__sym_compat(getdents, freebsd11_getdents, FBSD_1.0); +__sym_compat(getdirentries, freebsd11_getdirentries, FBSD_1.0); + +__sym_compat(getfsstat, freebsd11_getfsstat, FBSD_1.0); +__sym_compat(fhstatfs, freebsd11_fhstatfs, FBSD_1.0); +__sym_compat(fstatfs, freebsd11_fstatfs, FBSD_1.0); +__sym_compat(statfs, freebsd11_statfs, FBSD_1.0); + +__sym_compat(mknod, freebsd11_mknod, FBSD_1.0); +__sym_compat(mknodat, freebsd11_mknodat, FBSD_1.1); + #undef __sym_compat #define __weak_reference(sym,alias) \ .weak alias;.equ alias,sym __weak_reference(__sys_fcntl,__fcntl_compat) #undef __weak_reference #endif /* __LIBC_COMPAT_H__ */ Index: head/lib/libc/include/libc_private.h =================================================================== --- head/lib/libc/include/libc_private.h (revision 318735) +++ head/lib/libc/include/libc_private.h (revision 318736) @@ -1,409 +1,412 @@ /* * Copyright (c) 1998 John Birrell . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * * Private definitions for libc, libc_r and libpthread. * */ #ifndef _LIBC_PRIVATE_H_ #define _LIBC_PRIVATE_H_ #include #include /* * This global flag is non-zero when a process has created one * or more threads. It is used to avoid calling locking functions * when they are not required. */ extern int __isthreaded; /* * Elf_Auxinfo *__elf_aux_vector, the pointer to the ELF aux vector * provided by kernel. Either set for us by rtld, or found at runtime * on stack for static binaries. * * Type is void to avoid polluting whole libc with ELF types. */ extern void *__elf_aux_vector; /* * libc should use libc_dlopen internally, which respects a global * flag where loading of new shared objects can be restricted. */ void *libc_dlopen(const char *, int); /* * For dynamic linker. */ void _rtld_error(const char *fmt, ...); /* * File lock contention is difficult to diagnose without knowing * where locks were set. Allow a debug library to be built which * records the source file and line number of each lock call. */ #ifdef _FLOCK_DEBUG #define _FLOCKFILE(x) _flockfile_debug(x, __FILE__, __LINE__) #else #define _FLOCKFILE(x) _flockfile(x) #endif /* * Macros for locking and unlocking FILEs. These test if the * process is threaded to avoid locking when not required. */ #define FLOCKFILE(fp) if (__isthreaded) _FLOCKFILE(fp) #define FUNLOCKFILE(fp) if (__isthreaded) _funlockfile(fp) struct _spinlock; extern struct _spinlock __stdio_thread_lock __hidden; #define STDIO_THREAD_LOCK() \ do { \ if (__isthreaded) \ _SPINLOCK(&__stdio_thread_lock); \ } while (0) #define STDIO_THREAD_UNLOCK() \ do { \ if (__isthreaded) \ _SPINUNLOCK(&__stdio_thread_lock); \ } while (0) void __libc_spinlock_stub(struct _spinlock *); void __libc_spinunlock_stub(struct _spinlock *); /* * Indexes into the pthread jump table. * * Warning! If you change this type, you must also change the threads * libraries that reference it (libc_r, libpthread). */ typedef enum { PJT_ATFORK, PJT_ATTR_DESTROY, PJT_ATTR_GETDETACHSTATE, PJT_ATTR_GETGUARDSIZE, PJT_ATTR_GETINHERITSCHED, PJT_ATTR_GETSCHEDPARAM, PJT_ATTR_GETSCHEDPOLICY, PJT_ATTR_GETSCOPE, PJT_ATTR_GETSTACKADDR, PJT_ATTR_GETSTACKSIZE, PJT_ATTR_INIT, PJT_ATTR_SETDETACHSTATE, PJT_ATTR_SETGUARDSIZE, PJT_ATTR_SETINHERITSCHED, PJT_ATTR_SETSCHEDPARAM, PJT_ATTR_SETSCHEDPOLICY, PJT_ATTR_SETSCOPE, PJT_ATTR_SETSTACKADDR, PJT_ATTR_SETSTACKSIZE, PJT_CANCEL, PJT_CLEANUP_POP, PJT_CLEANUP_PUSH, PJT_COND_BROADCAST, PJT_COND_DESTROY, PJT_COND_INIT, PJT_COND_SIGNAL, PJT_COND_TIMEDWAIT, PJT_COND_WAIT, PJT_DETACH, PJT_EQUAL, PJT_EXIT, PJT_GETSPECIFIC, PJT_JOIN, PJT_KEY_CREATE, PJT_KEY_DELETE, PJT_KILL, PJT_MAIN_NP, PJT_MUTEXATTR_DESTROY, PJT_MUTEXATTR_INIT, PJT_MUTEXATTR_SETTYPE, PJT_MUTEX_DESTROY, PJT_MUTEX_INIT, PJT_MUTEX_LOCK, PJT_MUTEX_TRYLOCK, PJT_MUTEX_UNLOCK, PJT_ONCE, PJT_RWLOCK_DESTROY, PJT_RWLOCK_INIT, PJT_RWLOCK_RDLOCK, PJT_RWLOCK_TRYRDLOCK, PJT_RWLOCK_TRYWRLOCK, PJT_RWLOCK_UNLOCK, PJT_RWLOCK_WRLOCK, PJT_SELF, PJT_SETCANCELSTATE, PJT_SETCANCELTYPE, PJT_SETSPECIFIC, PJT_SIGMASK, PJT_TESTCANCEL, PJT_CLEANUP_POP_IMP, PJT_CLEANUP_PUSH_IMP, PJT_CANCEL_ENTER, PJT_CANCEL_LEAVE, PJT_MUTEX_CONSISTENT, PJT_MUTEXATTR_GETROBUST, PJT_MUTEXATTR_SETROBUST, PJT_MAX } pjt_index_t; typedef int (*pthread_func_t)(void); typedef pthread_func_t pthread_func_entry_t[2]; extern pthread_func_entry_t __thr_jtable[]; void __set_error_selector(int *(*arg)(void)); int _pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex, void *(calloc_cb)(__size_t, __size_t)); typedef int (*interpos_func_t)(void); interpos_func_t *__libc_interposing_slot(int interposno); extern interpos_func_t __libc_interposing[] __hidden; enum { INTERPOS_accept, INTERPOS_accept4, INTERPOS_aio_suspend, INTERPOS_close, INTERPOS_connect, INTERPOS_fcntl, INTERPOS_fsync, INTERPOS_fork, INTERPOS_msync, INTERPOS_nanosleep, INTERPOS_openat, INTERPOS_poll, INTERPOS_pselect, INTERPOS_recvfrom, INTERPOS_recvmsg, INTERPOS_select, INTERPOS_sendmsg, INTERPOS_sendto, INTERPOS_setcontext, INTERPOS_sigaction, INTERPOS_sigprocmask, INTERPOS_sigsuspend, INTERPOS_sigwait, INTERPOS_sigtimedwait, INTERPOS_sigwaitinfo, INTERPOS_swapcontext, INTERPOS_system, INTERPOS_tcdrain, INTERPOS_read, INTERPOS_readv, INTERPOS_wait4, INTERPOS_write, INTERPOS_writev, INTERPOS__pthread_mutex_init_calloc_cb, INTERPOS_spinlock, INTERPOS_spinunlock, INTERPOS_kevent, INTERPOS_wait6, INTERPOS_ppoll, INTERPOS_map_stacks_exec, INTERPOS_fdatasync, INTERPOS_clock_nanosleep, INTERPOS_MAX }; /* * yplib internal interfaces */ #ifdef YP int _yp_check(char **); #endif /* * Initialise TLS for static programs */ void _init_tls(void); /* * Provides pthread_once()-like functionality for both single-threaded * and multi-threaded applications. */ int _once(pthread_once_t *, void (*)(void)); /* * Set the TLS thread pointer */ void _set_tp(void *tp); /* * This is a pointer in the C run-time startup code. It is used * by getprogname() and setprogname(). */ extern const char *__progname; /* * This function is used by the threading libraries to notify malloc that a * thread is exiting. */ void _malloc_thread_cleanup(void); /* * This function is used by the threading libraries to notify libc that a * thread is exiting, so its thread-local dtors should be called. */ void __cxa_thread_call_dtors(void); int __cxa_thread_atexit_hidden(void (*dtor_func)(void *), void *obj, void *dso_symbol) __hidden; /* * These functions are used by the threading libraries in order to protect * malloc across fork(). */ void _malloc_prefork(void); void _malloc_postfork(void); void _malloc_first_thread(void); /* * Function to clean up streams, called from abort() and exit(). */ extern void (*__cleanup)(void) __hidden; /* * Get kern.osreldate to detect ABI revisions. Explicitly * ignores value of $OSVERSION and caches result. */ int __getosreldate(void); #include #include struct aiocb; struct fd_set; struct iovec; struct kevent; struct msghdr; struct pollfd; struct rusage; struct sigaction; struct sockaddr; +struct stat; struct timespec; struct timeval; struct timezone; struct __siginfo; struct __ucontext; struct __wrusage; enum idtype; int __sys_aio_suspend(const struct aiocb * const[], int, const struct timespec *); int __sys_accept(int, struct sockaddr *, __socklen_t *); int __sys_accept4(int, struct sockaddr *, __socklen_t *, int); int __sys_clock_gettime(__clockid_t, struct timespec *ts); int __sys_clock_nanosleep(__clockid_t, int, const struct timespec *, struct timespec *); int __sys_close(int); int __sys_connect(int, const struct sockaddr *, __socklen_t); +__ssize_t __sys_getdirentries(int, char *, __size_t, __off_t *); int __sys_fcntl(int, int, ...); int __sys_fdatasync(int); +int __sys_fstatat(int, const char *, struct stat *, int); int __sys_fsync(int); __pid_t __sys_fork(void); int __sys_ftruncate(int, __off_t); int __sys_gettimeofday(struct timeval *, struct timezone *); int __sys_kevent(int, const struct kevent *, int, struct kevent *, int, const struct timespec *); __off_t __sys_lseek(int, __off_t, int); void *__sys_mmap(void *, __size_t, int, int, int, __off_t); int __sys_msync(void *, __size_t, int); int __sys_nanosleep(const struct timespec *, struct timespec *); int __sys_open(const char *, int, ...); int __sys_openat(int, const char *, int, ...); int __sys_pselect(int, struct fd_set *, struct fd_set *, struct fd_set *, const struct timespec *, const __sigset_t *); int __sys_ptrace(int, __pid_t, char *, int); int __sys_poll(struct pollfd *, unsigned, int); int __sys_ppoll(struct pollfd *, unsigned, const struct timespec *, const __sigset_t *); __ssize_t __sys_pread(int, void *, __size_t, __off_t); __ssize_t __sys_pwrite(int, const void *, __size_t, __off_t); __ssize_t __sys_read(int, void *, __size_t); __ssize_t __sys_readv(int, const struct iovec *, int); __ssize_t __sys_recv(int, void *, __size_t, int); __ssize_t __sys_recvfrom(int, void *, __size_t, int, struct sockaddr *, __socklen_t *); __ssize_t __sys_recvmsg(int, struct msghdr *, int); int __sys_select(int, struct fd_set *, struct fd_set *, struct fd_set *, struct timeval *); __ssize_t __sys_sendmsg(int, const struct msghdr *, int); __ssize_t __sys_sendto(int, const void *, __size_t, int, const struct sockaddr *, __socklen_t); int __sys_setcontext(const struct __ucontext *); int __sys_sigaction(int, const struct sigaction *, struct sigaction *); int __sys_sigprocmask(int, const __sigset_t *, __sigset_t *); int __sys_sigsuspend(const __sigset_t *); int __sys_sigtimedwait(const __sigset_t *, struct __siginfo *, const struct timespec *); int __sys_sigwait(const __sigset_t *, int *); int __sys_sigwaitinfo(const __sigset_t *, struct __siginfo *); int __sys_swapcontext(struct __ucontext *, const struct __ucontext *); int __sys_thr_kill(long, int); int __sys_thr_self(long *); int __sys_truncate(const char *, __off_t); __pid_t __sys_wait4(__pid_t, int *, int, struct rusage *); __pid_t __sys_wait6(enum idtype, __id_t, int *, int, struct __wrusage *, struct __siginfo *); __ssize_t __sys_write(int, const void *, __size_t); __ssize_t __sys_writev(int, const struct iovec *, int); int __libc_sigaction(int, const struct sigaction *, struct sigaction *) __hidden; int __libc_sigprocmask(int, const __sigset_t *, __sigset_t *) __hidden; int __libc_sigsuspend(const __sigset_t *) __hidden; int __libc_sigwait(const __sigset_t * __restrict, int * restrict sig); int __libc_system(const char *); int __libc_tcdrain(int); int __fcntl_compat(int fd, int cmd, ...); int __sys_futimens(int fd, const struct timespec *times) __hidden; int __sys_utimensat(int fd, const char *path, const struct timespec *times, int flag) __hidden; /* execve() with PATH processing to implement posix_spawnp() */ int _execvpe(const char *, char * const *, char * const *); int _elf_aux_info(int aux, void *buf, int buflen); struct dl_phdr_info; int __elf_phdr_match_addr(struct dl_phdr_info *, void *); void __init_elf_aux_vector(void); void __libc_map_stacks_exec(void); void _pthread_cancel_enter(int); void _pthread_cancel_leave(int); void __throw_constraint_handler_s(const char * restrict msg, int error); #endif /* _LIBC_PRIVATE_H_ */ Index: head/lib/libc/sys/Makefile.inc =================================================================== --- head/lib/libc/sys/Makefile.inc (revision 318735) +++ head/lib/libc/sys/Makefile.inc (revision 318736) @@ -1,488 +1,490 @@ # @(#)Makefile.inc 8.3 (Berkeley) 10/24/94 # $FreeBSD$ # sys sources .PATH: ${LIBC_SRCTOP}/${LIBC_ARCH}/sys ${LIBC_SRCTOP}/sys # Include the generated makefile containing the *complete* list # of syscall names in MIASM. .include "${SRCTOP}/sys/sys/syscall.mk" # Include machine dependent definitions. # # MDASM names override the default syscall names in MIASM. # NOASM will prevent the default syscall code from being generated. # PSEUDO generates _() and __sys_() symbols, but not (). # # While historically machine dependent, all architectures have the following # declarations in common: # NOASM= break.o \ exit.o \ getlogin.o \ sstk.o \ yield.o PSEUDO= _exit.o \ _getlogin.o .sinclude "${LIBC_SRCTOP}/${LIBC_ARCH}/sys/Makefile.inc" SRCS+= clock_gettime.c gettimeofday.c __vdso_gettimeofday.c NOASM+= clock_gettime.o gettimeofday.o PSEUDO+= _clock_gettime.o _gettimeofday.o # Sources common to both syscall interfaces: SRCS+= \ __error.c \ interposing_table.c +SRCS+= getdents.c lstat.c mknod.c stat.c + SRCS+= futimens.c utimensat.c NOASM+= futimens.o utimensat.o PSEUDO+= _futimens.o _utimensat.o SRCS+= pipe.c INTERPOSED = \ accept \ accept4 \ aio_suspend \ clock_nanosleep \ close \ connect \ fcntl \ fdatasync \ fsync \ fork \ kevent \ msync \ nanosleep \ open \ openat \ poll \ ppoll \ pselect \ ptrace \ read \ readv \ recvfrom \ recvmsg \ select \ sendmsg \ sendto \ setcontext \ sigprocmask \ sigsuspend \ sigtimedwait \ sigwait \ sigwaitinfo \ swapcontext \ wait4 \ wait6 \ write \ writev .if ${MACHINE_CPUARCH} == "sparc64" SRCS+= sigaction.c NOASM+= sigaction.o .else INTERPOSED+= sigaction .endif SRCS+= ${INTERPOSED:S/$/.c/} NOASM+= ${INTERPOSED:S/$/.o/} PSEUDO+= ${INTERPOSED:C/^.*$/_&.o/} # Add machine dependent asm sources: SRCS+=${MDASM} # Look though the complete list of syscalls (MIASM) for names that are # not defined with machine dependent implementations (MDASM) and are # not declared for no generation of default code (NOASM). Add each # syscall that satisfies these conditions to the ASM list. .for _asm in ${MIASM} .if (${MDASM:R:M${_asm:R}} == "") .if (${NOASM:R:M${_asm:R}} == "") ASM+=$(_asm) .endif .endif .endfor SASM= ${ASM:S/.o/.S/} SPSEUDO= ${PSEUDO:S/.o/.S/} SRCS+= ${SASM} ${SPSEUDO} SYM_MAPS+= ${LIBC_SRCTOP}/sys/Symbol.map # Generated files CLEANFILES+= ${SASM} ${SPSEUDO} .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" || \ ${MACHINE_CPUARCH} == "powerpc" || ${MACHINE_ARCH:Marmv6*} NOTE_GNU_STACK='\t.section .note.GNU-stack,"",%%progbits\n' .else NOTE_GNU_STACK='' .endif ${SASM}: printf '#include "compat.h"\n' > ${.TARGET} printf '#include "SYS.h"\nRSYSCALL(${.PREFIX})\n' >> ${.TARGET} printf ${NOTE_GNU_STACK} >>${.TARGET} ${SPSEUDO}: printf '#include "compat.h"\n' > ${.TARGET} printf '#include "SYS.h"\nPSEUDO(${.PREFIX:S/_//})\n' \ >> ${.TARGET} printf ${NOTE_GNU_STACK} >>${.TARGET} MAN+= abort2.2 \ accept.2 \ access.2 \ acct.2 \ adjtime.2 \ aio_cancel.2 \ aio_error.2 \ aio_fsync.2 \ aio_mlock.2 \ aio_read.2 \ aio_return.2 \ aio_suspend.2 \ aio_waitcomplete.2 \ aio_write.2 \ bind.2 \ bindat.2 \ brk.2 \ cap_enter.2 \ cap_fcntls_limit.2 \ cap_ioctls_limit.2 \ cap_rights_limit.2 \ chdir.2 \ chflags.2 \ chmod.2 \ chown.2 \ chroot.2 \ clock_gettime.2 \ close.2 \ closefrom.2 \ connect.2 \ connectat.2 \ cpuset.2 \ cpuset_getaffinity.2 \ dup.2 \ execve.2 \ _exit.2 \ extattr_get_file.2 \ fcntl.2 \ ffclock.2 \ fhopen.2 \ flock.2 \ fork.2 \ fsync.2 \ getdirentries.2 \ getdtablesize.2 \ getfh.2 \ getfsstat.2 \ getgid.2 \ getgroups.2 \ getitimer.2 \ getlogin.2 \ getloginclass.2 \ getpeername.2 \ getpgrp.2 \ getpid.2 \ getpriority.2 \ getrlimit.2 \ getrusage.2 \ getsid.2 \ getsockname.2 \ getsockopt.2 \ gettimeofday.2 \ getuid.2 \ intro.2 \ ioctl.2 \ issetugid.2 \ jail.2 \ kenv.2 \ kill.2 \ kldfind.2 \ kldfirstmod.2 \ kldload.2 \ kldnext.2 \ kldstat.2 \ kldsym.2 \ kldunload.2 \ kqueue.2 \ ktrace.2 \ link.2 \ lio_listio.2 \ listen.2 \ lseek.2 \ madvise.2 \ mincore.2 \ minherit.2 \ mkdir.2 \ mkfifo.2 \ mknod.2 \ mlock.2 \ mlockall.2 \ mmap.2 \ modfind.2 \ modnext.2 \ modstat.2 \ mount.2 \ mprotect.2 \ mq_close.2 \ mq_getattr.2 \ mq_notify.2 \ mq_open.2 \ mq_receive.2 \ mq_send.2 \ mq_setattr.2 \ msgctl.2 \ msgget.2 \ msgrcv.2 \ msgsnd.2 \ msync.2 \ munmap.2 \ nanosleep.2 \ nfssvc.2 \ ntp_adjtime.2 \ numa_getaffinity.2 \ open.2 \ pathconf.2 \ pdfork.2 \ pipe.2 \ poll.2 \ posix_fadvise.2 \ posix_fallocate.2 \ posix_openpt.2 \ procctl.2 \ profil.2 \ pselect.2 \ ptrace.2 \ quotactl.2 \ rctl_add_rule.2 \ read.2 \ readlink.2 \ reboot.2 \ recv.2 \ rename.2 \ revoke.2 \ rfork.2 \ rmdir.2 \ rtprio.2 .if !defined(NO_P1003_1B) MAN+= sched_get_priority_max.2 \ sched_setparam.2 \ sched_setscheduler.2 \ sched_yield.2 .endif MAN+= sctp_generic_recvmsg.2 \ sctp_generic_sendmsg.2 \ sctp_peeloff.2 \ select.2 \ semctl.2 \ semget.2 \ semop.2 \ send.2 \ setfib.2 \ sendfile.2 \ setgroups.2 \ setpgid.2 \ setregid.2 \ setresuid.2 \ setreuid.2 \ setsid.2 \ setuid.2 \ shmat.2 \ shmctl.2 \ shmget.2 \ shm_open.2 \ shutdown.2 \ sigaction.2 \ sigaltstack.2 \ sigpending.2 \ sigprocmask.2 \ sigqueue.2 \ sigreturn.2 \ sigstack.2 \ sigsuspend.2 \ sigwait.2 \ sigwaitinfo.2 \ socket.2 \ socketpair.2 \ stat.2 \ statfs.2 \ swapon.2 \ symlink.2 \ sync.2 \ sysarch.2 \ syscall.2 \ thr_exit.2 \ thr_kill.2 \ thr_new.2 \ thr_self.2 \ thr_set_name.2 \ thr_suspend.2 \ thr_wake.2 \ timer_create.2 \ timer_delete.2 \ timer_settime.2 \ truncate.2 \ umask.2 \ undelete.2 \ unlink.2 \ utimensat.2 \ utimes.2 \ utrace.2 \ uuidgen.2 \ vfork.2 \ wait.2 \ write.2 \ _umtx_op.2 MLINKS+=accept.2 accept4.2 MLINKS+=access.2 eaccess.2 \ access.2 faccessat.2 MLINKS+=brk.2 sbrk.2 MLINKS+=cap_enter.2 cap_getmode.2 MLINKS+=cap_fcntls_limit.2 cap_fcntls_get.2 MLINKS+=cap_ioctls_limit.2 cap_ioctls_get.2 MLINKS+=cap_rights_limit.2 cap_rights_get.2 MLINKS+=chdir.2 fchdir.2 MLINKS+=chflags.2 chflagsat.2 \ chflags.2 fchflags.2 \ chflags.2 lchflags.2 MLINKS+=chmod.2 fchmod.2 \ chmod.2 fchmodat.2 \ chmod.2 lchmod.2 MLINKS+=chown.2 fchown.2 \ chown.2 fchownat.2 \ chown.2 lchown.2 MLINKS+=clock_gettime.2 clock_getres.2 \ clock_gettime.2 clock_settime.2 MLINKS+=nanosleep.2 clock_nanosleep.2 MLINKS+=cpuset.2 cpuset_getid.2 \ cpuset.2 cpuset_setid.2 MLINKS+=cpuset_getaffinity.2 cpuset_setaffinity.2 MLINKS+=dup.2 dup2.2 MLINKS+=execve.2 fexecve.2 MLINKS+=extattr_get_file.2 extattr.2 \ extattr_get_file.2 extattr_delete_fd.2 \ extattr_get_file.2 extattr_delete_file.2 \ extattr_get_file.2 extattr_delete_link.2 \ extattr_get_file.2 extattr_get_fd.2 \ extattr_get_file.2 extattr_get_link.2 \ extattr_get_file.2 extattr_list_fd.2 \ extattr_get_file.2 extattr_list_file.2 \ extattr_get_file.2 extattr_list_link.2 \ extattr_get_file.2 extattr_set_fd.2 \ extattr_get_file.2 extattr_set_file.2 \ extattr_get_file.2 extattr_set_link.2 MLINKS+=ffclock.2 ffclock_getcounter.2 \ ffclock.2 ffclock_getestimate.2 \ ffclock.2 ffclock_setestimate.2 MLINKS+=fhopen.2 fhstat.2 fhopen.2 fhstatfs.2 MLINKS+=fsync.2 fdatasync.2 MLINKS+=getdirentries.2 getdents.2 MLINKS+=getfh.2 lgetfh.2 MLINKS+=getgid.2 getegid.2 MLINKS+=getitimer.2 setitimer.2 MLINKS+=getlogin.2 getlogin_r.3 MLINKS+=getlogin.2 setlogin.2 MLINKS+=getloginclass.2 setloginclass.2 MLINKS+=getpgrp.2 getpgid.2 MLINKS+=getpid.2 getppid.2 MLINKS+=getpriority.2 setpriority.2 MLINKS+=getrlimit.2 setrlimit.2 MLINKS+=getsockopt.2 setsockopt.2 MLINKS+=gettimeofday.2 settimeofday.2 MLINKS+=getuid.2 geteuid.2 MLINKS+=intro.2 errno.2 MLINKS+=jail.2 jail_attach.2 \ jail.2 jail_get.2 \ jail.2 jail_remove.2 \ jail.2 jail_set.2 MLINKS+=kldunload.2 kldunloadf.2 MLINKS+=kqueue.2 kevent.2 \ kqueue.2 EV_SET.3 MLINKS+=link.2 linkat.2 MLINKS+=madvise.2 posix_madvise.2 MLINKS+=mkdir.2 mkdirat.2 MLINKS+=mkfifo.2 mkfifoat.2 MLINKS+=mknod.2 mknodat.2 MLINKS+=mlock.2 munlock.2 MLINKS+=mlockall.2 munlockall.2 MLINKS+=modnext.2 modfnext.2 MLINKS+=mount.2 nmount.2 \ mount.2 unmount.2 MLINKS+=mq_receive.2 mq_timedreceive.2 MLINKS+=mq_send.2 mq_timedsend.2 MLINKS+=ntp_adjtime.2 ntp_gettime.2 MLINKS+=numa_getaffinity.2 numa_setaffinity.2 MLINKS+=open.2 openat.2 MLINKS+=pathconf.2 fpathconf.2 MLINKS+=pathconf.2 lpathconf.2 MLINKS+=pdfork.2 pdgetpid.2\ pdfork.2 pdkill.2 \ pdfork.2 pdwait4.2 MLINKS+=pipe.2 pipe2.2 MLINKS+=poll.2 ppoll.2 MLINKS+=rctl_add_rule.2 rctl_get_limits.2 \ rctl_add_rule.2 rctl_get_racct.2 \ rctl_add_rule.2 rctl_get_rules.2 \ rctl_add_rule.2 rctl_remove_rule.2 MLINKS+=read.2 pread.2 \ read.2 preadv.2 \ read.2 readv.2 MLINKS+=readlink.2 readlinkat.2 MLINKS+=recv.2 recvfrom.2 \ recv.2 recvmsg.2 MLINKS+=rename.2 renameat.2 MLINKS+=rtprio.2 rtprio_thread.2 .if !defined(NO_P1003_1B) MLINKS+=sched_get_priority_max.2 sched_get_priority_min.2 \ sched_get_priority_max.2 sched_rr_get_interval.2 MLINKS+=sched_setparam.2 sched_getparam.2 MLINKS+=sched_setscheduler.2 sched_getscheduler.2 .endif MLINKS+=select.2 FD_CLR.3 \ select.2 FD_ISSET.3 \ select.2 FD_SET.3 \ select.2 FD_ZERO.3 MLINKS+=send.2 sendmsg.2 \ send.2 sendto.2 MLINKS+=setpgid.2 setpgrp.2 MLINKS+=setresuid.2 getresgid.2 \ setresuid.2 getresuid.2 \ setresuid.2 setresgid.2 MLINKS+=setuid.2 setegid.2 \ setuid.2 seteuid.2 \ setuid.2 setgid.2 MLINKS+=shmat.2 shmdt.2 MLINKS+=shm_open.2 shm_unlink.2 MLINKS+=sigwaitinfo.2 sigtimedwait.2 MLINKS+=stat.2 fstat.2 \ stat.2 fstatat.2 \ stat.2 lstat.2 MLINKS+=statfs.2 fstatfs.2 MLINKS+=swapon.2 swapoff.2 MLINKS+=symlink.2 symlinkat.2 MLINKS+=syscall.2 __syscall.2 MLINKS+=timer_settime.2 timer_getoverrun.2 \ timer_settime.2 timer_gettime.2 MLINKS+=thr_kill.2 thr_kill2.2 MLINKS+=truncate.2 ftruncate.2 MLINKS+=unlink.2 unlinkat.2 MLINKS+=utimensat.2 futimens.2 MLINKS+=utimes.2 futimes.2 \ utimes.2 futimesat.2 \ utimes.2 lutimes.2 MLINKS+=wait.2 wait3.2 \ wait.2 wait4.2 \ wait.2 waitpid.2 \ wait.2 waitid.2 \ wait.2 wait6.2 MLINKS+=write.2 pwrite.2 \ write.2 pwritev.2 \ write.2 writev.2 Index: head/lib/libc/sys/Symbol.map =================================================================== --- head/lib/libc/sys/Symbol.map (revision 318735) +++ head/lib/libc/sys/Symbol.map (revision 318736) @@ -1,1049 +1,1034 @@ /* * $FreeBSD$ */ /* * It'd be nice to automatically generate the syscall symbols, but we * don't know to what version they will eventually belong to, so for now * it has to be manual. */ FBSD_1.0 { __acl_aclcheck_fd; __acl_aclcheck_file; __acl_aclcheck_link; __acl_delete_fd; __acl_delete_file; __acl_delete_link; __acl_get_fd; __acl_get_file; __acl_get_link; __acl_set_fd; __acl_set_file; __acl_set_link; __getcwd; __mac_execve; __mac_get_fd; __mac_get_file; __mac_get_link; __mac_get_pid; __mac_get_proc; __mac_set_fd; __mac_set_file; __mac_set_link; __mac_set_proc; __setugid; __syscall; __sysctl; _umtx_op; abort2; accept; access; acct; adjtime; aio_cancel; aio_error; aio_fsync; aio_read; aio_return; aio_suspend; aio_waitcomplete; aio_write; audit; auditctl; auditon; bind; chdir; chflags; chmod; chown; chroot; clock_getres; clock_gettime; clock_settime; close; connect; dup; dup2; eaccess; execve; extattr_delete_fd; extattr_delete_file; extattr_delete_link; extattr_get_fd; extattr_get_file; extattr_get_link; extattr_list_fd; extattr_list_file; extattr_list_link; extattr_set_fd; extattr_set_file; extattr_set_link; extattrctl; fchdir; fchflags; fchmod; fchown; fcntl; fhopen; - fhstat; - fhstatfs; flock; fork; fpathconf; - fstat; - fstatfs; fsync; futimes; getaudit; getaudit_addr; getauid; getcontext; - getdents; - getdirentries; getdtablesize; getegid; geteuid; getfh; - getfsstat; getgid; getgroups; getitimer; getpeername; getpgid; getpgrp; getpid; getppid; getpriority; getresgid; getresuid; getrlimit; getrusage; getsid; getsockname; getsockopt; gettimeofday; getuid; ioctl; issetugid; jail; jail_attach; kenv; kevent; kill; kldfind; kldfirstmod; kldload; kldnext; kldstat; kldsym; kldunload; kldunloadf; kqueue; kmq_notify; /* Do we want these to be public interfaces? */ kmq_open; /* librt uses them to provide mq_xxx. */ kmq_setattr; kmq_timedreceive; kmq_timedsend; kmq_unlink; ksem_close; ksem_destroy; ksem_getvalue; ksem_init; ksem_open; ksem_post; ksem_timedwait; ksem_trywait; ksem_unlink; ksem_wait; ktrace; lchflags; lchmod; lchown; lgetfh; link; lio_listio; listen; - lstat; lutimes; mac_syscall; madvise; mincore; minherit; mkdir; mkfifo; - mknod; mlock; mlockall; modfind; modfnext; modnext; modstat; mount; mprotect; msgget; msgrcv; msgsnd; msgsys; msync; munlock; munlockall; munmap; nanosleep; netbsd_lchown; netbsd_msync; nfssvc; - nfstat; - nlstat; nmount; - nstat; ntp_adjtime; ntp_gettime; open; pathconf; pipe; poll; posix_openpt; preadv; profil; pselect; ptrace; pwritev; quotactl; read; readlink; readv; reboot; recvfrom; recvmsg; rename; revoke; rfork; rmdir; rtprio; rtprio_thread; sched_get_priority_max; sched_get_priority_min; sched_getparam; sched_getscheduler; sched_rr_get_interval; sched_setparam; sched_setscheduler; sched_yield; select; semget; semop; semsys; sendfile; sendmsg; sendto; setaudit; setaudit_addr; setauid; setegid; seteuid; setgid; setgroups; setitimer; setlogin; setpgid; setpriority; setregid; setresgid; setresuid; setreuid; setrlimit; setsid; setsockopt; settimeofday; setuid; shm_open; shm_unlink; shmat; shmdt; shmget; shmsys; shutdown; sigaction; sigaltstack; sigpending; sigprocmask; sigqueue; sigreturn; sigsuspend; sigtimedwait; sigwait; sigwaitinfo; socket; socketpair; - stat; - statfs; swapoff; swapon; symlink; sync; sysarch; syscall; thr_create; thr_exit; thr_kill; thr_kill2; thr_new; thr_self; thr_set_name; thr_suspend; thr_wake; ktimer_create; /* Do we want these to be public interfaces? */ ktimer_delete; /* librt uses them to provide timer_xxx. */ ktimer_getoverrun; ktimer_gettime; ktimer_settime; umask; undelete; unlink; unmount; utimes; utrace; uuidgen; vadvise; wait4; write; writev; __error; ftruncate; lseek; mmap; pread; pwrite; truncate; }; FBSD_1.1 { __semctl; closefrom; cpuset; cpuset_getid; cpuset_setid; cpuset_getaffinity; cpuset_setaffinity; faccessat; fchmodat; fchownat; fexecve; - fstatat; futimesat; jail_get; jail_set; jail_remove; linkat; lpathconf; mkdirat; mkfifoat; - mknodat; msgctl; readlinkat; renameat; setfib; shmctl; symlinkat; unlinkat; }; FBSD_1.2 { cap_enter; cap_getmode; getloginclass; pdfork; pdgetpid; pdkill; posix_fallocate; rctl_get_racct; rctl_get_rules; rctl_get_limits; rctl_add_rule; rctl_remove_rule; setloginclass; }; FBSD_1.3 { accept4; aio_mlock; bindat; cap_fcntls_get; cap_fcntls_limit; cap_ioctls_get; cap_ioctls_limit; __cap_rights_get; cap_rights_limit; cap_sandboxed; chflagsat; clock_getcpuclockid2; connectat; ffclock_getcounter; ffclock_getestimate; ffclock_setestimate; pipe2; posix_fadvise; procctl; wait6; }; FBSD_1.4 { futimens; ppoll; utimensat; numa_setaffinity; numa_getaffinity; sendmmsg; recvmmsg; }; FBSD_1.5 { clock_nanosleep; fdatasync; + fhstat; + fhstatfs; + fstat; + fstatat; + fstatfs; + getdents; + getdirentries; + getfsstat; + lstat; + mknod; + mknodat; + stat; + statfs; }; FBSDprivate_1.0 { ___acl_aclcheck_fd; __sys___acl_aclcheck_fd; ___acl_aclcheck_file; __sys___acl_aclcheck_file; ___acl_aclcheck_link; __sys___acl_aclcheck_link; ___acl_delete_fd; __sys___acl_delete_fd; ___acl_delete_file; __sys___acl_delete_file; ___acl_delete_link; __sys___acl_delete_link; ___acl_get_fd; __sys___acl_get_fd; ___acl_get_file; __sys___acl_get_file; ___acl_get_link; __sys___acl_get_link; ___acl_set_fd; __sys___acl_set_fd; ___acl_set_file; __sys___acl_set_file; ___acl_set_link; __sys___acl_set_link; ___getcwd; __sys___getcwd; ___mac_execve; __sys___mac_execve; ___mac_get_fd; __sys___mac_get_fd; ___mac_get_file; __sys___mac_get_file; ___mac_get_link; __sys___mac_get_link; ___mac_get_pid; __sys___mac_get_pid; ___mac_get_proc; __sys___mac_get_proc; ___mac_set_fd; __sys___mac_set_fd; ___mac_set_file; __sys___mac_set_file; ___mac_set_link; __sys___mac_set_link; ___mac_set_proc; __sys___mac_set_proc; ___semctl; __sys___semctl; ___setugid; __sys___setugid; ___syscall; __sys___syscall; ___sysctl; __sys___sysctl; __umtx_op; __sys__umtx_op; _abort2; __sys_abort2; _accept; __sys_accept; _accept4; __sys_accept4; _access; __sys_access; _acct; __sys_acct; _adjtime; __sys_adjtime; _aio_cancel; __sys_aio_cancel; _aio_error; __sys_aio_error; _aio_fsync; __sys_aio_fsync; _aio_read; __sys_aio_read; _aio_return; __sys_aio_return; _aio_suspend; __sys_aio_suspend; _aio_waitcomplete; __sys_aio_waitcomplete; _aio_write; __sys_aio_write; _audit; __sys_audit; _auditctl; __sys_auditctl; _auditon; __sys_auditon; _bind; __sys_bind; _chdir; __sys_chdir; _chflags; __sys_chflags; _chmod; __sys_chmod; _chown; __sys_chown; _chroot; __sys_chroot; _clock_getcpuclockid2; __sys_clock_getcpuclockid2; _clock_getres; __sys_clock_getres; _clock_gettime; __sys_clock_gettime; __sys_clock_nanosleep; _clock_settime; __sys_clock_settime; _close; __sys_close; _closefrom; __sys_closefrom; _connect; __sys_connect; _cpuset; __sys_cpuset; _cpuset_getid; __sys_cpuset_getid; _cpuset_setid; __sys_cpuset_setid; _cpuset_getaffinity; __sys_cpuset_getaffinity; _cpuset_setaffinity; __sys_cpuset_setaffinity; _dup; __sys_dup; _dup2; __sys_dup2; _eaccess; __sys_eaccess; _execve; __sys_execve; _extattr_delete_fd; __sys_extattr_delete_fd; _extattr_delete_file; __sys_extattr_delete_file; _extattr_delete_link; __sys_extattr_delete_link; _extattr_get_fd; __sys_extattr_get_fd; _extattr_get_file; __sys_extattr_get_file; _extattr_get_link; __sys_extattr_get_link; _extattr_list_fd; __sys_extattr_list_fd; _extattr_list_file; __sys_extattr_list_file; _extattr_list_link; __sys_extattr_list_link; _extattr_set_fd; __sys_extattr_set_fd; _extattr_set_file; __sys_extattr_set_file; _extattr_set_link; __sys_extattr_set_link; _extattrctl; __sys_extattrctl; _fchdir; __sys_fchdir; _fchflags; __sys_fchflags; _fchmod; __sys_fchmod; _fchown; __sys_fchown; _fcntl; __sys_fcntl; __fcntl_compat; _fhopen; __sys_fhopen; _fhstat; __sys_fhstat; _fhstatfs; __sys_fhstatfs; _flock; __sys_flock; _fork; __sys_fork; _fpathconf; __sys_fpathconf; _fstat; __sys_fstat; _fstatfs; __sys_fstatfs; _fsync; __sys_fsync; _fdatasync; __sys_fdatasync; _futimes; __sys_futimes; _getaudit; __sys_getaudit; _getaudit_addr; __sys_getaudit_addr; _getauid; __sys_getauid; _getcontext; __sys_getcontext; - _getdents; - __sys_getdents; _getdirentries; __sys_getdirentries; _getdtablesize; __sys_getdtablesize; _getegid; __sys_getegid; _geteuid; __sys_geteuid; _getfh; __sys_getfh; _getfsstat; __sys_getfsstat; _getgid; __sys_getgid; _getgroups; __sys_getgroups; _getitimer; __sys_getitimer; _getpeername; __sys_getpeername; _getpgid; __sys_getpgid; _getpgrp; __sys_getpgrp; _getpid; __sys_getpid; _getppid; __sys_getppid; _getpriority; __sys_getpriority; _getresgid; __sys_getresgid; _getresuid; __sys_getresuid; _getrlimit; __sys_getrlimit; _getrusage; __sys_getrusage; _getsid; __sys_getsid; _getsockname; __sys_getsockname; _getsockopt; __sys_getsockopt; _gettimeofday; __sys_gettimeofday; _getuid; __sys_getuid; _ioctl; __sys_ioctl; _issetugid; __sys_issetugid; _jail; __sys_jail; _jail_attach; __sys_jail_attach; _kenv; __sys_kenv; _kevent; __sys_kevent; _kill; __sys_kill; _kldfind; __sys_kldfind; _kldfirstmod; __sys_kldfirstmod; _kldload; __sys_kldload; _kldnext; __sys_kldnext; _kldstat; __sys_kldstat; _kldsym; __sys_kldsym; _kldunload; __sys_kldunload; _kldunloadf; __sys_kldunloadf; _kmq_notify; __sys_kmq_notify; _kmq_open; __sys_kmq_open; _kmq_setattr; __sys_kmq_setattr; _kmq_timedreceive; __sys_kmq_timedreceive; _kmq_timedsend; __sys_kmq_timedsend; _kmq_unlink; __sys_kmq_unlink; _kqueue; __sys_kqueue; _ksem_close; __sys_ksem_close; _ksem_destroy; __sys_ksem_destroy; _ksem_getvalue; __sys_ksem_getvalue; _ksem_init; __sys_ksem_init; _ksem_open; __sys_ksem_open; _ksem_post; __sys_ksem_post; _ksem_timedwait; __sys_ksem_timedwait; _ksem_trywait; __sys_ksem_trywait; _ksem_unlink; __sys_ksem_unlink; _ksem_wait; __sys_ksem_wait; _ktrace; __sys_ktrace; _lchflags; __sys_lchflags; _lchmod; __sys_lchmod; _lchown; __sys_lchown; _lgetfh; __sys_lgetfh; _link; __sys_link; _lio_listio; __sys_lio_listio; _listen; __sys_listen; - _lstat; - __sys_lstat; _lutimes; __sys_lutimes; _mac_syscall; __sys_mac_syscall; _madvise; __sys_madvise; _mincore; __sys_mincore; _minherit; __sys_minherit; _mkdir; __sys_mkdir; _mkfifo; __sys_mkfifo; _mknod; __sys_mknod; _mlock; __sys_mlock; _mlockall; __sys_mlockall; _modfind; __sys_modfind; _modfnext; __sys_modfnext; _modnext; __sys_modnext; _modstat; __sys_modstat; _mount; __sys_mount; _mprotect; __sys_mprotect; _msgctl; __sys_msgctl; _msgget; __sys_msgget; _msgrcv; __sys_msgrcv; _msgsnd; __sys_msgsnd; _msgsys; __sys_msgsys; _msync; __sys_msync; _munlock; __sys_munlock; _munlockall; __sys_munlockall; _munmap; __sys_munmap; _nanosleep; __sys_nanosleep; _netbsd_lchown; __sys_netbsd_lchown; _netbsd_msync; __sys_netbsd_msync; _nfssvc; __sys_nfssvc; - _nfstat; - __sys_nfstat; - _nlstat; - __sys_nlstat; _nmount; __sys_nmount; - _nstat; - __sys_nstat; _ntp_adjtime; __sys_ntp_adjtime; _ntp_gettime; __sys_ntp_gettime; _open; __sys_open; _openat; __sys_openat; _pathconf; __sys_pathconf; _pipe; __sys_pipe; _poll; __sys_poll; _ppoll; __sys_ppoll; _preadv; __sys_preadv; _procctl; __sys_procctl; _profil; __sys_profil; _pselect; __sys_pselect; _ptrace; __sys_ptrace; _pwritev; __sys_pwritev; _quotactl; __sys_quotactl; _read; __sys_read; _readlink; __sys_readlink; _readv; __sys_readv; _reboot; __sys_reboot; _recvfrom; __sys_recvfrom; _recvmsg; __sys_recvmsg; _rename; __sys_rename; _revoke; __sys_revoke; _rfork; __sys_rfork; _rmdir; __sys_rmdir; _rtprio; __sys_rtprio; _rtprio_thread; __sys_rtprio_thread; _sched_get_priority_max; __sys_sched_get_priority_max; _sched_get_priority_min; __sys_sched_get_priority_min; _sched_getparam; __sys_sched_getparam; _sched_getscheduler; __sys_sched_getscheduler; _sched_rr_get_interval; __sys_sched_rr_get_interval; _sched_setparam; __sys_sched_setparam; _sched_setscheduler; __sys_sched_setscheduler; _sched_yield; __sys_sched_yield; _select; __sys_select; _semget; __sys_semget; _semop; __sys_semop; _semsys; __sys_semsys; _sendfile; __sys_sendfile; _sendmsg; __sys_sendmsg; _sendto; __sys_sendto; _setaudit; __sys_setaudit; _setaudit_addr; __sys_setaudit_addr; _setauid; __sys_setauid; _setcontext; __sys_setcontext; _setegid; __sys_setegid; _seteuid; __sys_seteuid; _setgid; __sys_setgid; _setgroups; __sys_setgroups; _setitimer; __sys_setitimer; _setlogin; __sys_setlogin; _setpgid; __sys_setpgid; _setpriority; __sys_setpriority; _setregid; __sys_setregid; _setresgid; __sys_setresgid; _setresuid; __sys_setresuid; _setreuid; __sys_setreuid; _setrlimit; __sys_setrlimit; _setsid; __sys_setsid; _setsockopt; __sys_setsockopt; _settimeofday; __sys_settimeofday; _setuid; __sys_setuid; _shm_open; __sys_shm_open; _shm_unlink; __sys_shm_unlink; _shmat; __sys_shmat; _shmctl; __sys_shmctl; _shmdt; __sys_shmdt; _shmget; __sys_shmget; _shmsys; __sys_shmsys; _shutdown; __sys_shutdown; _sigaction; __sys_sigaction; _sigaltstack; __sys_sigaltstack; _sigpending; __sys_sigpending; _sigprocmask; __sys_sigprocmask; _sigqueue; __sys_sigqueue; _sigreturn; __sys_sigreturn; _sigsuspend; __sys_sigsuspend; _sigtimedwait; __sys_sigtimedwait; _sigwait; __sigwait; __sys_sigwait; _sigwaitinfo; __sys_sigwaitinfo; _socket; __sys_socket; _socketpair; __sys_socketpair; - _stat; - __sys_stat; _statfs; __sys_statfs; _swapcontext; __sys_swapcontext; _swapoff; __sys_swapoff; _swapon; __sys_swapon; _symlink; __sys_symlink; _sync; __sys_sync; _sysarch; __sys_sysarch; _syscall; __sys_syscall; _thr_create; __sys_thr_create; _thr_exit; __sys_thr_exit; _thr_kill; __sys_thr_kill; _thr_kill2; __sys_thr_kill2; _thr_new; __sys_thr_new; _thr_self; __sys_thr_self; _thr_set_name; __sys_thr_set_name; _thr_suspend; __sys_thr_suspend; _thr_wake; __sys_thr_wake; _ktimer_create; __sys_ktimer_create; _ktimer_delete; __sys_ktimer_delete; _ktimer_getoverrun; __sys_ktimer_getoverrun; _ktimer_gettime; __sys_ktimer_gettime; _ktimer_settime; __sys_ktimer_settime; _umask; __sys_umask; _undelete; __sys_undelete; _unlink; __sys_unlink; _unmount; __sys_unmount; _utimes; __sys_utimes; _utrace; __sys_utrace; _uuidgen; __sys_uuidgen; _vadvise; __sys_vadvise; _wait4; __sys_wait4; _wait6; __sys_wait6; _write; __sys_write; _writev; __sys_writev; __set_error_selector; nlm_syscall; gssd_syscall; __libc_interposing_slot; __libc_sigwait; }; Index: head/lib/libc/sys/getdents.c =================================================================== --- head/lib/libc/sys/getdents.c (nonexistent) +++ head/lib/libc/sys/getdents.c (revision 318736) @@ -0,0 +1,41 @@ +/*- + * Copyright (c) 2012 Gleb Kurtsou + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "namespace.h" +#include +#include +#include +#include "libc_private.h" + +ssize_t +getdents(int fd, char *buf, size_t nbytes) +{ + + return (__sys_getdirentries(fd, buf, nbytes, NULL)); +} Property changes on: head/lib/libc/sys/getdents.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/lib/libc/sys/getdirentries.2 =================================================================== --- head/lib/libc/sys/getdirentries.2 (revision 318735) +++ head/lib/libc/sys/getdirentries.2 (revision 318736) @@ -1,186 +1,186 @@ .\" Copyright (c) 1989, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)getdirentries.2 8.2 (Berkeley) 5/3/95 .\" $FreeBSD$ .\" .Dd May 3, 1995 .Dt GETDIRENTRIES 2 .Os .Sh NAME .Nm getdirentries , .Nm getdents .Nd "get directory entries in a file system independent format" .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/types.h .In dirent.h -.Ft int -.Fn getdirentries "int fd" "char *buf" "int nbytes" "long *basep" -.Ft int -.Fn getdents "int fd" "char *buf" "int nbytes" +.Ft ssize_t +.Fn getdirentries "int fd" "char *buf" "size_t nbytes" "off_t *basep" +.Ft ssize_t +.Fn getdents "int fd" "char *buf" "size_t nbytes" .Sh DESCRIPTION The .Fn getdirentries and .Fn getdents system calls read directory entries from the directory referenced by the file descriptor .Fa fd into the buffer pointed to by .Fa buf , in a file system independent format. Up to .Fa nbytes of data will be transferred. The .Fa nbytes argument must be greater than or equal to the block size associated with the file, see .Xr stat 2 . Some file systems may not support these system calls with buffers smaller than this size. .Pp The data in the buffer is a series of .Vt dirent structures each containing the following entries: .Bd -literal -offset indent uint32_t d_fileno; uint16_t d_reclen; uint8_t d_type; uint8_t d_namlen; char d_name[MAXNAMLEN + 1]; /* see below */ .Ed .Pp The .Fa d_fileno entry is a number which is unique for each distinct file in the file system. Files that are linked by hard links (see .Xr link 2 ) have the same .Fa d_fileno . The .Fa d_reclen entry is the length, in bytes, of the directory record. The .Fa d_type entry is the type of the file pointed to by the directory record. The file type values are defined in .Fa . The .Fa d_name entry contains a null terminated file name. The .Fa d_namlen entry specifies the length of the file name excluding the null byte. Thus the actual size of .Fa d_name may vary from 1 to .Dv MAXNAMLEN \&+ 1. .Pp Entries may be separated by extra space. The .Fa d_reclen entry may be used as an offset from the start of a .Fa dirent structure to the next structure, if any. .Pp The actual number of bytes transferred is returned. The current position pointer associated with .Fa fd is set to point to the next block of entries. The pointer may not advance by the number of bytes returned by .Fn getdirentries or .Fn getdents . A value of zero is returned when the end of the directory has been reached. .Pp The .Fn getdirentries system call writes the position of the block read into the location pointed to by .Fa basep . Alternatively, the current position pointer may be set and retrieved by .Xr lseek 2 . The current position pointer should only be set to a value returned by .Xr lseek 2 , a value returned in the location pointed to by .Fa basep .Po Fn getdirentries only .Pc or zero. .Sh RETURN VALUES If successful, the number of bytes actually transferred is returned. Otherwise, -1 is returned and the global variable .Va errno is set to indicate the error. .Sh ERRORS The .Fn getdirentries system call will fail if: .Bl -tag -width Er .It Bq Er EBADF The .Fa fd argument is not a valid file descriptor open for reading. .It Bq Er EFAULT Either .Fa buf or .Fa basep point outside the allocated address space. .It Bq Er EINVAL The file referenced by .Fa fd is not a directory, or .Fa nbytes is too small for returning a directory entry or block of entries, or the current position pointer is invalid. .It Bq Er EIO An .Tn I/O error occurred while reading from or writing to the file system. .El .Sh SEE ALSO .Xr lseek 2 , .Xr open 2 .Sh HISTORY The .Fn getdirentries system call first appeared in .Bx 4.4 . The .Fn getdents system call first appeared in .Fx 3.0 . Index: head/lib/libc/sys/lstat.c =================================================================== --- head/lib/libc/sys/lstat.c (nonexistent) +++ head/lib/libc/sys/lstat.c (revision 318736) @@ -0,0 +1,43 @@ +/*- + * Copyright (c) 2012 Gleb Kurtsou + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "namespace.h" +#include +#include +#include +#include +#include +#include "libc_private.h" + +int +lstat(const char *path, struct stat *sb) +{ + + return (__sys_fstatat(AT_FDCWD, path, sb, AT_SYMLINK_NOFOLLOW)); +} Property changes on: head/lib/libc/sys/lstat.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/lib/libc/sys/mknod.c =================================================================== --- head/lib/libc/sys/mknod.c (nonexistent) +++ head/lib/libc/sys/mknod.c (revision 318736) @@ -0,0 +1,45 @@ +/*- + * Copyright (c) 2011 Gleb Kurtsou + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "namespace.h" +#include +#include +#include +#include +#include +#include "libc_private.h" + +int __sys_mknodat(int, const char *, mode_t, dev_t); + +int +mknod(const char *path, mode_t mode, dev_t dev) +{ + + return (__sys_mknodat(AT_FDCWD, path, mode, dev)); +} Property changes on: head/lib/libc/sys/mknod.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/lib/libc/sys/stat.c =================================================================== --- head/lib/libc/sys/stat.c (nonexistent) +++ head/lib/libc/sys/stat.c (revision 318736) @@ -0,0 +1,43 @@ +/*- + * Copyright (c) 2012 Gleb Kurtsou + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "namespace.h" +#include +#include +#include +#include +#include +#include "libc_private.h" + +int +stat(const char *path, struct stat *sb) +{ + + return (__sys_fstatat(AT_FDCWD, path, sb, 0)); +} Property changes on: head/lib/libc/sys/stat.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/lib/libc/sys/statfs.2 =================================================================== --- head/lib/libc/sys/statfs.2 (revision 318735) +++ head/lib/libc/sys/statfs.2 (revision 318736) @@ -1,235 +1,235 @@ .\" Copyright (c) 1989, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)statfs.2 8.5 (Berkeley) 5/24/95 .\" $FreeBSD$ .\" -.Dd November 1, 2006 +.Dd February 13, 2017 .Dt STATFS 2 .Os .Sh NAME .Nm statfs .Nd get file system statistics .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/param.h .In sys/mount.h .Ft int .Fn statfs "const char *path" "struct statfs *buf" .Ft int .Fn fstatfs "int fd" "struct statfs *buf" .Sh DESCRIPTION The .Fn statfs system call returns information about a mounted file system. The .Fa path argument is the path name of any file within the mounted file system. The .Fa buf argument is a pointer to a .Vt statfs structure defined as follows: .Bd -literal typedef struct fsid { int32_t val[2]; } fsid_t; /* file system id type */ /* * filesystem statistics */ #define MFSNAMELEN 16 /* length of type name including null */ -#define MNAMELEN 88 /* size of on/from name bufs */ -#define STATFS_VERSION 0x20030518 /* current version number */ +#define MNAMELEN 1024 /* size of on/from name bufs */ +#define STATFS_VERSION 0x20140518 /* current version number */ struct statfs { uint32_t f_version; /* structure version number */ uint32_t f_type; /* type of filesystem */ uint64_t f_flags; /* copy of mount exported flags */ uint64_t f_bsize; /* filesystem fragment size */ uint64_t f_iosize; /* optimal transfer block size */ uint64_t f_blocks; /* total data blocks in filesystem */ uint64_t f_bfree; /* free blocks in filesystem */ int64_t f_bavail; /* free blocks avail to non-superuser */ uint64_t f_files; /* total file nodes in filesystem */ int64_t f_ffree; /* free nodes avail to non-superuser */ uint64_t f_syncwrites; /* count of sync writes since mount */ uint64_t f_asyncwrites; /* count of async writes since mount */ uint64_t f_syncreads; /* count of sync reads since mount */ uint64_t f_asyncreads; /* count of async reads since mount */ uint64_t f_spare[10]; /* unused spare */ uint32_t f_namemax; /* maximum filename length */ uid_t f_owner; /* user that mounted the filesystem */ fsid_t f_fsid; /* filesystem id */ char f_charspare[80]; /* spare string space */ char f_fstypename[MFSNAMELEN]; /* filesystem type name */ char f_mntfromname[MNAMELEN]; /* mounted filesystem */ char f_mntonname[MNAMELEN]; /* directory on which mounted */ }; .Ed .Pp The flags that may be returned include: .Bl -tag -width MNT_SYNCHRONOUS .It Dv MNT_RDONLY The file system is mounted read-only; Even the super-user may not write on it. .It Dv MNT_NOEXEC Files may not be executed from the file system. .It Dv MNT_NOSUID Setuid and setgid bits on files are not honored when they are executed. .It Dv MNT_SYNCHRONOUS All I/O to the file system is done synchronously. .It Dv MNT_ASYNC No file system I/O is done synchronously. .It Dv MNT_SOFTDEP Soft updates being done (see .Xr ffs 7 ) . .It Dv MNT_GJOURNAL Journaling with gjournal is enabled (see .Xr gjournal 8 ) . .It Dv MNT_SUIDDIR Special handling of SUID bit on directories. .It Dv MNT_UNION Union with underlying file system. .It Dv MNT_NOSYMFOLLOW Symbolic links are not followed. .It Dv MNT_NOCLUSTERR Read clustering is disabled. .It Dv MNT_NOCLUSTERW Write clustering is disabled. .\".It Dv MNT_JAILDEVFS .\"XXX .It Dv MNT_MULTILABEL Mandatory Access Control (MAC) support for individual objects (see .Xr mac 4 ) . .It Dv MNT_ACLS Access Control List (ACL) support enabled. .It Dv MNT_LOCAL The file system resides locally. .It Dv MNT_QUOTA The file system has quotas enabled on it. .It Dv MNT_ROOTFS Identifies the root file system. .It Dv MNT_EXRDONLY The file system is exported read-only. .It Dv MNT_NOATIME Updating of file access times is disabled. .It Dv MNT_USER The file system has been mounted by a user. .\".It Dv MNT_IGNORE .\"XXX .It Dv MNT_EXPORTED The file system is exported for both reading and writing. .It Dv MNT_DEFEXPORTED The file system is exported for both reading and writing to any Internet host. .It Dv MNT_EXPORTANON The file system maps all remote accesses to the anonymous user. .It Dv MNT_EXKERB The file system is exported with Kerberos uid mapping. .It Dv MNT_EXPUBLIC The file system is exported publicly (WebNFS). .El .Pp Fields that are undefined for a particular file system are set to -1. The .Fn fstatfs system call returns the same information about an open file referenced by descriptor .Fa fd . .Sh RETURN VALUES .Rv -std .Sh ERRORS The .Fn statfs system call fails if one or more of the following are true: .Bl -tag -width Er .It Bq Er ENOTDIR A component of the path prefix of .Fa path is not a directory. .It Bq Er ENAMETOOLONG The length of a component of .Fa path exceeds 255 characters, or the length of .Fa path exceeds 1023 characters. .It Bq Er ENOENT The file referred to by .Fa path does not exist. .It Bq Er EACCES Search permission is denied for a component of the path prefix of .Fa path . .It Bq Er ELOOP Too many symbolic links were encountered in translating .Fa path . .It Bq Er EFAULT The .Fa buf or .Fa path argument points to an invalid address. .It Bq Er EIO An .Tn I/O error occurred while reading from or writing to the file system. .El .Pp The .Fn fstatfs system call fails if one or more of the following are true: .Bl -tag -width Er .It Bq Er EBADF The .Fa fd argument is not a valid open file descriptor. .It Bq Er EFAULT The .Fa buf argument points to an invalid address. .It Bq Er EIO An .Tn I/O error occurred while reading from or writing to the file system. .El .Sh SEE ALSO .Xr fhstatfs 2 .Sh HISTORY The .Fn statfs system call first appeared in .Bx 4.4 . Index: head/lib/libkvm/kvm_proc.c =================================================================== --- head/lib/libkvm/kvm_proc.c (revision 318735) +++ head/lib/libkvm/kvm_proc.c (revision 318736) @@ -1,744 +1,745 @@ /*- * Copyright (c) 1989, 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software developed by the Computer Systems * Engineering group at Lawrence Berkeley Laboratory under DARPA contract * BG 91-66 and contributed to Berkeley. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)kvm_proc.c 8.3 (Berkeley) 9/23/93"; #endif /* LIBC_SCCS and not lint */ #endif #include __FBSDID("$FreeBSD$"); /* * Proc traversal interface for kvm. ps and w are (probably) the exclusive * users of this code, so we've factored it out into a separate module. * Thus, we keep this grunge out of the other kvm applications (i.e., * most other applications are interested only in open/close/read/nlist). */ #include #define _WANT_UCRED /* make ucred.h give us 'struct ucred' */ #include #include #include #include #include #include #include #include #define _WANT_PRISON /* make jail.h give us 'struct prison' */ #include #include #include #include #include #include #include #include #define _WANT_KW_EXITCODE #include #include #include #include #include #include #include #include #include #include #include "kvm_private.h" #define KREAD(kd, addr, obj) \ (kvm_read(kd, addr, (char *)(obj), sizeof(*obj)) != sizeof(*obj)) static int ticks; static int hz; static uint64_t cpu_tick_frequency; /* * From sys/kern/kern_tc.c. Depends on cpu_tick_frequency, which is * read/initialized before this function is ever called. */ static uint64_t cputick2usec(uint64_t tick) { if (cpu_tick_frequency == 0) return (0); if (tick > 18446744073709551) /* floor(2^64 / 1000) */ return (tick / (cpu_tick_frequency / 1000000)); else if (tick > 18446744073709) /* floor(2^64 / 1000000) */ return ((tick * 1000) / (cpu_tick_frequency / 1000)); else return ((tick * 1000000) / cpu_tick_frequency); } /* * Read proc's from memory file into buffer bp, which has space to hold * at most maxcnt procs. */ static int kvm_proclist(kvm_t *kd, int what, int arg, struct proc *p, struct kinfo_proc *bp, int maxcnt) { int cnt = 0; struct kinfo_proc kinfo_proc, *kp; struct pgrp pgrp; struct session sess; struct cdev t_cdev; struct tty tty; struct vmspace vmspace; struct sigacts sigacts; #if 0 struct pstats pstats; #endif struct ucred ucred; struct prison pr; struct thread mtd; struct proc proc; struct proc pproc; struct sysentvec sysent; char svname[KI_EMULNAMELEN]; kp = &kinfo_proc; kp->ki_structsize = sizeof(kinfo_proc); /* * Loop on the processes. this is completely broken because we need to be * able to loop on the threads and merge the ones that are the same process some how. */ for (; cnt < maxcnt && p != NULL; p = LIST_NEXT(&proc, p_list)) { memset(kp, 0, sizeof *kp); if (KREAD(kd, (u_long)p, &proc)) { _kvm_err(kd, kd->program, "can't read proc at %p", p); return (-1); } if (proc.p_state == PRS_NEW) continue; if (proc.p_state != PRS_ZOMBIE) { if (KREAD(kd, (u_long)TAILQ_FIRST(&proc.p_threads), &mtd)) { _kvm_err(kd, kd->program, "can't read thread at %p", TAILQ_FIRST(&proc.p_threads)); return (-1); } } if (KREAD(kd, (u_long)proc.p_ucred, &ucred) == 0) { kp->ki_ruid = ucred.cr_ruid; kp->ki_svuid = ucred.cr_svuid; kp->ki_rgid = ucred.cr_rgid; kp->ki_svgid = ucred.cr_svgid; kp->ki_cr_flags = ucred.cr_flags; if (ucred.cr_ngroups > KI_NGROUPS) { kp->ki_ngroups = KI_NGROUPS; kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW; } else kp->ki_ngroups = ucred.cr_ngroups; kvm_read(kd, (u_long)ucred.cr_groups, kp->ki_groups, kp->ki_ngroups * sizeof(gid_t)); kp->ki_uid = ucred.cr_uid; if (ucred.cr_prison != NULL) { if (KREAD(kd, (u_long)ucred.cr_prison, &pr)) { _kvm_err(kd, kd->program, "can't read prison at %p", ucred.cr_prison); return (-1); } kp->ki_jid = pr.pr_id; } } switch(what & ~KERN_PROC_INC_THREAD) { case KERN_PROC_GID: if (kp->ki_groups[0] != (gid_t)arg) continue; break; case KERN_PROC_PID: if (proc.p_pid != (pid_t)arg) continue; break; case KERN_PROC_RGID: if (kp->ki_rgid != (gid_t)arg) continue; break; case KERN_PROC_UID: if (kp->ki_uid != (uid_t)arg) continue; break; case KERN_PROC_RUID: if (kp->ki_ruid != (uid_t)arg) continue; break; } /* * We're going to add another proc to the set. If this * will overflow the buffer, assume the reason is because * nprocs (or the proc list) is corrupt and declare an error. */ if (cnt >= maxcnt) { _kvm_err(kd, kd->program, "nprocs corrupt"); return (-1); } /* * gather kinfo_proc */ kp->ki_paddr = p; kp->ki_addr = 0; /* XXX uarea */ /* kp->ki_kstack = proc.p_thread.td_kstack; XXXKSE */ kp->ki_args = proc.p_args; kp->ki_tracep = proc.p_tracevp; kp->ki_textvp = proc.p_textvp; kp->ki_fd = proc.p_fd; kp->ki_vmspace = proc.p_vmspace; if (proc.p_sigacts != NULL) { if (KREAD(kd, (u_long)proc.p_sigacts, &sigacts)) { _kvm_err(kd, kd->program, "can't read sigacts at %p", proc.p_sigacts); return (-1); } kp->ki_sigignore = sigacts.ps_sigignore; kp->ki_sigcatch = sigacts.ps_sigcatch; } #if 0 if ((proc.p_flag & P_INMEM) && proc.p_stats != NULL) { if (KREAD(kd, (u_long)proc.p_stats, &pstats)) { _kvm_err(kd, kd->program, "can't read stats at %x", proc.p_stats); return (-1); } kp->ki_start = pstats.p_start; /* * XXX: The times here are probably zero and need * to be calculated from the raw data in p_rux and * p_crux. */ kp->ki_rusage = pstats.p_ru; kp->ki_childstime = pstats.p_cru.ru_stime; kp->ki_childutime = pstats.p_cru.ru_utime; /* Some callers want child-times in a single value */ timeradd(&kp->ki_childstime, &kp->ki_childutime, &kp->ki_childtime); } #endif if (proc.p_oppid) kp->ki_ppid = proc.p_oppid; else if (proc.p_pptr) { if (KREAD(kd, (u_long)proc.p_pptr, &pproc)) { _kvm_err(kd, kd->program, "can't read pproc at %p", proc.p_pptr); return (-1); } kp->ki_ppid = pproc.p_pid; } else kp->ki_ppid = 0; if (proc.p_pgrp == NULL) goto nopgrp; if (KREAD(kd, (u_long)proc.p_pgrp, &pgrp)) { _kvm_err(kd, kd->program, "can't read pgrp at %p", proc.p_pgrp); return (-1); } kp->ki_pgid = pgrp.pg_id; kp->ki_jobc = pgrp.pg_jobc; if (KREAD(kd, (u_long)pgrp.pg_session, &sess)) { _kvm_err(kd, kd->program, "can't read session at %p", pgrp.pg_session); return (-1); } kp->ki_sid = sess.s_sid; (void)memcpy(kp->ki_login, sess.s_login, sizeof(kp->ki_login)); kp->ki_kiflag = sess.s_ttyvp ? KI_CTTY : 0; if (sess.s_leader == p) kp->ki_kiflag |= KI_SLEADER; if ((proc.p_flag & P_CONTROLT) && sess.s_ttyp != NULL) { if (KREAD(kd, (u_long)sess.s_ttyp, &tty)) { _kvm_err(kd, kd->program, "can't read tty at %p", sess.s_ttyp); return (-1); } if (tty.t_dev != NULL) { if (KREAD(kd, (u_long)tty.t_dev, &t_cdev)) { _kvm_err(kd, kd->program, "can't read cdev at %p", tty.t_dev); return (-1); } #if 0 kp->ki_tdev = t_cdev.si_udev; #else kp->ki_tdev = NODEV; #endif } if (tty.t_pgrp != NULL) { if (KREAD(kd, (u_long)tty.t_pgrp, &pgrp)) { _kvm_err(kd, kd->program, "can't read tpgrp at %p", tty.t_pgrp); return (-1); } kp->ki_tpgid = pgrp.pg_id; } else kp->ki_tpgid = -1; if (tty.t_session != NULL) { if (KREAD(kd, (u_long)tty.t_session, &sess)) { _kvm_err(kd, kd->program, "can't read session at %p", tty.t_session); return (-1); } kp->ki_tsid = sess.s_sid; } } else { nopgrp: kp->ki_tdev = NODEV; } if ((proc.p_state != PRS_ZOMBIE) && mtd.td_wmesg) (void)kvm_read(kd, (u_long)mtd.td_wmesg, kp->ki_wmesg, WMESGLEN); (void)kvm_read(kd, (u_long)proc.p_vmspace, (char *)&vmspace, sizeof(vmspace)); kp->ki_size = vmspace.vm_map.size; /* * Approximate the kernel's method of calculating * this field. */ #define pmap_resident_count(pm) ((pm)->pm_stats.resident_count) kp->ki_rssize = pmap_resident_count(&vmspace.vm_pmap); kp->ki_swrss = vmspace.vm_swrss; kp->ki_tsize = vmspace.vm_tsize; kp->ki_dsize = vmspace.vm_dsize; kp->ki_ssize = vmspace.vm_ssize; switch (what & ~KERN_PROC_INC_THREAD) { case KERN_PROC_PGRP: if (kp->ki_pgid != (pid_t)arg) continue; break; case KERN_PROC_SESSION: if (kp->ki_sid != (pid_t)arg) continue; break; case KERN_PROC_TTY: if ((proc.p_flag & P_CONTROLT) == 0 || kp->ki_tdev != (dev_t)arg) continue; break; } if (proc.p_comm[0] != 0) strlcpy(kp->ki_comm, proc.p_comm, MAXCOMLEN); (void)kvm_read(kd, (u_long)proc.p_sysent, (char *)&sysent, sizeof(sysent)); (void)kvm_read(kd, (u_long)sysent.sv_name, (char *)&svname, sizeof(svname)); if (svname[0] != 0) strlcpy(kp->ki_emul, svname, KI_EMULNAMELEN); if ((proc.p_state != PRS_ZOMBIE) && (mtd.td_blocked != 0)) { kp->ki_kiflag |= KI_LOCKBLOCK; if (mtd.td_lockname) (void)kvm_read(kd, (u_long)mtd.td_lockname, kp->ki_lockname, LOCKNAMELEN); kp->ki_lockname[LOCKNAMELEN] = 0; } kp->ki_runtime = cputick2usec(proc.p_rux.rux_runtime); kp->ki_pid = proc.p_pid; kp->ki_siglist = proc.p_siglist; SIGSETOR(kp->ki_siglist, mtd.td_siglist); kp->ki_sigmask = mtd.td_sigmask; kp->ki_xstat = KW_EXITCODE(proc.p_xexit, proc.p_xsig); kp->ki_acflag = proc.p_acflag; kp->ki_lock = proc.p_lock; if (proc.p_state != PRS_ZOMBIE) { kp->ki_swtime = (ticks - proc.p_swtick) / hz; kp->ki_flag = proc.p_flag; kp->ki_sflag = 0; kp->ki_nice = proc.p_nice; kp->ki_traceflag = proc.p_traceflag; if (proc.p_state == PRS_NORMAL) { if (TD_ON_RUNQ(&mtd) || TD_CAN_RUN(&mtd) || TD_IS_RUNNING(&mtd)) { kp->ki_stat = SRUN; } else if (mtd.td_state == TDS_INHIBITED) { if (P_SHOULDSTOP(&proc)) { kp->ki_stat = SSTOP; } else if ( TD_IS_SLEEPING(&mtd)) { kp->ki_stat = SSLEEP; } else if (TD_ON_LOCK(&mtd)) { kp->ki_stat = SLOCK; } else { kp->ki_stat = SWAIT; } } } else { kp->ki_stat = SIDL; } /* Stuff from the thread */ kp->ki_pri.pri_level = mtd.td_priority; kp->ki_pri.pri_native = mtd.td_base_pri; kp->ki_lastcpu = mtd.td_lastcpu; kp->ki_wchan = mtd.td_wchan; kp->ki_oncpu = mtd.td_oncpu; if (mtd.td_name[0] != '\0') strlcpy(kp->ki_tdname, mtd.td_name, sizeof(kp->ki_tdname)); kp->ki_pctcpu = 0; kp->ki_rqindex = 0; /* * Note: legacy fields; wraps at NO_CPU_OLD or the * old max CPU value as appropriate */ if (mtd.td_lastcpu == NOCPU) kp->ki_lastcpu_old = NOCPU_OLD; else if (mtd.td_lastcpu > MAXCPU_OLD) kp->ki_lastcpu_old = MAXCPU_OLD; else kp->ki_lastcpu_old = mtd.td_lastcpu; if (mtd.td_oncpu == NOCPU) kp->ki_oncpu_old = NOCPU_OLD; else if (mtd.td_oncpu > MAXCPU_OLD) kp->ki_oncpu_old = MAXCPU_OLD; else kp->ki_oncpu_old = mtd.td_oncpu; } else { kp->ki_stat = SZOMB; } + kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */ bcopy(&kinfo_proc, bp, sizeof(kinfo_proc)); ++bp; ++cnt; } return (cnt); } /* * Build proc info array by reading in proc list from a crash dump. * Return number of procs read. maxcnt is the max we will read. */ static int kvm_deadprocs(kvm_t *kd, int what, int arg, u_long a_allproc, u_long a_zombproc, int maxcnt) { struct kinfo_proc *bp = kd->procbase; int acnt, zcnt; struct proc *p; if (KREAD(kd, a_allproc, &p)) { _kvm_err(kd, kd->program, "cannot read allproc"); return (-1); } acnt = kvm_proclist(kd, what, arg, p, bp, maxcnt); if (acnt < 0) return (acnt); if (KREAD(kd, a_zombproc, &p)) { _kvm_err(kd, kd->program, "cannot read zombproc"); return (-1); } zcnt = kvm_proclist(kd, what, arg, p, bp + acnt, maxcnt - acnt); if (zcnt < 0) zcnt = 0; return (acnt + zcnt); } struct kinfo_proc * kvm_getprocs(kvm_t *kd, int op, int arg, int *cnt) { int mib[4], st, nprocs; size_t size, osize; int temp_op; if (kd->procbase != 0) { free((void *)kd->procbase); /* * Clear this pointer in case this call fails. Otherwise, * kvm_close() will free it again. */ kd->procbase = 0; } if (ISALIVE(kd)) { size = 0; mib[0] = CTL_KERN; mib[1] = KERN_PROC; mib[2] = op; mib[3] = arg; temp_op = op & ~KERN_PROC_INC_THREAD; st = sysctl(mib, temp_op == KERN_PROC_ALL || temp_op == KERN_PROC_PROC ? 3 : 4, NULL, &size, NULL, 0); if (st == -1) { _kvm_syserr(kd, kd->program, "kvm_getprocs"); return (0); } /* * We can't continue with a size of 0 because we pass * it to realloc() (via _kvm_realloc()), and passing 0 * to realloc() results in undefined behavior. */ if (size == 0) { /* * XXX: We should probably return an invalid, * but non-NULL, pointer here so any client * program trying to dereference it will * crash. However, _kvm_freeprocs() calls * free() on kd->procbase if it isn't NULL, * and free()'ing a junk pointer isn't good. * Then again, _kvm_freeprocs() isn't used * anywhere . . . */ kd->procbase = _kvm_malloc(kd, 1); goto liveout; } do { size += size / 10; kd->procbase = (struct kinfo_proc *) _kvm_realloc(kd, kd->procbase, size); if (kd->procbase == NULL) return (0); osize = size; st = sysctl(mib, temp_op == KERN_PROC_ALL || temp_op == KERN_PROC_PROC ? 3 : 4, kd->procbase, &size, NULL, 0); } while (st == -1 && errno == ENOMEM && size == osize); if (st == -1) { _kvm_syserr(kd, kd->program, "kvm_getprocs"); return (0); } /* * We have to check the size again because sysctl() * may "round up" oldlenp if oldp is NULL; hence it * might've told us that there was data to get when * there really isn't any. */ if (size > 0 && kd->procbase->ki_structsize != sizeof(struct kinfo_proc)) { _kvm_err(kd, kd->program, "kinfo_proc size mismatch (expected %zu, got %d)", sizeof(struct kinfo_proc), kd->procbase->ki_structsize); return (0); } liveout: nprocs = size == 0 ? 0 : size / kd->procbase->ki_structsize; } else { struct nlist nl[7], *p; nl[0].n_name = "_nprocs"; nl[1].n_name = "_allproc"; nl[2].n_name = "_zombproc"; nl[3].n_name = "_ticks"; nl[4].n_name = "_hz"; nl[5].n_name = "_cpu_tick_frequency"; nl[6].n_name = 0; if (!kd->arch->ka_native(kd)) { _kvm_err(kd, kd->program, "cannot read procs from non-native core"); return (0); } if (kvm_nlist(kd, nl) != 0) { for (p = nl; p->n_type != 0; ++p) ; _kvm_err(kd, kd->program, "%s: no such symbol", p->n_name); return (0); } if (KREAD(kd, nl[0].n_value, &nprocs)) { _kvm_err(kd, kd->program, "can't read nprocs"); return (0); } if (KREAD(kd, nl[3].n_value, &ticks)) { _kvm_err(kd, kd->program, "can't read ticks"); return (0); } if (KREAD(kd, nl[4].n_value, &hz)) { _kvm_err(kd, kd->program, "can't read hz"); return (0); } if (KREAD(kd, nl[5].n_value, &cpu_tick_frequency)) { _kvm_err(kd, kd->program, "can't read cpu_tick_frequency"); return (0); } size = nprocs * sizeof(struct kinfo_proc); kd->procbase = (struct kinfo_proc *)_kvm_malloc(kd, size); if (kd->procbase == NULL) return (0); nprocs = kvm_deadprocs(kd, op, arg, nl[1].n_value, nl[2].n_value, nprocs); if (nprocs <= 0) { _kvm_freeprocs(kd); nprocs = 0; } #ifdef notdef else { size = nprocs * sizeof(struct kinfo_proc); kd->procbase = realloc(kd->procbase, size); } #endif } *cnt = nprocs; return (kd->procbase); } void _kvm_freeprocs(kvm_t *kd) { free(kd->procbase); kd->procbase = NULL; } void * _kvm_realloc(kvm_t *kd, void *p, size_t n) { void *np; np = reallocf(p, n); if (np == NULL) _kvm_err(kd, kd->program, "out of memory"); return (np); } /* * Get the command args or environment. */ static char ** kvm_argv(kvm_t *kd, const struct kinfo_proc *kp, int env, int nchr) { int oid[4]; int i; size_t bufsz; static int buflen; static char *buf, *p; static char **bufp; static int argc; char **nbufp; if (!ISALIVE(kd)) { _kvm_err(kd, kd->program, "cannot read user space from dead kernel"); return (NULL); } if (nchr == 0 || nchr > ARG_MAX) nchr = ARG_MAX; if (buflen == 0) { buf = malloc(nchr); if (buf == NULL) { _kvm_err(kd, kd->program, "cannot allocate memory"); return (NULL); } argc = 32; bufp = malloc(sizeof(char *) * argc); if (bufp == NULL) { free(buf); buf = NULL; _kvm_err(kd, kd->program, "cannot allocate memory"); return (NULL); } buflen = nchr; } else if (nchr > buflen) { p = realloc(buf, nchr); if (p != NULL) { buf = p; buflen = nchr; } } oid[0] = CTL_KERN; oid[1] = KERN_PROC; oid[2] = env ? KERN_PROC_ENV : KERN_PROC_ARGS; oid[3] = kp->ki_pid; bufsz = buflen; if (sysctl(oid, 4, buf, &bufsz, 0, 0) == -1) { /* * If the supplied buf is too short to hold the requested * value the sysctl returns with ENOMEM. The buf is filled * with the truncated value and the returned bufsz is equal * to the requested len. */ if (errno != ENOMEM || bufsz != (size_t)buflen) return (NULL); buf[bufsz - 1] = '\0'; errno = 0; } else if (bufsz == 0) return (NULL); i = 0; p = buf; do { bufp[i++] = p; p += strlen(p) + 1; if (i >= argc) { argc += argc; nbufp = realloc(bufp, sizeof(char *) * argc); if (nbufp == NULL) return (NULL); bufp = nbufp; } } while (p < buf + bufsz); bufp[i++] = 0; return (bufp); } char ** kvm_getargv(kvm_t *kd, const struct kinfo_proc *kp, int nchr) { return (kvm_argv(kd, kp, 0, nchr)); } char ** kvm_getenvv(kvm_t *kd, const struct kinfo_proc *kp, int nchr) { return (kvm_argv(kd, kp, 1, nchr)); } Index: head/lib/libmilter/Makefile =================================================================== --- head/lib/libmilter/Makefile (revision 318735) +++ head/lib/libmilter/Makefile (revision 318736) @@ -1,36 +1,37 @@ # $FreeBSD$ .include PACKAGE=sendmail SENDMAIL_DIR=${SRCTOP}/contrib/sendmail .PATH: ${SENDMAIL_DIR}/libmilter ${SENDMAIL_DIR}/libsm CFLAGS+=-I${SENDMAIL_DIR}/src -I${SENDMAIL_DIR}/include -I. CFLAGS+=-DNOT_SENDMAIL -Dsm_snprintf=snprintf CFLAGS+=-D_THREAD_SAFE CFLAGS+=-DSM_CONF_POLL .if ${MK_INET6_SUPPORT} != "no" CFLAGS+=-DNETINET6 .endif # User customizations to the sendmail build environment CFLAGS+=${SENDMAIL_CFLAGS} INCSDIR=${INCLUDEDIR}/libmilter INCS= ${SENDMAIL_DIR}/include/libmilter/mfapi.h \ ${SENDMAIL_DIR}/include/libmilter/mfdef.h LIB= milter SRCS+= sm_os.h SRCS+= main.c engine.c listener.c handler.c comm.c monitor.c smfi.c \ signal.c sm_gethost.c errstring.c strl.c worker.c CLEANFILES+=sm_os.h WARNS?= 0 +SHLIB_MAJOR= 6 sm_os.h: ${SENDMAIL_DIR}/include/sm/os/sm_os_freebsd.h .NOMETA ln -sf ${.ALLSRC} ${.TARGET} .include Index: head/lib/libprocstat/Makefile =================================================================== --- head/lib/libprocstat/Makefile (revision 318735) +++ head/lib/libprocstat/Makefile (revision 318736) @@ -1,72 +1,76 @@ # $FreeBSD$ .include PACKAGE=lib${LIB} LIB= procstat SRCS= cd9660.c \ common_kvm.c \ core.c \ libprocstat.c \ msdosfs.c \ smbfs.c \ udf.c +.if ${MK_SYMVER} == yes +SRCS+= libprocstat_compat.c +.endif + VERSION_DEF= ${LIBCSRCDIR}/Versions.def SYMBOL_MAPS= ${.CURDIR}/Symbol.map INCS= libprocstat.h CFLAGS+= -I. -I${.CURDIR} -D_KVM_VNODE SHLIB_MAJOR= 1 LIBADD= elf kvm util MAN= libprocstat.3 MLINKS+=libprocstat.3 procstat_close.3 \ libprocstat.3 procstat_freeargv.3 \ libprocstat.3 procstat_freeauxv.3 \ libprocstat.3 procstat_freeenvv.3 \ libprocstat.3 procstat_freefiles.3 \ libprocstat.3 procstat_freegroups.3 \ libprocstat.3 procstat_freekstack.3 \ libprocstat.3 procstat_freeprocs.3 \ libprocstat.3 procstat_freevmmap.3 \ libprocstat.3 procstat_get_pipe_info.3 \ libprocstat.3 procstat_get_pts_info.3 \ libprocstat.3 procstat_get_sem_info.3 \ libprocstat.3 procstat_get_shm_info.3 \ libprocstat.3 procstat_get_socket_info.3 \ libprocstat.3 procstat_get_vnode_info.3 \ libprocstat.3 procstat_getargv.3 \ libprocstat.3 procstat_getauxv.3 \ libprocstat.3 procstat_getenvv.3 \ libprocstat.3 procstat_getfiles.3 \ libprocstat.3 procstat_getgroups.3 \ libprocstat.3 procstat_getkstack.3 \ libprocstat.3 procstat_getosrel.3 \ libprocstat.3 procstat_getpathname.3 \ libprocstat.3 procstat_getprocs.3 \ libprocstat.3 procstat_getrlimit.3 \ libprocstat.3 procstat_getumask.3 \ libprocstat.3 procstat_getvmmap.3 \ libprocstat.3 procstat_open_core.3 \ libprocstat.3 procstat_open_kvm.3 \ libprocstat.3 procstat_open_sysctl.3 # XXX This is a hack. .if ${MK_CDDL} != "no" CFLAGS+= -DLIBPROCSTAT_ZFS OBJS+= zfs/zfs.o SOBJS+= zfs/zfs.pico POBJS+= zfs/zfs.po SUBDIR= zfs zfs/zfs.o: .PHONY @cd ${.CURDIR}/zfs && ${MAKE} zfs.o zfs/zfs.pico: .PHONY @cd ${.CURDIR}/zfs && ${MAKE} zfs.pico zfs/zfs.po: .PHONY @cd ${.CURDIR}/zfs && ${MAKE} zfs.po .endif .include Index: head/lib/libprocstat/Symbol.map =================================================================== --- head/lib/libprocstat/Symbol.map (revision 318735) +++ head/lib/libprocstat/Symbol.map (revision 318736) @@ -1,43 +1,43 @@ /* * $FreeBSD$ */ FBSD_1.2 { procstat_close; procstat_freefiles; procstat_freeprocs; procstat_get_pipe_info; - procstat_get_pts_info; procstat_get_socket_info; - procstat_get_vnode_info; procstat_getfiles; procstat_getprocs; procstat_open_kvm; procstat_open_sysctl; }; FBSD_1.3 { procstat_freeargv; procstat_freeauxv; procstat_freeenvv; procstat_freegroups; procstat_freekstack; procstat_freevmmap; - procstat_get_sem_info; - procstat_get_shm_info; procstat_getargv; procstat_getauxv; procstat_getenvv; procstat_getgroups; procstat_getkstack; procstat_getosrel; procstat_getpathname; procstat_getrlimit; procstat_getumask; procstat_getvmmap; procstat_open_core; }; FBSD_1.5 { procstat_freeptlwpinfo; procstat_getptlwpinfo; + procstat_get_pts_info; + procstat_get_sem_info; + procstat_get_shm_info; + procstat_get_vnode_info; }; Index: head/lib/libprocstat/libprocstat.c =================================================================== --- head/lib/libprocstat/libprocstat.c (revision 318735) +++ head/lib/libprocstat/libprocstat.c (revision 318736) @@ -1,2582 +1,2584 @@ /*- * Copyright (c) 2017 Dell EMC * Copyright (c) 2009 Stanislav Sedov * Copyright (c) 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #define _WANT_UCRED #include #undef _WANT_UCRED #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define _WANT_FILE #include #include #include #include #include #include #define _KERNEL #include #include #include #include #include #include #undef _KERNEL #include #include #include #include #include #include #include #include #include #include #define _WANT_INPCB #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libprocstat_internal.h" #include "common_kvm.h" #include "core.h" int statfs(const char *, struct statfs *); /* XXX */ #define PROCSTAT_KVM 1 #define PROCSTAT_SYSCTL 2 #define PROCSTAT_CORE 3 static char **getargv(struct procstat *procstat, struct kinfo_proc *kp, size_t nchr, int env); static char *getmnton(kvm_t *kd, struct mount *m); static struct kinfo_vmentry * kinfo_getvmmap_core(struct procstat_core *core, int *cntp); static Elf_Auxinfo *procstat_getauxv_core(struct procstat_core *core, unsigned int *cntp); static Elf_Auxinfo *procstat_getauxv_sysctl(pid_t pid, unsigned int *cntp); static struct filestat_list *procstat_getfiles_kvm( struct procstat *procstat, struct kinfo_proc *kp, int mmapped); static struct filestat_list *procstat_getfiles_sysctl( struct procstat *procstat, struct kinfo_proc *kp, int mmapped); static int procstat_get_pipe_info_sysctl(struct filestat *fst, struct pipestat *pipe, char *errbuf); static int procstat_get_pipe_info_kvm(kvm_t *kd, struct filestat *fst, struct pipestat *pipe, char *errbuf); static int procstat_get_pts_info_sysctl(struct filestat *fst, struct ptsstat *pts, char *errbuf); static int procstat_get_pts_info_kvm(kvm_t *kd, struct filestat *fst, struct ptsstat *pts, char *errbuf); static int procstat_get_sem_info_sysctl(struct filestat *fst, struct semstat *sem, char *errbuf); static int procstat_get_sem_info_kvm(kvm_t *kd, struct filestat *fst, struct semstat *sem, char *errbuf); static int procstat_get_shm_info_sysctl(struct filestat *fst, struct shmstat *shm, char *errbuf); static int procstat_get_shm_info_kvm(kvm_t *kd, struct filestat *fst, struct shmstat *shm, char *errbuf); static int procstat_get_socket_info_sysctl(struct filestat *fst, struct sockstat *sock, char *errbuf); static int procstat_get_socket_info_kvm(kvm_t *kd, struct filestat *fst, struct sockstat *sock, char *errbuf); static int to_filestat_flags(int flags); static int procstat_get_vnode_info_kvm(kvm_t *kd, struct filestat *fst, struct vnstat *vn, char *errbuf); static int procstat_get_vnode_info_sysctl(struct filestat *fst, struct vnstat *vn, char *errbuf); static gid_t *procstat_getgroups_core(struct procstat_core *core, unsigned int *count); static gid_t * procstat_getgroups_kvm(kvm_t *kd, struct kinfo_proc *kp, unsigned int *count); static gid_t *procstat_getgroups_sysctl(pid_t pid, unsigned int *count); static struct kinfo_kstack *procstat_getkstack_sysctl(pid_t pid, int *cntp); static int procstat_getosrel_core(struct procstat_core *core, int *osrelp); static int procstat_getosrel_kvm(kvm_t *kd, struct kinfo_proc *kp, int *osrelp); static int procstat_getosrel_sysctl(pid_t pid, int *osrelp); static int procstat_getpathname_core(struct procstat_core *core, char *pathname, size_t maxlen); static int procstat_getpathname_sysctl(pid_t pid, char *pathname, size_t maxlen); static int procstat_getrlimit_core(struct procstat_core *core, int which, struct rlimit* rlimit); static int procstat_getrlimit_kvm(kvm_t *kd, struct kinfo_proc *kp, int which, struct rlimit* rlimit); static int procstat_getrlimit_sysctl(pid_t pid, int which, struct rlimit* rlimit); static int procstat_getumask_core(struct procstat_core *core, unsigned short *maskp); static int procstat_getumask_kvm(kvm_t *kd, struct kinfo_proc *kp, unsigned short *maskp); static int procstat_getumask_sysctl(pid_t pid, unsigned short *maskp); static int vntype2psfsttype(int type); void procstat_close(struct procstat *procstat) { assert(procstat); if (procstat->type == PROCSTAT_KVM) kvm_close(procstat->kd); else if (procstat->type == PROCSTAT_CORE) procstat_core_close(procstat->core); procstat_freeargv(procstat); procstat_freeenvv(procstat); free(procstat); } struct procstat * procstat_open_sysctl(void) { struct procstat *procstat; procstat = calloc(1, sizeof(*procstat)); if (procstat == NULL) { warn("malloc()"); return (NULL); } procstat->type = PROCSTAT_SYSCTL; return (procstat); } struct procstat * procstat_open_kvm(const char *nlistf, const char *memf) { struct procstat *procstat; kvm_t *kd; char buf[_POSIX2_LINE_MAX]; procstat = calloc(1, sizeof(*procstat)); if (procstat == NULL) { warn("malloc()"); return (NULL); } kd = kvm_openfiles(nlistf, memf, NULL, O_RDONLY, buf); if (kd == NULL) { warnx("kvm_openfiles(): %s", buf); free(procstat); return (NULL); } procstat->type = PROCSTAT_KVM; procstat->kd = kd; return (procstat); } struct procstat * procstat_open_core(const char *filename) { struct procstat *procstat; struct procstat_core *core; procstat = calloc(1, sizeof(*procstat)); if (procstat == NULL) { warn("malloc()"); return (NULL); } core = procstat_core_open(filename); if (core == NULL) { free(procstat); return (NULL); } procstat->type = PROCSTAT_CORE; procstat->core = core; return (procstat); } struct kinfo_proc * procstat_getprocs(struct procstat *procstat, int what, int arg, unsigned int *count) { struct kinfo_proc *p0, *p; size_t len, olen; int name[4]; int cnt; int error; assert(procstat); assert(count); p = NULL; if (procstat->type == PROCSTAT_KVM) { *count = 0; p0 = kvm_getprocs(procstat->kd, what, arg, &cnt); if (p0 == NULL || cnt <= 0) return (NULL); *count = cnt; len = *count * sizeof(*p); p = malloc(len); if (p == NULL) { warnx("malloc(%zu)", len); goto fail; } bcopy(p0, p, len); return (p); } else if (procstat->type == PROCSTAT_SYSCTL) { len = 0; name[0] = CTL_KERN; name[1] = KERN_PROC; name[2] = what; name[3] = arg; error = sysctl(name, nitems(name), NULL, &len, NULL, 0); if (error < 0 && errno != EPERM) { warn("sysctl(kern.proc)"); goto fail; } if (len == 0) { warnx("no processes?"); goto fail; } do { len += len / 10; p = reallocf(p, len); if (p == NULL) { warnx("reallocf(%zu)", len); goto fail; } olen = len; error = sysctl(name, nitems(name), p, &len, NULL, 0); } while (error < 0 && errno == ENOMEM && olen == len); if (error < 0 && errno != EPERM) { warn("sysctl(kern.proc)"); goto fail; } /* Perform simple consistency checks. */ if ((len % sizeof(*p)) != 0 || p->ki_structsize != sizeof(*p)) { warnx("kinfo_proc structure size mismatch (len = %zu)", len); goto fail; } *count = len / sizeof(*p); return (p); } else if (procstat->type == PROCSTAT_CORE) { p = procstat_core_get(procstat->core, PSC_TYPE_PROC, NULL, &len); if ((len % sizeof(*p)) != 0 || p->ki_structsize != sizeof(*p)) { warnx("kinfo_proc structure size mismatch"); goto fail; } *count = len / sizeof(*p); return (p); } else { warnx("unknown access method: %d", procstat->type); return (NULL); } fail: if (p) free(p); return (NULL); } void procstat_freeprocs(struct procstat *procstat __unused, struct kinfo_proc *p) { if (p != NULL) free(p); p = NULL; } struct filestat_list * procstat_getfiles(struct procstat *procstat, struct kinfo_proc *kp, int mmapped) { switch(procstat->type) { case PROCSTAT_KVM: return (procstat_getfiles_kvm(procstat, kp, mmapped)); case PROCSTAT_SYSCTL: case PROCSTAT_CORE: return (procstat_getfiles_sysctl(procstat, kp, mmapped)); default: warnx("unknown access method: %d", procstat->type); return (NULL); } } void procstat_freefiles(struct procstat *procstat, struct filestat_list *head) { struct filestat *fst, *tmp; STAILQ_FOREACH_SAFE(fst, head, next, tmp) { if (fst->fs_path != NULL) free(fst->fs_path); free(fst); } free(head); if (procstat->vmentries != NULL) { free(procstat->vmentries); procstat->vmentries = NULL; } if (procstat->files != NULL) { free(procstat->files); procstat->files = NULL; } } static struct filestat * filestat_new_entry(void *typedep, int type, int fd, int fflags, int uflags, int refcount, off_t offset, char *path, cap_rights_t *cap_rightsp) { struct filestat *entry; entry = calloc(1, sizeof(*entry)); if (entry == NULL) { warn("malloc()"); return (NULL); } entry->fs_typedep = typedep; entry->fs_fflags = fflags; entry->fs_uflags = uflags; entry->fs_fd = fd; entry->fs_type = type; entry->fs_ref_count = refcount; entry->fs_offset = offset; entry->fs_path = path; if (cap_rightsp != NULL) entry->fs_cap_rights = *cap_rightsp; else cap_rights_init(&entry->fs_cap_rights); return (entry); } static struct vnode * getctty(kvm_t *kd, struct kinfo_proc *kp) { struct pgrp pgrp; struct proc proc; struct session sess; int error; assert(kp); error = kvm_read_all(kd, (unsigned long)kp->ki_paddr, &proc, sizeof(proc)); if (error == 0) { warnx("can't read proc struct at %p for pid %d", kp->ki_paddr, kp->ki_pid); return (NULL); } if (proc.p_pgrp == NULL) return (NULL); error = kvm_read_all(kd, (unsigned long)proc.p_pgrp, &pgrp, sizeof(pgrp)); if (error == 0) { warnx("can't read pgrp struct at %p for pid %d", proc.p_pgrp, kp->ki_pid); return (NULL); } error = kvm_read_all(kd, (unsigned long)pgrp.pg_session, &sess, sizeof(sess)); if (error == 0) { warnx("can't read session struct at %p for pid %d", pgrp.pg_session, kp->ki_pid); return (NULL); } return (sess.s_ttyvp); } static struct filestat_list * procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmapped) { struct file file; struct filedesc filed; struct vm_map_entry vmentry; struct vm_object object; struct vmspace vmspace; vm_map_entry_t entryp; vm_map_t map; vm_object_t objp; struct vnode *vp; struct file **ofiles; struct filestat *entry; struct filestat_list *head; kvm_t *kd; void *data; int i, fflags; int prot, type; unsigned int nfiles; assert(procstat); kd = procstat->kd; if (kd == NULL) return (NULL); if (kp->ki_fd == NULL) return (NULL); if (!kvm_read_all(kd, (unsigned long)kp->ki_fd, &filed, sizeof(filed))) { warnx("can't read filedesc at %p", (void *)kp->ki_fd); return (NULL); } /* * Allocate list head. */ head = malloc(sizeof(*head)); if (head == NULL) return (NULL); STAILQ_INIT(head); /* root directory vnode, if one. */ if (filed.fd_rdir) { entry = filestat_new_entry(filed.fd_rdir, PS_FST_TYPE_VNODE, -1, PS_FST_FFLAG_READ, PS_FST_UFLAG_RDIR, 0, 0, NULL, NULL); if (entry != NULL) STAILQ_INSERT_TAIL(head, entry, next); } /* current working directory vnode. */ if (filed.fd_cdir) { entry = filestat_new_entry(filed.fd_cdir, PS_FST_TYPE_VNODE, -1, PS_FST_FFLAG_READ, PS_FST_UFLAG_CDIR, 0, 0, NULL, NULL); if (entry != NULL) STAILQ_INSERT_TAIL(head, entry, next); } /* jail root, if any. */ if (filed.fd_jdir) { entry = filestat_new_entry(filed.fd_jdir, PS_FST_TYPE_VNODE, -1, PS_FST_FFLAG_READ, PS_FST_UFLAG_JAIL, 0, 0, NULL, NULL); if (entry != NULL) STAILQ_INSERT_TAIL(head, entry, next); } /* ktrace vnode, if one */ if (kp->ki_tracep) { entry = filestat_new_entry(kp->ki_tracep, PS_FST_TYPE_VNODE, -1, PS_FST_FFLAG_READ | PS_FST_FFLAG_WRITE, PS_FST_UFLAG_TRACE, 0, 0, NULL, NULL); if (entry != NULL) STAILQ_INSERT_TAIL(head, entry, next); } /* text vnode, if one */ if (kp->ki_textvp) { entry = filestat_new_entry(kp->ki_textvp, PS_FST_TYPE_VNODE, -1, PS_FST_FFLAG_READ, PS_FST_UFLAG_TEXT, 0, 0, NULL, NULL); if (entry != NULL) STAILQ_INSERT_TAIL(head, entry, next); } /* Controlling terminal. */ if ((vp = getctty(kd, kp)) != NULL) { entry = filestat_new_entry(vp, PS_FST_TYPE_VNODE, -1, PS_FST_FFLAG_READ | PS_FST_FFLAG_WRITE, PS_FST_UFLAG_CTTY, 0, 0, NULL, NULL); if (entry != NULL) STAILQ_INSERT_TAIL(head, entry, next); } nfiles = filed.fd_lastfile + 1; ofiles = malloc(nfiles * sizeof(struct file *)); if (ofiles == NULL) { warn("malloc(%zu)", nfiles * sizeof(struct file *)); goto do_mmapped; } if (!kvm_read_all(kd, (unsigned long)filed.fd_ofiles, ofiles, nfiles * sizeof(struct file *))) { warnx("cannot read file structures at %p", (void *)filed.fd_ofiles); free(ofiles); goto do_mmapped; } for (i = 0; i <= filed.fd_lastfile; i++) { if (ofiles[i] == NULL) continue; if (!kvm_read_all(kd, (unsigned long)ofiles[i], &file, sizeof(struct file))) { warnx("can't read file %d at %p", i, (void *)ofiles[i]); continue; } switch (file.f_type) { case DTYPE_VNODE: type = PS_FST_TYPE_VNODE; data = file.f_vnode; break; case DTYPE_SOCKET: type = PS_FST_TYPE_SOCKET; data = file.f_data; break; case DTYPE_PIPE: type = PS_FST_TYPE_PIPE; data = file.f_data; break; case DTYPE_FIFO: type = PS_FST_TYPE_FIFO; data = file.f_vnode; break; #ifdef DTYPE_PTS case DTYPE_PTS: type = PS_FST_TYPE_PTS; data = file.f_data; break; #endif case DTYPE_SEM: type = PS_FST_TYPE_SEM; data = file.f_data; break; case DTYPE_SHM: type = PS_FST_TYPE_SHM; data = file.f_data; break; default: continue; } /* XXXRW: No capability rights support for kvm yet. */ entry = filestat_new_entry(data, type, i, to_filestat_flags(file.f_flag), 0, 0, 0, NULL, NULL); if (entry != NULL) STAILQ_INSERT_TAIL(head, entry, next); } free(ofiles); do_mmapped: /* * Process mmapped files if requested. */ if (mmapped) { if (!kvm_read_all(kd, (unsigned long)kp->ki_vmspace, &vmspace, sizeof(vmspace))) { warnx("can't read vmspace at %p", (void *)kp->ki_vmspace); goto exit; } map = &vmspace.vm_map; for (entryp = map->header.next; entryp != &kp->ki_vmspace->vm_map.header; entryp = vmentry.next) { if (!kvm_read_all(kd, (unsigned long)entryp, &vmentry, sizeof(vmentry))) { warnx("can't read vm_map_entry at %p", (void *)entryp); continue; } if (vmentry.eflags & MAP_ENTRY_IS_SUB_MAP) continue; if ((objp = vmentry.object.vm_object) == NULL) continue; for (; objp; objp = object.backing_object) { if (!kvm_read_all(kd, (unsigned long)objp, &object, sizeof(object))) { warnx("can't read vm_object at %p", (void *)objp); break; } } /* We want only vnode objects. */ if (object.type != OBJT_VNODE) continue; prot = vmentry.protection; fflags = 0; if (prot & VM_PROT_READ) fflags = PS_FST_FFLAG_READ; if ((vmentry.eflags & MAP_ENTRY_COW) == 0 && prot & VM_PROT_WRITE) fflags |= PS_FST_FFLAG_WRITE; /* * Create filestat entry. */ entry = filestat_new_entry(object.handle, PS_FST_TYPE_VNODE, -1, fflags, PS_FST_UFLAG_MMAP, 0, 0, NULL, NULL); if (entry != NULL) STAILQ_INSERT_TAIL(head, entry, next); } } exit: return (head); } /* * kinfo types to filestat translation. */ static int kinfo_type2fst(int kftype) { static struct { int kf_type; int fst_type; } kftypes2fst[] = { { KF_TYPE_CRYPTO, PS_FST_TYPE_CRYPTO }, { KF_TYPE_FIFO, PS_FST_TYPE_FIFO }, { KF_TYPE_KQUEUE, PS_FST_TYPE_KQUEUE }, { KF_TYPE_MQUEUE, PS_FST_TYPE_MQUEUE }, { KF_TYPE_NONE, PS_FST_TYPE_NONE }, { KF_TYPE_PIPE, PS_FST_TYPE_PIPE }, { KF_TYPE_PTS, PS_FST_TYPE_PTS }, { KF_TYPE_SEM, PS_FST_TYPE_SEM }, { KF_TYPE_SHM, PS_FST_TYPE_SHM }, { KF_TYPE_SOCKET, PS_FST_TYPE_SOCKET }, { KF_TYPE_VNODE, PS_FST_TYPE_VNODE }, { KF_TYPE_UNKNOWN, PS_FST_TYPE_UNKNOWN } }; #define NKFTYPES (sizeof(kftypes2fst) / sizeof(*kftypes2fst)) unsigned int i; for (i = 0; i < NKFTYPES; i++) if (kftypes2fst[i].kf_type == kftype) break; if (i == NKFTYPES) return (PS_FST_TYPE_UNKNOWN); return (kftypes2fst[i].fst_type); } /* * kinfo flags to filestat translation. */ static int kinfo_fflags2fst(int kfflags) { static struct { int kf_flag; int fst_flag; } kfflags2fst[] = { { KF_FLAG_APPEND, PS_FST_FFLAG_APPEND }, { KF_FLAG_ASYNC, PS_FST_FFLAG_ASYNC }, { KF_FLAG_CREAT, PS_FST_FFLAG_CREAT }, { KF_FLAG_DIRECT, PS_FST_FFLAG_DIRECT }, { KF_FLAG_EXCL, PS_FST_FFLAG_EXCL }, { KF_FLAG_EXEC, PS_FST_FFLAG_EXEC }, { KF_FLAG_EXLOCK, PS_FST_FFLAG_EXLOCK }, { KF_FLAG_FSYNC, PS_FST_FFLAG_SYNC }, { KF_FLAG_HASLOCK, PS_FST_FFLAG_HASLOCK }, { KF_FLAG_NOFOLLOW, PS_FST_FFLAG_NOFOLLOW }, { KF_FLAG_NONBLOCK, PS_FST_FFLAG_NONBLOCK }, { KF_FLAG_READ, PS_FST_FFLAG_READ }, { KF_FLAG_SHLOCK, PS_FST_FFLAG_SHLOCK }, { KF_FLAG_TRUNC, PS_FST_FFLAG_TRUNC }, { KF_FLAG_WRITE, PS_FST_FFLAG_WRITE } }; #define NKFFLAGS (sizeof(kfflags2fst) / sizeof(*kfflags2fst)) unsigned int i; int flags; flags = 0; for (i = 0; i < NKFFLAGS; i++) if ((kfflags & kfflags2fst[i].kf_flag) != 0) flags |= kfflags2fst[i].fst_flag; return (flags); } static int kinfo_uflags2fst(int fd) { switch (fd) { case KF_FD_TYPE_CTTY: return (PS_FST_UFLAG_CTTY); case KF_FD_TYPE_CWD: return (PS_FST_UFLAG_CDIR); case KF_FD_TYPE_JAIL: return (PS_FST_UFLAG_JAIL); case KF_FD_TYPE_TEXT: return (PS_FST_UFLAG_TEXT); case KF_FD_TYPE_TRACE: return (PS_FST_UFLAG_TRACE); case KF_FD_TYPE_ROOT: return (PS_FST_UFLAG_RDIR); } return (0); } static struct kinfo_file * kinfo_getfile_core(struct procstat_core *core, int *cntp) { int cnt; size_t len; char *buf, *bp, *eb; struct kinfo_file *kif, *kp, *kf; buf = procstat_core_get(core, PSC_TYPE_FILES, NULL, &len); if (buf == NULL) return (NULL); /* * XXXMG: The code below is just copy&past from libutil. * The code duplication can be avoided if libutil * is extended to provide something like: * struct kinfo_file *kinfo_getfile_from_buf(const char *buf, * size_t len, int *cntp); */ /* Pass 1: count items */ cnt = 0; bp = buf; eb = buf + len; while (bp < eb) { kf = (struct kinfo_file *)(uintptr_t)bp; if (kf->kf_structsize == 0) break; bp += kf->kf_structsize; cnt++; } kif = calloc(cnt, sizeof(*kif)); if (kif == NULL) { free(buf); return (NULL); } bp = buf; eb = buf + len; kp = kif; /* Pass 2: unpack */ while (bp < eb) { kf = (struct kinfo_file *)(uintptr_t)bp; if (kf->kf_structsize == 0) break; /* Copy/expand into pre-zeroed buffer */ memcpy(kp, kf, kf->kf_structsize); /* Advance to next packed record */ bp += kf->kf_structsize; /* Set field size to fixed length, advance */ kp->kf_structsize = sizeof(*kp); kp++; } free(buf); *cntp = cnt; return (kif); /* Caller must free() return value */ } static struct filestat_list * procstat_getfiles_sysctl(struct procstat *procstat, struct kinfo_proc *kp, int mmapped) { struct kinfo_file *kif, *files; struct kinfo_vmentry *kve, *vmentries; struct filestat_list *head; struct filestat *entry; char *path; off_t offset; int cnt, fd, fflags; int i, type, uflags; int refcount; cap_rights_t cap_rights; assert(kp); if (kp->ki_fd == NULL) return (NULL); switch(procstat->type) { case PROCSTAT_SYSCTL: files = kinfo_getfile(kp->ki_pid, &cnt); break; case PROCSTAT_CORE: files = kinfo_getfile_core(procstat->core, &cnt); break; default: assert(!"invalid type"); } if (files == NULL && errno != EPERM) { warn("kinfo_getfile()"); return (NULL); } procstat->files = files; /* * Allocate list head. */ head = malloc(sizeof(*head)); if (head == NULL) return (NULL); STAILQ_INIT(head); for (i = 0; i < cnt; i++) { kif = &files[i]; type = kinfo_type2fst(kif->kf_type); fd = kif->kf_fd >= 0 ? kif->kf_fd : -1; fflags = kinfo_fflags2fst(kif->kf_flags); uflags = kinfo_uflags2fst(kif->kf_fd); refcount = kif->kf_ref_count; offset = kif->kf_offset; if (*kif->kf_path != '\0') path = strdup(kif->kf_path); else path = NULL; cap_rights = kif->kf_cap_rights; /* * Create filestat entry. */ entry = filestat_new_entry(kif, type, fd, fflags, uflags, refcount, offset, path, &cap_rights); if (entry != NULL) STAILQ_INSERT_TAIL(head, entry, next); } if (mmapped != 0) { vmentries = procstat_getvmmap(procstat, kp, &cnt); procstat->vmentries = vmentries; if (vmentries == NULL || cnt == 0) goto fail; for (i = 0; i < cnt; i++) { kve = &vmentries[i]; if (kve->kve_type != KVME_TYPE_VNODE) continue; fflags = 0; if (kve->kve_protection & KVME_PROT_READ) fflags = PS_FST_FFLAG_READ; if ((kve->kve_flags & KVME_FLAG_COW) == 0 && kve->kve_protection & KVME_PROT_WRITE) fflags |= PS_FST_FFLAG_WRITE; offset = kve->kve_offset; refcount = kve->kve_ref_count; if (*kve->kve_path != '\0') path = strdup(kve->kve_path); else path = NULL; entry = filestat_new_entry(kve, PS_FST_TYPE_VNODE, -1, fflags, PS_FST_UFLAG_MMAP, refcount, offset, path, NULL); if (entry != NULL) STAILQ_INSERT_TAIL(head, entry, next); } } fail: return (head); } int procstat_get_pipe_info(struct procstat *procstat, struct filestat *fst, struct pipestat *ps, char *errbuf) { assert(ps); if (procstat->type == PROCSTAT_KVM) { return (procstat_get_pipe_info_kvm(procstat->kd, fst, ps, errbuf)); } else if (procstat->type == PROCSTAT_SYSCTL || procstat->type == PROCSTAT_CORE) { return (procstat_get_pipe_info_sysctl(fst, ps, errbuf)); } else { warnx("unknown access method: %d", procstat->type); if (errbuf != NULL) snprintf(errbuf, _POSIX2_LINE_MAX, "error"); return (1); } } static int procstat_get_pipe_info_kvm(kvm_t *kd, struct filestat *fst, struct pipestat *ps, char *errbuf) { struct pipe pi; void *pipep; assert(kd); assert(ps); assert(fst); bzero(ps, sizeof(*ps)); pipep = fst->fs_typedep; if (pipep == NULL) goto fail; if (!kvm_read_all(kd, (unsigned long)pipep, &pi, sizeof(struct pipe))) { warnx("can't read pipe at %p", (void *)pipep); goto fail; } ps->addr = (uintptr_t)pipep; ps->peer = (uintptr_t)pi.pipe_peer; ps->buffer_cnt = pi.pipe_buffer.cnt; return (0); fail: if (errbuf != NULL) snprintf(errbuf, _POSIX2_LINE_MAX, "error"); return (1); } static int procstat_get_pipe_info_sysctl(struct filestat *fst, struct pipestat *ps, char *errbuf __unused) { struct kinfo_file *kif; assert(ps); assert(fst); bzero(ps, sizeof(*ps)); kif = fst->fs_typedep; if (kif == NULL) return (1); ps->addr = kif->kf_un.kf_pipe.kf_pipe_addr; ps->peer = kif->kf_un.kf_pipe.kf_pipe_peer; ps->buffer_cnt = kif->kf_un.kf_pipe.kf_pipe_buffer_cnt; return (0); } int procstat_get_pts_info(struct procstat *procstat, struct filestat *fst, struct ptsstat *pts, char *errbuf) { assert(pts); if (procstat->type == PROCSTAT_KVM) { return (procstat_get_pts_info_kvm(procstat->kd, fst, pts, errbuf)); } else if (procstat->type == PROCSTAT_SYSCTL || procstat->type == PROCSTAT_CORE) { return (procstat_get_pts_info_sysctl(fst, pts, errbuf)); } else { warnx("unknown access method: %d", procstat->type); if (errbuf != NULL) snprintf(errbuf, _POSIX2_LINE_MAX, "error"); return (1); } } static int procstat_get_pts_info_kvm(kvm_t *kd, struct filestat *fst, struct ptsstat *pts, char *errbuf) { struct tty tty; void *ttyp; assert(kd); assert(pts); assert(fst); bzero(pts, sizeof(*pts)); ttyp = fst->fs_typedep; if (ttyp == NULL) goto fail; if (!kvm_read_all(kd, (unsigned long)ttyp, &tty, sizeof(struct tty))) { warnx("can't read tty at %p", (void *)ttyp); goto fail; } pts->dev = dev2udev(kd, tty.t_dev); (void)kdevtoname(kd, tty.t_dev, pts->devname); return (0); fail: if (errbuf != NULL) snprintf(errbuf, _POSIX2_LINE_MAX, "error"); return (1); } static int procstat_get_pts_info_sysctl(struct filestat *fst, struct ptsstat *pts, char *errbuf __unused) { struct kinfo_file *kif; assert(pts); assert(fst); bzero(pts, sizeof(*pts)); kif = fst->fs_typedep; if (kif == NULL) return (0); pts->dev = kif->kf_un.kf_pts.kf_pts_dev; strlcpy(pts->devname, kif->kf_path, sizeof(pts->devname)); return (0); } int procstat_get_sem_info(struct procstat *procstat, struct filestat *fst, struct semstat *sem, char *errbuf) { assert(sem); if (procstat->type == PROCSTAT_KVM) { return (procstat_get_sem_info_kvm(procstat->kd, fst, sem, errbuf)); } else if (procstat->type == PROCSTAT_SYSCTL || procstat->type == PROCSTAT_CORE) { return (procstat_get_sem_info_sysctl(fst, sem, errbuf)); } else { warnx("unknown access method: %d", procstat->type); if (errbuf != NULL) snprintf(errbuf, _POSIX2_LINE_MAX, "error"); return (1); } } static int procstat_get_sem_info_kvm(kvm_t *kd, struct filestat *fst, struct semstat *sem, char *errbuf) { struct ksem ksem; void *ksemp; char *path; int i; assert(kd); assert(sem); assert(fst); bzero(sem, sizeof(*sem)); ksemp = fst->fs_typedep; if (ksemp == NULL) goto fail; if (!kvm_read_all(kd, (unsigned long)ksemp, &ksem, sizeof(struct ksem))) { warnx("can't read ksem at %p", (void *)ksemp); goto fail; } sem->mode = S_IFREG | ksem.ks_mode; sem->value = ksem.ks_value; if (fst->fs_path == NULL && ksem.ks_path != NULL) { path = malloc(MAXPATHLEN); for (i = 0; i < MAXPATHLEN - 1; i++) { if (!kvm_read_all(kd, (unsigned long)ksem.ks_path + i, path + i, 1)) break; if (path[i] == '\0') break; } path[i] = '\0'; if (i == 0) free(path); else fst->fs_path = path; } return (0); fail: if (errbuf != NULL) snprintf(errbuf, _POSIX2_LINE_MAX, "error"); return (1); } static int procstat_get_sem_info_sysctl(struct filestat *fst, struct semstat *sem, char *errbuf __unused) { struct kinfo_file *kif; assert(sem); assert(fst); bzero(sem, sizeof(*sem)); kif = fst->fs_typedep; if (kif == NULL) return (0); sem->value = kif->kf_un.kf_sem.kf_sem_value; sem->mode = kif->kf_un.kf_sem.kf_sem_mode; return (0); } int procstat_get_shm_info(struct procstat *procstat, struct filestat *fst, struct shmstat *shm, char *errbuf) { assert(shm); if (procstat->type == PROCSTAT_KVM) { return (procstat_get_shm_info_kvm(procstat->kd, fst, shm, errbuf)); } else if (procstat->type == PROCSTAT_SYSCTL || procstat->type == PROCSTAT_CORE) { return (procstat_get_shm_info_sysctl(fst, shm, errbuf)); } else { warnx("unknown access method: %d", procstat->type); if (errbuf != NULL) snprintf(errbuf, _POSIX2_LINE_MAX, "error"); return (1); } } static int procstat_get_shm_info_kvm(kvm_t *kd, struct filestat *fst, struct shmstat *shm, char *errbuf) { struct shmfd shmfd; void *shmfdp; char *path; int i; assert(kd); assert(shm); assert(fst); bzero(shm, sizeof(*shm)); shmfdp = fst->fs_typedep; if (shmfdp == NULL) goto fail; if (!kvm_read_all(kd, (unsigned long)shmfdp, &shmfd, sizeof(struct shmfd))) { warnx("can't read shmfd at %p", (void *)shmfdp); goto fail; } shm->mode = S_IFREG | shmfd.shm_mode; shm->size = shmfd.shm_size; if (fst->fs_path == NULL && shmfd.shm_path != NULL) { path = malloc(MAXPATHLEN); for (i = 0; i < MAXPATHLEN - 1; i++) { if (!kvm_read_all(kd, (unsigned long)shmfd.shm_path + i, path + i, 1)) break; if (path[i] == '\0') break; } path[i] = '\0'; if (i == 0) free(path); else fst->fs_path = path; } return (0); fail: if (errbuf != NULL) snprintf(errbuf, _POSIX2_LINE_MAX, "error"); return (1); } static int procstat_get_shm_info_sysctl(struct filestat *fst, struct shmstat *shm, char *errbuf __unused) { struct kinfo_file *kif; assert(shm); assert(fst); bzero(shm, sizeof(*shm)); kif = fst->fs_typedep; if (kif == NULL) return (0); shm->size = kif->kf_un.kf_file.kf_file_size; shm->mode = kif->kf_un.kf_file.kf_file_mode; return (0); } int procstat_get_vnode_info(struct procstat *procstat, struct filestat *fst, struct vnstat *vn, char *errbuf) { assert(vn); if (procstat->type == PROCSTAT_KVM) { return (procstat_get_vnode_info_kvm(procstat->kd, fst, vn, errbuf)); } else if (procstat->type == PROCSTAT_SYSCTL || procstat->type == PROCSTAT_CORE) { return (procstat_get_vnode_info_sysctl(fst, vn, errbuf)); } else { warnx("unknown access method: %d", procstat->type); if (errbuf != NULL) snprintf(errbuf, _POSIX2_LINE_MAX, "error"); return (1); } } static int procstat_get_vnode_info_kvm(kvm_t *kd, struct filestat *fst, struct vnstat *vn, char *errbuf) { /* Filesystem specific handlers. */ #define FSTYPE(fst) {#fst, fst##_filestat} struct { const char *tag; int (*handler)(kvm_t *kd, struct vnode *vp, struct vnstat *vn); } fstypes[] = { FSTYPE(devfs), FSTYPE(isofs), FSTYPE(msdosfs), FSTYPE(nfs), FSTYPE(smbfs), FSTYPE(udf), FSTYPE(ufs), #ifdef LIBPROCSTAT_ZFS FSTYPE(zfs), #endif }; #define NTYPES (sizeof(fstypes) / sizeof(*fstypes)) struct vnode vnode; char tagstr[12]; void *vp; int error; unsigned int i; assert(kd); assert(vn); assert(fst); vp = fst->fs_typedep; if (vp == NULL) goto fail; error = kvm_read_all(kd, (unsigned long)vp, &vnode, sizeof(vnode)); if (error == 0) { warnx("can't read vnode at %p", (void *)vp); goto fail; } bzero(vn, sizeof(*vn)); vn->vn_type = vntype2psfsttype(vnode.v_type); if (vnode.v_type == VNON || vnode.v_type == VBAD) return (0); error = kvm_read_all(kd, (unsigned long)vnode.v_tag, tagstr, sizeof(tagstr)); if (error == 0) { warnx("can't read v_tag at %p", (void *)vp); goto fail; } tagstr[sizeof(tagstr) - 1] = '\0'; /* * Find appropriate handler. */ for (i = 0; i < NTYPES; i++) if (!strcmp(fstypes[i].tag, tagstr)) { if (fstypes[i].handler(kd, &vnode, vn) != 0) { goto fail; } break; } if (i == NTYPES) { if (errbuf != NULL) snprintf(errbuf, _POSIX2_LINE_MAX, "?(%s)", tagstr); return (1); } vn->vn_mntdir = getmnton(kd, vnode.v_mount); if ((vnode.v_type == VBLK || vnode.v_type == VCHR) && vnode.v_rdev != NULL){ vn->vn_dev = dev2udev(kd, vnode.v_rdev); (void)kdevtoname(kd, vnode.v_rdev, vn->vn_devname); } else { vn->vn_dev = -1; } return (0); fail: if (errbuf != NULL) snprintf(errbuf, _POSIX2_LINE_MAX, "error"); return (1); } /* * kinfo vnode type to filestat translation. */ static int kinfo_vtype2fst(int kfvtype) { static struct { int kf_vtype; int fst_vtype; } kfvtypes2fst[] = { { KF_VTYPE_VBAD, PS_FST_VTYPE_VBAD }, { KF_VTYPE_VBLK, PS_FST_VTYPE_VBLK }, { KF_VTYPE_VCHR, PS_FST_VTYPE_VCHR }, { KF_VTYPE_VDIR, PS_FST_VTYPE_VDIR }, { KF_VTYPE_VFIFO, PS_FST_VTYPE_VFIFO }, { KF_VTYPE_VLNK, PS_FST_VTYPE_VLNK }, { KF_VTYPE_VNON, PS_FST_VTYPE_VNON }, { KF_VTYPE_VREG, PS_FST_VTYPE_VREG }, { KF_VTYPE_VSOCK, PS_FST_VTYPE_VSOCK } }; #define NKFVTYPES (sizeof(kfvtypes2fst) / sizeof(*kfvtypes2fst)) unsigned int i; for (i = 0; i < NKFVTYPES; i++) if (kfvtypes2fst[i].kf_vtype == kfvtype) break; if (i == NKFVTYPES) return (PS_FST_VTYPE_UNKNOWN); return (kfvtypes2fst[i].fst_vtype); } static int procstat_get_vnode_info_sysctl(struct filestat *fst, struct vnstat *vn, char *errbuf) { struct statfs stbuf; struct kinfo_file *kif; struct kinfo_vmentry *kve; + char *name, *path; uint64_t fileid; uint64_t size; - char *name, *path; - uint32_t fsid; + uint64_t fsid; + uint64_t rdev; uint16_t mode; - uint32_t rdev; int vntype; int status; assert(fst); assert(vn); bzero(vn, sizeof(*vn)); if (fst->fs_typedep == NULL) return (1); if (fst->fs_uflags & PS_FST_UFLAG_MMAP) { kve = fst->fs_typedep; fileid = kve->kve_vn_fileid; fsid = kve->kve_vn_fsid; mode = kve->kve_vn_mode; path = kve->kve_path; rdev = kve->kve_vn_rdev; size = kve->kve_vn_size; vntype = kinfo_vtype2fst(kve->kve_vn_type); status = kve->kve_status; } else { kif = fst->fs_typedep; fileid = kif->kf_un.kf_file.kf_file_fileid; fsid = kif->kf_un.kf_file.kf_file_fsid; mode = kif->kf_un.kf_file.kf_file_mode; path = kif->kf_path; rdev = kif->kf_un.kf_file.kf_file_rdev; size = kif->kf_un.kf_file.kf_file_size; vntype = kinfo_vtype2fst(kif->kf_vnode_type); status = kif->kf_status; } vn->vn_type = vntype; if (vntype == PS_FST_VTYPE_VNON || vntype == PS_FST_VTYPE_VBAD) return (0); if ((status & KF_ATTR_VALID) == 0) { if (errbuf != NULL) { snprintf(errbuf, _POSIX2_LINE_MAX, "? (no info available)"); } return (1); } if (path && *path) { statfs(path, &stbuf); vn->vn_mntdir = strdup(stbuf.f_mntonname); } else vn->vn_mntdir = strdup("-"); vn->vn_dev = rdev; if (vntype == PS_FST_VTYPE_VBLK) { name = devname(rdev, S_IFBLK); if (name != NULL) strlcpy(vn->vn_devname, name, sizeof(vn->vn_devname)); } else if (vntype == PS_FST_VTYPE_VCHR) { name = devname(vn->vn_dev, S_IFCHR); if (name != NULL) strlcpy(vn->vn_devname, name, sizeof(vn->vn_devname)); } vn->vn_fsid = fsid; vn->vn_fileid = fileid; vn->vn_size = size; vn->vn_mode = mode; return (0); } int procstat_get_socket_info(struct procstat *procstat, struct filestat *fst, struct sockstat *sock, char *errbuf) { assert(sock); if (procstat->type == PROCSTAT_KVM) { return (procstat_get_socket_info_kvm(procstat->kd, fst, sock, errbuf)); } else if (procstat->type == PROCSTAT_SYSCTL || procstat->type == PROCSTAT_CORE) { return (procstat_get_socket_info_sysctl(fst, sock, errbuf)); } else { warnx("unknown access method: %d", procstat->type); if (errbuf != NULL) snprintf(errbuf, _POSIX2_LINE_MAX, "error"); return (1); } } static int procstat_get_socket_info_kvm(kvm_t *kd, struct filestat *fst, struct sockstat *sock, char *errbuf) { struct domain dom; struct inpcb inpcb; struct protosw proto; struct socket s; struct unpcb unpcb; ssize_t len; void *so; assert(kd); assert(sock); assert(fst); bzero(sock, sizeof(*sock)); so = fst->fs_typedep; if (so == NULL) goto fail; sock->so_addr = (uintptr_t)so; /* fill in socket */ if (!kvm_read_all(kd, (unsigned long)so, &s, sizeof(struct socket))) { warnx("can't read sock at %p", (void *)so); goto fail; } /* fill in protosw entry */ if (!kvm_read_all(kd, (unsigned long)s.so_proto, &proto, sizeof(struct protosw))) { warnx("can't read protosw at %p", (void *)s.so_proto); goto fail; } /* fill in domain */ if (!kvm_read_all(kd, (unsigned long)proto.pr_domain, &dom, sizeof(struct domain))) { warnx("can't read domain at %p", (void *)proto.pr_domain); goto fail; } if ((len = kvm_read(kd, (unsigned long)dom.dom_name, sock->dname, sizeof(sock->dname) - 1)) < 0) { warnx("can't read domain name at %p", (void *)dom.dom_name); sock->dname[0] = '\0'; } else sock->dname[len] = '\0'; /* * Fill in known data. */ sock->type = s.so_type; sock->proto = proto.pr_protocol; sock->dom_family = dom.dom_family; sock->so_pcb = (uintptr_t)s.so_pcb; /* * Protocol specific data. */ switch(dom.dom_family) { case AF_INET: case AF_INET6: if (proto.pr_protocol == IPPROTO_TCP) { if (s.so_pcb) { if (kvm_read(kd, (u_long)s.so_pcb, (char *)&inpcb, sizeof(struct inpcb)) != sizeof(struct inpcb)) { warnx("can't read inpcb at %p", (void *)s.so_pcb); } else sock->inp_ppcb = (uintptr_t)inpcb.inp_ppcb; } } break; case AF_UNIX: if (s.so_pcb) { if (kvm_read(kd, (u_long)s.so_pcb, (char *)&unpcb, sizeof(struct unpcb)) != sizeof(struct unpcb)){ warnx("can't read unpcb at %p", (void *)s.so_pcb); } else if (unpcb.unp_conn) { sock->so_rcv_sb_state = s.so_rcv.sb_state; sock->so_snd_sb_state = s.so_snd.sb_state; sock->unp_conn = (uintptr_t)unpcb.unp_conn; } } break; default: break; } return (0); fail: if (errbuf != NULL) snprintf(errbuf, _POSIX2_LINE_MAX, "error"); return (1); } static int procstat_get_socket_info_sysctl(struct filestat *fst, struct sockstat *sock, char *errbuf __unused) { struct kinfo_file *kif; assert(sock); assert(fst); bzero(sock, sizeof(*sock)); kif = fst->fs_typedep; if (kif == NULL) return (0); /* * Fill in known data. */ sock->type = kif->kf_sock_type; sock->proto = kif->kf_sock_protocol; sock->dom_family = kif->kf_sock_domain; sock->so_pcb = kif->kf_un.kf_sock.kf_sock_pcb; strlcpy(sock->dname, kif->kf_path, sizeof(sock->dname)); - bcopy(&kif->kf_sa_local, &sock->sa_local, kif->kf_sa_local.ss_len); - bcopy(&kif->kf_sa_peer, &sock->sa_peer, kif->kf_sa_peer.ss_len); + bcopy(&kif->kf_un.kf_sock.kf_sa_local, &sock->sa_local, + kif->kf_un.kf_sock.kf_sa_local.ss_len); + bcopy(&kif->kf_un.kf_sock.kf_sa_peer, &sock->sa_peer, + kif->kf_un.kf_sock.kf_sa_peer.ss_len); /* * Protocol specific data. */ switch(sock->dom_family) { case AF_INET: case AF_INET6: if (sock->proto == IPPROTO_TCP) sock->inp_ppcb = kif->kf_un.kf_sock.kf_sock_inpcb; break; case AF_UNIX: if (kif->kf_un.kf_sock.kf_sock_unpconn != 0) { sock->so_rcv_sb_state = kif->kf_un.kf_sock.kf_sock_rcv_sb_state; sock->so_snd_sb_state = kif->kf_un.kf_sock.kf_sock_snd_sb_state; sock->unp_conn = kif->kf_un.kf_sock.kf_sock_unpconn; } break; default: break; } return (0); } /* * Descriptor flags to filestat translation. */ static int to_filestat_flags(int flags) { static struct { int flag; int fst_flag; } fstflags[] = { { FREAD, PS_FST_FFLAG_READ }, { FWRITE, PS_FST_FFLAG_WRITE }, { O_APPEND, PS_FST_FFLAG_APPEND }, { O_ASYNC, PS_FST_FFLAG_ASYNC }, { O_CREAT, PS_FST_FFLAG_CREAT }, { O_DIRECT, PS_FST_FFLAG_DIRECT }, { O_EXCL, PS_FST_FFLAG_EXCL }, { O_EXEC, PS_FST_FFLAG_EXEC }, { O_EXLOCK, PS_FST_FFLAG_EXLOCK }, { O_NOFOLLOW, PS_FST_FFLAG_NOFOLLOW }, { O_NONBLOCK, PS_FST_FFLAG_NONBLOCK }, { O_SHLOCK, PS_FST_FFLAG_SHLOCK }, { O_SYNC, PS_FST_FFLAG_SYNC }, { O_TRUNC, PS_FST_FFLAG_TRUNC } }; #define NFSTFLAGS (sizeof(fstflags) / sizeof(*fstflags)) int fst_flags; unsigned int i; fst_flags = 0; for (i = 0; i < NFSTFLAGS; i++) if (flags & fstflags[i].flag) fst_flags |= fstflags[i].fst_flag; return (fst_flags); } /* * Vnode type to filestate translation. */ static int vntype2psfsttype(int type) { static struct { int vtype; int fst_vtype; } vt2fst[] = { { VBAD, PS_FST_VTYPE_VBAD }, { VBLK, PS_FST_VTYPE_VBLK }, { VCHR, PS_FST_VTYPE_VCHR }, { VDIR, PS_FST_VTYPE_VDIR }, { VFIFO, PS_FST_VTYPE_VFIFO }, { VLNK, PS_FST_VTYPE_VLNK }, { VNON, PS_FST_VTYPE_VNON }, { VREG, PS_FST_VTYPE_VREG }, { VSOCK, PS_FST_VTYPE_VSOCK } }; #define NVFTYPES (sizeof(vt2fst) / sizeof(*vt2fst)) unsigned int i, fst_type; fst_type = PS_FST_VTYPE_UNKNOWN; for (i = 0; i < NVFTYPES; i++) { if (type == vt2fst[i].vtype) { fst_type = vt2fst[i].fst_vtype; break; } } return (fst_type); } static char * getmnton(kvm_t *kd, struct mount *m) { struct mount mnt; static struct mtab { struct mtab *next; struct mount *m; char mntonname[MNAMELEN + 1]; } *mhead = NULL; struct mtab *mt; for (mt = mhead; mt != NULL; mt = mt->next) if (m == mt->m) return (mt->mntonname); if (!kvm_read_all(kd, (unsigned long)m, &mnt, sizeof(struct mount))) { warnx("can't read mount table at %p", (void *)m); return (NULL); } if ((mt = malloc(sizeof (struct mtab))) == NULL) err(1, NULL); mt->m = m; bcopy(&mnt.mnt_stat.f_mntonname[0], &mt->mntonname[0], MNAMELEN); mt->mntonname[MNAMELEN] = '\0'; mt->next = mhead; mhead = mt; return (mt->mntonname); } /* * Auxiliary structures and functions to get process environment or * command line arguments. */ struct argvec { char *buf; size_t bufsize; char **argv; size_t argc; }; static struct argvec * argvec_alloc(size_t bufsize) { struct argvec *av; av = malloc(sizeof(*av)); if (av == NULL) return (NULL); av->bufsize = bufsize; av->buf = malloc(av->bufsize); if (av->buf == NULL) { free(av); return (NULL); } av->argc = 32; av->argv = malloc(sizeof(char *) * av->argc); if (av->argv == NULL) { free(av->buf); free(av); return (NULL); } return av; } static void argvec_free(struct argvec * av) { free(av->argv); free(av->buf); free(av); } static char ** getargv(struct procstat *procstat, struct kinfo_proc *kp, size_t nchr, int env) { int error, name[4], argc, i; struct argvec *av, **avp; enum psc_type type; size_t len; char *p, **argv; assert(procstat); assert(kp); if (procstat->type == PROCSTAT_KVM) { warnx("can't use kvm access method"); return (NULL); } if (procstat->type != PROCSTAT_SYSCTL && procstat->type != PROCSTAT_CORE) { warnx("unknown access method: %d", procstat->type); return (NULL); } if (nchr == 0 || nchr > ARG_MAX) nchr = ARG_MAX; avp = (struct argvec **)(env ? &procstat->argv : &procstat->envv); av = *avp; if (av == NULL) { av = argvec_alloc(nchr); if (av == NULL) { warn("malloc(%zu)", nchr); return (NULL); } *avp = av; } else if (av->bufsize < nchr) { av->buf = reallocf(av->buf, nchr); if (av->buf == NULL) { warn("malloc(%zu)", nchr); return (NULL); } } if (procstat->type == PROCSTAT_SYSCTL) { name[0] = CTL_KERN; name[1] = KERN_PROC; name[2] = env ? KERN_PROC_ENV : KERN_PROC_ARGS; name[3] = kp->ki_pid; len = nchr; error = sysctl(name, nitems(name), av->buf, &len, NULL, 0); if (error != 0 && errno != ESRCH && errno != EPERM) warn("sysctl(kern.proc.%s)", env ? "env" : "args"); if (error != 0 || len == 0) return (NULL); } else /* procstat->type == PROCSTAT_CORE */ { type = env ? PSC_TYPE_ENVV : PSC_TYPE_ARGV; len = nchr; if (procstat_core_get(procstat->core, type, av->buf, &len) == NULL) { return (NULL); } } argv = av->argv; argc = av->argc; i = 0; for (p = av->buf; p < av->buf + len; p += strlen(p) + 1) { argv[i++] = p; if (i < argc) continue; /* Grow argv. */ argc += argc; argv = realloc(argv, sizeof(char *) * argc); if (argv == NULL) { warn("malloc(%zu)", sizeof(char *) * argc); return (NULL); } av->argv = argv; av->argc = argc; } argv[i] = NULL; return (argv); } /* * Return process command line arguments. */ char ** procstat_getargv(struct procstat *procstat, struct kinfo_proc *p, size_t nchr) { return (getargv(procstat, p, nchr, 0)); } /* * Free the buffer allocated by procstat_getargv(). */ void procstat_freeargv(struct procstat *procstat) { if (procstat->argv != NULL) { argvec_free(procstat->argv); procstat->argv = NULL; } } /* * Return process environment. */ char ** procstat_getenvv(struct procstat *procstat, struct kinfo_proc *p, size_t nchr) { return (getargv(procstat, p, nchr, 1)); } /* * Free the buffer allocated by procstat_getenvv(). */ void procstat_freeenvv(struct procstat *procstat) { if (procstat->envv != NULL) { argvec_free(procstat->envv); procstat->envv = NULL; } } static struct kinfo_vmentry * kinfo_getvmmap_core(struct procstat_core *core, int *cntp) { int cnt; size_t len; char *buf, *bp, *eb; struct kinfo_vmentry *kiv, *kp, *kv; buf = procstat_core_get(core, PSC_TYPE_VMMAP, NULL, &len); if (buf == NULL) return (NULL); /* * XXXMG: The code below is just copy&past from libutil. * The code duplication can be avoided if libutil * is extended to provide something like: * struct kinfo_vmentry *kinfo_getvmmap_from_buf(const char *buf, * size_t len, int *cntp); */ /* Pass 1: count items */ cnt = 0; bp = buf; eb = buf + len; while (bp < eb) { kv = (struct kinfo_vmentry *)(uintptr_t)bp; if (kv->kve_structsize == 0) break; bp += kv->kve_structsize; cnt++; } kiv = calloc(cnt, sizeof(*kiv)); if (kiv == NULL) { free(buf); return (NULL); } bp = buf; eb = buf + len; kp = kiv; /* Pass 2: unpack */ while (bp < eb) { kv = (struct kinfo_vmentry *)(uintptr_t)bp; if (kv->kve_structsize == 0) break; /* Copy/expand into pre-zeroed buffer */ memcpy(kp, kv, kv->kve_structsize); /* Advance to next packed record */ bp += kv->kve_structsize; /* Set field size to fixed length, advance */ kp->kve_structsize = sizeof(*kp); kp++; } free(buf); *cntp = cnt; return (kiv); /* Caller must free() return value */ } struct kinfo_vmentry * procstat_getvmmap(struct procstat *procstat, struct kinfo_proc *kp, unsigned int *cntp) { switch(procstat->type) { case PROCSTAT_KVM: warnx("kvm method is not supported"); return (NULL); case PROCSTAT_SYSCTL: return (kinfo_getvmmap(kp->ki_pid, cntp)); case PROCSTAT_CORE: return (kinfo_getvmmap_core(procstat->core, cntp)); default: warnx("unknown access method: %d", procstat->type); return (NULL); } } void procstat_freevmmap(struct procstat *procstat __unused, struct kinfo_vmentry *vmmap) { free(vmmap); } static gid_t * procstat_getgroups_kvm(kvm_t *kd, struct kinfo_proc *kp, unsigned int *cntp) { struct proc proc; struct ucred ucred; gid_t *groups; size_t len; assert(kd != NULL); assert(kp != NULL); if (!kvm_read_all(kd, (unsigned long)kp->ki_paddr, &proc, sizeof(proc))) { warnx("can't read proc struct at %p for pid %d", kp->ki_paddr, kp->ki_pid); return (NULL); } if (proc.p_ucred == NOCRED) return (NULL); if (!kvm_read_all(kd, (unsigned long)proc.p_ucred, &ucred, sizeof(ucred))) { warnx("can't read ucred struct at %p for pid %d", proc.p_ucred, kp->ki_pid); return (NULL); } len = ucred.cr_ngroups * sizeof(gid_t); groups = malloc(len); if (groups == NULL) { warn("malloc(%zu)", len); return (NULL); } if (!kvm_read_all(kd, (unsigned long)ucred.cr_groups, groups, len)) { warnx("can't read groups at %p for pid %d", ucred.cr_groups, kp->ki_pid); free(groups); return (NULL); } *cntp = ucred.cr_ngroups; return (groups); } static gid_t * procstat_getgroups_sysctl(pid_t pid, unsigned int *cntp) { int mib[4]; size_t len; gid_t *groups; mib[0] = CTL_KERN; mib[1] = KERN_PROC; mib[2] = KERN_PROC_GROUPS; mib[3] = pid; len = (sysconf(_SC_NGROUPS_MAX) + 1) * sizeof(gid_t); groups = malloc(len); if (groups == NULL) { warn("malloc(%zu)", len); return (NULL); } if (sysctl(mib, nitems(mib), groups, &len, NULL, 0) == -1) { warn("sysctl: kern.proc.groups: %d", pid); free(groups); return (NULL); } *cntp = len / sizeof(gid_t); return (groups); } static gid_t * procstat_getgroups_core(struct procstat_core *core, unsigned int *cntp) { size_t len; gid_t *groups; groups = procstat_core_get(core, PSC_TYPE_GROUPS, NULL, &len); if (groups == NULL) return (NULL); *cntp = len / sizeof(gid_t); return (groups); } gid_t * procstat_getgroups(struct procstat *procstat, struct kinfo_proc *kp, unsigned int *cntp) { switch(procstat->type) { case PROCSTAT_KVM: return (procstat_getgroups_kvm(procstat->kd, kp, cntp)); case PROCSTAT_SYSCTL: return (procstat_getgroups_sysctl(kp->ki_pid, cntp)); case PROCSTAT_CORE: return (procstat_getgroups_core(procstat->core, cntp)); default: warnx("unknown access method: %d", procstat->type); return (NULL); } } void procstat_freegroups(struct procstat *procstat __unused, gid_t *groups) { free(groups); } static int procstat_getumask_kvm(kvm_t *kd, struct kinfo_proc *kp, unsigned short *maskp) { struct filedesc fd; assert(kd != NULL); assert(kp != NULL); if (kp->ki_fd == NULL) return (-1); if (!kvm_read_all(kd, (unsigned long)kp->ki_fd, &fd, sizeof(fd))) { warnx("can't read filedesc at %p for pid %d", kp->ki_fd, kp->ki_pid); return (-1); } *maskp = fd.fd_cmask; return (0); } static int procstat_getumask_sysctl(pid_t pid, unsigned short *maskp) { int error; int mib[4]; size_t len; mib[0] = CTL_KERN; mib[1] = KERN_PROC; mib[2] = KERN_PROC_UMASK; mib[3] = pid; len = sizeof(*maskp); error = sysctl(mib, nitems(mib), maskp, &len, NULL, 0); if (error != 0 && errno != ESRCH && errno != EPERM) warn("sysctl: kern.proc.umask: %d", pid); return (error); } static int procstat_getumask_core(struct procstat_core *core, unsigned short *maskp) { size_t len; unsigned short *buf; buf = procstat_core_get(core, PSC_TYPE_UMASK, NULL, &len); if (buf == NULL) return (-1); if (len < sizeof(*maskp)) { free(buf); return (-1); } *maskp = *buf; free(buf); return (0); } int procstat_getumask(struct procstat *procstat, struct kinfo_proc *kp, unsigned short *maskp) { switch(procstat->type) { case PROCSTAT_KVM: return (procstat_getumask_kvm(procstat->kd, kp, maskp)); case PROCSTAT_SYSCTL: return (procstat_getumask_sysctl(kp->ki_pid, maskp)); case PROCSTAT_CORE: return (procstat_getumask_core(procstat->core, maskp)); default: warnx("unknown access method: %d", procstat->type); return (-1); } } static int procstat_getrlimit_kvm(kvm_t *kd, struct kinfo_proc *kp, int which, struct rlimit* rlimit) { struct proc proc; unsigned long offset; assert(kd != NULL); assert(kp != NULL); assert(which >= 0 && which < RLIM_NLIMITS); if (!kvm_read_all(kd, (unsigned long)kp->ki_paddr, &proc, sizeof(proc))) { warnx("can't read proc struct at %p for pid %d", kp->ki_paddr, kp->ki_pid); return (-1); } if (proc.p_limit == NULL) return (-1); offset = (unsigned long)proc.p_limit + sizeof(struct rlimit) * which; if (!kvm_read_all(kd, offset, rlimit, sizeof(*rlimit))) { warnx("can't read rlimit struct at %p for pid %d", (void *)offset, kp->ki_pid); return (-1); } return (0); } static int procstat_getrlimit_sysctl(pid_t pid, int which, struct rlimit* rlimit) { int error, name[5]; size_t len; name[0] = CTL_KERN; name[1] = KERN_PROC; name[2] = KERN_PROC_RLIMIT; name[3] = pid; name[4] = which; len = sizeof(struct rlimit); error = sysctl(name, nitems(name), rlimit, &len, NULL, 0); if (error < 0 && errno != ESRCH) { warn("sysctl: kern.proc.rlimit: %d", pid); return (-1); } if (error < 0 || len != sizeof(struct rlimit)) return (-1); return (0); } static int procstat_getrlimit_core(struct procstat_core *core, int which, struct rlimit* rlimit) { size_t len; struct rlimit* rlimits; if (which < 0 || which >= RLIM_NLIMITS) { errno = EINVAL; warn("getrlimit: which"); return (-1); } rlimits = procstat_core_get(core, PSC_TYPE_RLIMIT, NULL, &len); if (rlimits == NULL) return (-1); if (len < sizeof(struct rlimit) * RLIM_NLIMITS) { free(rlimits); return (-1); } *rlimit = rlimits[which]; return (0); } int procstat_getrlimit(struct procstat *procstat, struct kinfo_proc *kp, int which, struct rlimit* rlimit) { switch(procstat->type) { case PROCSTAT_KVM: return (procstat_getrlimit_kvm(procstat->kd, kp, which, rlimit)); case PROCSTAT_SYSCTL: return (procstat_getrlimit_sysctl(kp->ki_pid, which, rlimit)); case PROCSTAT_CORE: return (procstat_getrlimit_core(procstat->core, which, rlimit)); default: warnx("unknown access method: %d", procstat->type); return (-1); } } static int procstat_getpathname_sysctl(pid_t pid, char *pathname, size_t maxlen) { int error, name[4]; size_t len; name[0] = CTL_KERN; name[1] = KERN_PROC; name[2] = KERN_PROC_PATHNAME; name[3] = pid; len = maxlen; error = sysctl(name, nitems(name), pathname, &len, NULL, 0); if (error != 0 && errno != ESRCH) warn("sysctl: kern.proc.pathname: %d", pid); if (len == 0) pathname[0] = '\0'; return (error); } static int procstat_getpathname_core(struct procstat_core *core, char *pathname, size_t maxlen) { struct kinfo_file *files; int cnt, i, result; files = kinfo_getfile_core(core, &cnt); if (files == NULL) return (-1); result = -1; for (i = 0; i < cnt; i++) { if (files[i].kf_fd != KF_FD_TYPE_TEXT) continue; strncpy(pathname, files[i].kf_path, maxlen); result = 0; break; } free(files); return (result); } int procstat_getpathname(struct procstat *procstat, struct kinfo_proc *kp, char *pathname, size_t maxlen) { switch(procstat->type) { case PROCSTAT_KVM: /* XXX: Return empty string. */ if (maxlen > 0) pathname[0] = '\0'; return (0); case PROCSTAT_SYSCTL: return (procstat_getpathname_sysctl(kp->ki_pid, pathname, maxlen)); case PROCSTAT_CORE: return (procstat_getpathname_core(procstat->core, pathname, maxlen)); default: warnx("unknown access method: %d", procstat->type); return (-1); } } static int procstat_getosrel_kvm(kvm_t *kd, struct kinfo_proc *kp, int *osrelp) { struct proc proc; assert(kd != NULL); assert(kp != NULL); if (!kvm_read_all(kd, (unsigned long)kp->ki_paddr, &proc, sizeof(proc))) { warnx("can't read proc struct at %p for pid %d", kp->ki_paddr, kp->ki_pid); return (-1); } *osrelp = proc.p_osrel; return (0); } static int procstat_getosrel_sysctl(pid_t pid, int *osrelp) { int error, name[4]; size_t len; name[0] = CTL_KERN; name[1] = KERN_PROC; name[2] = KERN_PROC_OSREL; name[3] = pid; len = sizeof(*osrelp); error = sysctl(name, nitems(name), osrelp, &len, NULL, 0); if (error != 0 && errno != ESRCH) warn("sysctl: kern.proc.osrel: %d", pid); return (error); } static int procstat_getosrel_core(struct procstat_core *core, int *osrelp) { size_t len; int *buf; buf = procstat_core_get(core, PSC_TYPE_OSREL, NULL, &len); if (buf == NULL) return (-1); if (len < sizeof(*osrelp)) { free(buf); return (-1); } *osrelp = *buf; free(buf); return (0); } int procstat_getosrel(struct procstat *procstat, struct kinfo_proc *kp, int *osrelp) { switch(procstat->type) { case PROCSTAT_KVM: return (procstat_getosrel_kvm(procstat->kd, kp, osrelp)); case PROCSTAT_SYSCTL: return (procstat_getosrel_sysctl(kp->ki_pid, osrelp)); case PROCSTAT_CORE: return (procstat_getosrel_core(procstat->core, osrelp)); default: warnx("unknown access method: %d", procstat->type); return (-1); } } #define PROC_AUXV_MAX 256 #if __ELF_WORD_SIZE == 64 static const char *elf32_sv_names[] = { "Linux ELF32", "FreeBSD ELF32", }; static int is_elf32_sysctl(pid_t pid) { int error, name[4]; size_t len, i; static char sv_name[256]; name[0] = CTL_KERN; name[1] = KERN_PROC; name[2] = KERN_PROC_SV_NAME; name[3] = pid; len = sizeof(sv_name); error = sysctl(name, nitems(name), sv_name, &len, NULL, 0); if (error != 0 || len == 0) return (0); for (i = 0; i < sizeof(elf32_sv_names) / sizeof(*elf32_sv_names); i++) { if (strncmp(sv_name, elf32_sv_names[i], sizeof(sv_name)) == 0) return (1); } return (0); } static Elf_Auxinfo * procstat_getauxv32_sysctl(pid_t pid, unsigned int *cntp) { Elf_Auxinfo *auxv; Elf32_Auxinfo *auxv32; void *ptr; size_t len; unsigned int i, count; int name[4]; name[0] = CTL_KERN; name[1] = KERN_PROC; name[2] = KERN_PROC_AUXV; name[3] = pid; len = PROC_AUXV_MAX * sizeof(Elf32_Auxinfo); auxv = NULL; auxv32 = malloc(len); if (auxv32 == NULL) { warn("malloc(%zu)", len); goto out; } if (sysctl(name, nitems(name), auxv32, &len, NULL, 0) == -1) { if (errno != ESRCH && errno != EPERM) warn("sysctl: kern.proc.auxv: %d: %d", pid, errno); goto out; } count = len / sizeof(Elf_Auxinfo); auxv = malloc(count * sizeof(Elf_Auxinfo)); if (auxv == NULL) { warn("malloc(%zu)", count * sizeof(Elf_Auxinfo)); goto out; } for (i = 0; i < count; i++) { /* * XXX: We expect that values for a_type on a 32-bit platform * are directly mapped to values on 64-bit one, which is not * necessarily true. */ auxv[i].a_type = auxv32[i].a_type; ptr = &auxv32[i].a_un; auxv[i].a_un.a_val = *((uint32_t *)ptr); } *cntp = count; out: free(auxv32); return (auxv); } #endif /* __ELF_WORD_SIZE == 64 */ static Elf_Auxinfo * procstat_getauxv_sysctl(pid_t pid, unsigned int *cntp) { Elf_Auxinfo *auxv; int name[4]; size_t len; #if __ELF_WORD_SIZE == 64 if (is_elf32_sysctl(pid)) return (procstat_getauxv32_sysctl(pid, cntp)); #endif name[0] = CTL_KERN; name[1] = KERN_PROC; name[2] = KERN_PROC_AUXV; name[3] = pid; len = PROC_AUXV_MAX * sizeof(Elf_Auxinfo); auxv = malloc(len); if (auxv == NULL) { warn("malloc(%zu)", len); return (NULL); } if (sysctl(name, nitems(name), auxv, &len, NULL, 0) == -1) { if (errno != ESRCH && errno != EPERM) warn("sysctl: kern.proc.auxv: %d: %d", pid, errno); free(auxv); return (NULL); } *cntp = len / sizeof(Elf_Auxinfo); return (auxv); } static Elf_Auxinfo * procstat_getauxv_core(struct procstat_core *core, unsigned int *cntp) { Elf_Auxinfo *auxv; size_t len; auxv = procstat_core_get(core, PSC_TYPE_AUXV, NULL, &len); if (auxv == NULL) return (NULL); *cntp = len / sizeof(Elf_Auxinfo); return (auxv); } Elf_Auxinfo * procstat_getauxv(struct procstat *procstat, struct kinfo_proc *kp, unsigned int *cntp) { switch(procstat->type) { case PROCSTAT_KVM: warnx("kvm method is not supported"); return (NULL); case PROCSTAT_SYSCTL: return (procstat_getauxv_sysctl(kp->ki_pid, cntp)); case PROCSTAT_CORE: return (procstat_getauxv_core(procstat->core, cntp)); default: warnx("unknown access method: %d", procstat->type); return (NULL); } } void procstat_freeauxv(struct procstat *procstat __unused, Elf_Auxinfo *auxv) { free(auxv); } static struct ptrace_lwpinfo * procstat_getptlwpinfo_core(struct procstat_core *core, unsigned int *cntp) { void *buf; struct ptrace_lwpinfo *pl; unsigned int cnt; size_t len; cnt = procstat_core_note_count(core, PSC_TYPE_PTLWPINFO); if (cnt == 0) return (NULL); len = cnt * sizeof(*pl); buf = calloc(1, len); pl = procstat_core_get(core, PSC_TYPE_PTLWPINFO, buf, &len); if (pl == NULL) { free(buf); return (NULL); } *cntp = len / sizeof(*pl); return (pl); } struct ptrace_lwpinfo * procstat_getptlwpinfo(struct procstat *procstat, unsigned int *cntp) { switch (procstat->type) { case PROCSTAT_CORE: return (procstat_getptlwpinfo_core(procstat->core, cntp)); default: warnx("unknown access method: %d", procstat->type); return (NULL); } } void procstat_freeptlwpinfo(struct procstat *procstat __unused, struct ptrace_lwpinfo *pl) { free(pl); } static struct kinfo_kstack * procstat_getkstack_sysctl(pid_t pid, int *cntp) { struct kinfo_kstack *kkstp; int error, name[4]; size_t len; name[0] = CTL_KERN; name[1] = KERN_PROC; name[2] = KERN_PROC_KSTACK; name[3] = pid; len = 0; error = sysctl(name, nitems(name), NULL, &len, NULL, 0); if (error < 0 && errno != ESRCH && errno != EPERM && errno != ENOENT) { warn("sysctl: kern.proc.kstack: %d", pid); return (NULL); } if (error == -1 && errno == ENOENT) { warnx("sysctl: kern.proc.kstack unavailable" " (options DDB or options STACK required in kernel)"); return (NULL); } if (error == -1) return (NULL); kkstp = malloc(len); if (kkstp == NULL) { warn("malloc(%zu)", len); return (NULL); } if (sysctl(name, nitems(name), kkstp, &len, NULL, 0) == -1) { warn("sysctl: kern.proc.pid: %d", pid); free(kkstp); return (NULL); } *cntp = len / sizeof(*kkstp); return (kkstp); } struct kinfo_kstack * procstat_getkstack(struct procstat *procstat, struct kinfo_proc *kp, unsigned int *cntp) { switch(procstat->type) { case PROCSTAT_KVM: warnx("kvm method is not supported"); return (NULL); case PROCSTAT_SYSCTL: return (procstat_getkstack_sysctl(kp->ki_pid, cntp)); case PROCSTAT_CORE: warnx("core method is not supported"); return (NULL); default: warnx("unknown access method: %d", procstat->type); return (NULL); } } void procstat_freekstack(struct procstat *procstat __unused, struct kinfo_kstack *kkstp) { free(kkstp); } Index: head/lib/libprocstat/libprocstat.h =================================================================== --- head/lib/libprocstat/libprocstat.h (revision 318735) +++ head/lib/libprocstat/libprocstat.h (revision 318736) @@ -1,226 +1,226 @@ /*- * Copyright (c) 2009 Stanislav Sedov * Copyright (c) 2017 Dell EMC * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LIBPROCSTAT_H_ #define _LIBPROCSTAT_H_ /* * XXX: sys/elf.h conflicts with zfs_context.h. Workaround this by not * including conflicting parts when building zfs code. */ #ifndef ZFS #include #endif #include /* * Vnode types. */ #define PS_FST_VTYPE_VNON 1 #define PS_FST_VTYPE_VREG 2 #define PS_FST_VTYPE_VDIR 3 #define PS_FST_VTYPE_VBLK 4 #define PS_FST_VTYPE_VCHR 5 #define PS_FST_VTYPE_VLNK 6 #define PS_FST_VTYPE_VSOCK 7 #define PS_FST_VTYPE_VFIFO 8 #define PS_FST_VTYPE_VBAD 9 #define PS_FST_VTYPE_UNKNOWN 255 /* * Descriptor types. */ #define PS_FST_TYPE_VNODE 1 #define PS_FST_TYPE_FIFO 2 #define PS_FST_TYPE_SOCKET 3 #define PS_FST_TYPE_PIPE 4 #define PS_FST_TYPE_PTS 5 #define PS_FST_TYPE_KQUEUE 6 #define PS_FST_TYPE_CRYPTO 7 #define PS_FST_TYPE_MQUEUE 8 #define PS_FST_TYPE_SHM 9 #define PS_FST_TYPE_SEM 10 #define PS_FST_TYPE_UNKNOWN 11 #define PS_FST_TYPE_NONE 12 /* * Special descriptor numbers. */ #define PS_FST_UFLAG_RDIR 0x0001 #define PS_FST_UFLAG_CDIR 0x0002 #define PS_FST_UFLAG_JAIL 0x0004 #define PS_FST_UFLAG_TRACE 0x0008 #define PS_FST_UFLAG_TEXT 0x0010 #define PS_FST_UFLAG_MMAP 0x0020 #define PS_FST_UFLAG_CTTY 0x0040 /* * Descriptor flags. */ #define PS_FST_FFLAG_READ 0x0001 #define PS_FST_FFLAG_WRITE 0x0002 #define PS_FST_FFLAG_NONBLOCK 0x0004 #define PS_FST_FFLAG_APPEND 0x0008 #define PS_FST_FFLAG_SHLOCK 0x0010 #define PS_FST_FFLAG_EXLOCK 0x0020 #define PS_FST_FFLAG_ASYNC 0x0040 #define PS_FST_FFLAG_SYNC 0x0080 #define PS_FST_FFLAG_NOFOLLOW 0x0100 #define PS_FST_FFLAG_CREAT 0x0200 #define PS_FST_FFLAG_TRUNC 0x0400 #define PS_FST_FFLAG_EXCL 0x0800 #define PS_FST_FFLAG_DIRECT 0x1000 #define PS_FST_FFLAG_EXEC 0x2000 #define PS_FST_FFLAG_HASLOCK 0x4000 struct kinfo_kstack; struct kinfo_vmentry; struct procstat; struct ptrace_lwpinfo; struct rlimit; struct filestat { int fs_type; /* Descriptor type. */ int fs_flags; /* filestat specific flags. */ int fs_fflags; /* Descriptor access flags. */ int fs_uflags; /* How this file is used. */ int fs_fd; /* File descriptor number. */ int fs_ref_count; /* Reference count. */ off_t fs_offset; /* Seek location. */ void *fs_typedep; /* Type dependent data. */ char *fs_path; STAILQ_ENTRY(filestat) next; cap_rights_t fs_cap_rights; /* Capability rights, if flag set. */ }; struct vnstat { uint64_t vn_fileid; uint64_t vn_size; + uint64_t vn_dev; + uint64_t vn_fsid; char *vn_mntdir; - uint32_t vn_dev; - uint32_t vn_fsid; int vn_type; uint16_t vn_mode; char vn_devname[SPECNAMELEN + 1]; }; struct ptsstat { - uint32_t dev; + uint64_t dev; char devname[SPECNAMELEN + 1]; }; struct pipestat { size_t buffer_cnt; uint64_t addr; uint64_t peer; }; struct semstat { uint32_t value; uint16_t mode; }; struct shmstat { uint64_t size; uint16_t mode; }; struct sockstat { uint64_t inp_ppcb; uint64_t so_addr; uint64_t so_pcb; uint64_t unp_conn; int dom_family; int proto; int so_rcv_sb_state; int so_snd_sb_state; struct sockaddr_storage sa_local; /* Socket address. */ struct sockaddr_storage sa_peer; /* Peer address. */ int type; char dname[32]; }; STAILQ_HEAD(filestat_list, filestat); __BEGIN_DECLS void procstat_close(struct procstat *procstat); void procstat_freeargv(struct procstat *procstat); #ifndef ZFS void procstat_freeauxv(struct procstat *procstat, Elf_Auxinfo *auxv); #endif void procstat_freeenvv(struct procstat *procstat); void procstat_freegroups(struct procstat *procstat, gid_t *groups); void procstat_freekstack(struct procstat *procstat, struct kinfo_kstack *kkstp); void procstat_freeprocs(struct procstat *procstat, struct kinfo_proc *p); void procstat_freefiles(struct procstat *procstat, struct filestat_list *head); void procstat_freeptlwpinfo(struct procstat *procstat, struct ptrace_lwpinfo *pl); void procstat_freevmmap(struct procstat *procstat, struct kinfo_vmentry *vmmap); struct filestat_list *procstat_getfiles(struct procstat *procstat, struct kinfo_proc *kp, int mmapped); struct kinfo_proc *procstat_getprocs(struct procstat *procstat, int what, int arg, unsigned int *count); int procstat_get_pipe_info(struct procstat *procstat, struct filestat *fst, struct pipestat *pipe, char *errbuf); int procstat_get_pts_info(struct procstat *procstat, struct filestat *fst, struct ptsstat *pts, char *errbuf); int procstat_get_sem_info(struct procstat *procstat, struct filestat *fst, struct semstat *sem, char *errbuf); int procstat_get_shm_info(struct procstat *procstat, struct filestat *fst, struct shmstat *shm, char *errbuf); int procstat_get_socket_info(struct procstat *procstat, struct filestat *fst, struct sockstat *sock, char *errbuf); int procstat_get_vnode_info(struct procstat *procstat, struct filestat *fst, struct vnstat *vn, char *errbuf); char **procstat_getargv(struct procstat *procstat, struct kinfo_proc *p, size_t nchr); #ifndef ZFS Elf_Auxinfo *procstat_getauxv(struct procstat *procstat, struct kinfo_proc *kp, unsigned int *cntp); #endif struct ptrace_lwpinfo *procstat_getptlwpinfo(struct procstat *procstat, unsigned int *cntp); char **procstat_getenvv(struct procstat *procstat, struct kinfo_proc *p, size_t nchr); gid_t *procstat_getgroups(struct procstat *procstat, struct kinfo_proc *kp, unsigned int *count); struct kinfo_kstack *procstat_getkstack(struct procstat *procstat, struct kinfo_proc *kp, unsigned int *count); int procstat_getosrel(struct procstat *procstat, struct kinfo_proc *kp, int *osrelp); int procstat_getpathname(struct procstat *procstat, struct kinfo_proc *kp, char *pathname, size_t maxlen); int procstat_getrlimit(struct procstat *procstat, struct kinfo_proc *kp, int which, struct rlimit* rlimit); int procstat_getumask(struct procstat *procstat, struct kinfo_proc *kp, unsigned short* umask); struct kinfo_vmentry *procstat_getvmmap(struct procstat *procstat, struct kinfo_proc *kp, unsigned int *count); struct procstat *procstat_open_core(const char *filename); struct procstat *procstat_open_sysctl(void); struct procstat *procstat_open_kvm(const char *nlistf, const char *memf); __END_DECLS #endif /* !_LIBPROCSTAT_H_ */ Index: head/lib/libprocstat/libprocstat_compat.c =================================================================== --- head/lib/libprocstat/libprocstat_compat.c (nonexistent) +++ head/lib/libprocstat/libprocstat_compat.c (revision 318736) @@ -0,0 +1,144 @@ +/*- + * Copyright (c) 2014 Gleb Kurtsou + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include "libprocstat.h" + +struct freebsd11_ptsstat { + uint32_t dev; + char devname[SPECNAMELEN + 1]; +}; + +struct freebsd11_vnstat { + uint64_t vn_fileid; + uint64_t vn_size; + char *vn_mntdir; + uint32_t vn_dev; + uint32_t vn_fsid; + int vn_type; + uint16_t vn_mode; + char vn_devname[SPECNAMELEN + 1]; +}; +struct freebsd11_semstat { + uint32_t value; + uint16_t mode; +}; +struct freebsd11_shmstat { + uint64_t size; + uint16_t mode; +}; + +int freebsd11_procstat_get_pts_info(struct procstat *procstat, + struct filestat *fst, struct freebsd11_ptsstat *pts, char *errbuf); +int freebsd11_procstat_get_sem_info(struct procstat *procstat, + struct filestat *fst, struct freebsd11_semstat *sem, char *errbuf); +int freebsd11_procstat_get_shm_info(struct procstat *procstat, + struct filestat *fst, struct freebsd11_shmstat *shm, char *errbuf); +int freebsd11_procstat_get_vnode_info(struct procstat *procstat, + struct filestat *fst, struct freebsd11_vnstat *vn, char *errbuf); + +int +freebsd11_procstat_get_pts_info(struct procstat *procstat, + struct filestat *fst, struct freebsd11_ptsstat *pts_compat, char *errbuf) +{ + struct ptsstat pts; + int r; + + r = procstat_get_pts_info(procstat, fst, &pts, errbuf); + if (r != 0) + return (r); + pts_compat->dev = pts.dev; + memcpy(pts_compat->devname, pts.devname, + sizeof(pts_compat->devname)); + return (0); +} + +int +freebsd11_procstat_get_sem_info(struct procstat *procstat, + struct filestat *fst, struct freebsd11_semstat *sem_compat, char *errbuf) +{ + struct semstat sem; + int r; + + r = procstat_get_sem_info(procstat, fst, &sem, errbuf); + if (r != 0) + return (r); + sem_compat->value = sem.value; + sem_compat->mode = sem.mode; + return (0); +} + +int +freebsd11_procstat_get_shm_info(struct procstat *procstat, + struct filestat *fst, struct freebsd11_shmstat *shm_compat, char *errbuf) +{ + struct shmstat shm; + int r; + + r = procstat_get_shm_info(procstat, fst, &shm, errbuf); + if (r != 0) + return (r); + shm_compat->size = shm.size; + shm_compat->mode = shm.mode; + return (0); +} + +int +freebsd11_procstat_get_vnode_info(struct procstat *procstat, + struct filestat *fst, struct freebsd11_vnstat *vn_compat, char *errbuf) +{ + struct vnstat vn; + int r; + + r = procstat_get_vnode_info(procstat, fst, &vn, errbuf); + if (r != 0) + return (r); + vn_compat->vn_fileid = vn.vn_fileid; + vn_compat->vn_size = vn.vn_size; + vn_compat->vn_mntdir = vn.vn_mntdir; + vn_compat->vn_dev = vn.vn_dev; + vn_compat->vn_fsid = vn.vn_fsid; + vn_compat->vn_type = vn.vn_type; + vn_compat->vn_mode = vn.vn_mode; + memcpy(vn_compat->vn_devname, vn.vn_devname, + sizeof(vn_compat->vn_devname)); + return (0); +} + +__sym_compat(procstat_get_pts_info, freebsd11_procstat_get_pts_info, FBSD_1.2); +__sym_compat(procstat_get_vnode_info, freebsd11_procstat_get_vnode_info, + FBSD_1.2); +__sym_compat(procstat_get_sem_info, freebsd11_procstat_get_sem_info, FBSD_1.3); +__sym_compat(procstat_get_shm_info, freebsd11_procstat_get_shm_info, FBSD_1.3); Property changes on: head/lib/libprocstat/libprocstat_compat.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/lib/libufs/libufs.h =================================================================== --- head/lib/libufs/libufs.h (revision 318735) +++ head/lib/libufs/libufs.h (revision 318736) @@ -1,149 +1,149 @@ /* * Copyright (c) 2002 Juli Mallett. All rights reserved. * * This software was written by Juli Mallett for the * FreeBSD project. Redistribution and use in source and binary forms, with * or without modification, are permitted provided that the following * conditions are met: * * 1. Redistribution of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistribution in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __LIBUFS_H__ #define __LIBUFS_H__ /* * libufs structures. */ /* * userland ufs disk. */ struct uufsd { const char *d_name; /* disk name */ int d_ufs; /* decimal UFS version */ int d_fd; /* raw device file descriptor */ long d_bsize; /* device bsize */ ufs2_daddr_t d_sblock; /* superblock location */ struct csum *d_sbcsum; /* Superblock summary info */ caddr_t d_inoblock; /* inode block */ - ino_t d_inomin; /* low inode */ - ino_t d_inomax; /* high inode */ + uint32_t d_inomin; /* low inode (not ino_t for ABI compat) */ + uint32_t d_inomax; /* high inode (not ino_t for ABI compat) */ union { struct fs d_fs; /* filesystem information */ char d_sb[MAXBSIZE]; /* superblock as buffer */ } d_sbunion; union { struct cg d_cg; /* cylinder group */ char d_buf[MAXBSIZE]; /* cylinder group storage */ } d_cgunion; int d_ccg; /* current cylinder group */ int d_lcg; /* last cylinder group (in d_cg) */ const char *d_error; /* human readable disk error */ int d_mine; /* internal flags */ #define d_fs d_sbunion.d_fs #define d_sb d_sbunion.d_sb #define d_cg d_cgunion.d_cg }; /* * libufs macros (internal, non-exported). */ #ifdef _LIBUFS /* * Trace steps through libufs, to be used at entry and erroneous return. */ static inline void ERROR(struct uufsd *u, const char *str) { #ifdef _LIBUFS_DEBUGGING if (str != NULL) { fprintf(stderr, "libufs: %s", str); if (errno != 0) fprintf(stderr, ": %s", strerror(errno)); fprintf(stderr, "\n"); } #endif if (u != NULL) u->d_error = str; } #endif /* _LIBUFS */ __BEGIN_DECLS /* * libufs prototypes. */ /* * block.c */ ssize_t bread(struct uufsd *, ufs2_daddr_t, void *, size_t); ssize_t bwrite(struct uufsd *, ufs2_daddr_t, const void *, size_t); int berase(struct uufsd *, ufs2_daddr_t, ufs2_daddr_t); /* * cgroup.c */ ufs2_daddr_t cgballoc(struct uufsd *); int cgbfree(struct uufsd *, ufs2_daddr_t, long); ino_t cgialloc(struct uufsd *); int cgread(struct uufsd *); int cgread1(struct uufsd *, int); int cgwrite(struct uufsd *); int cgwrite1(struct uufsd *, int); /* * inode.c */ int getino(struct uufsd *, void **, ino_t, int *); int putino(struct uufsd *); /* * sblock.c */ int sbread(struct uufsd *); int sbwrite(struct uufsd *, int); /* * type.c */ int ufs_disk_close(struct uufsd *); int ufs_disk_fillout(struct uufsd *, const char *); int ufs_disk_fillout_blank(struct uufsd *, const char *); int ufs_disk_write(struct uufsd *); /* * ffs_subr.c */ void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t); void ffs_clusteracct(struct fs *, struct cg *, ufs1_daddr_t, int); void ffs_fragacct(struct fs *, int, int32_t [], int); int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t); void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t); __END_DECLS #endif /* __LIBUFS_H__ */ Index: head/sbin/badsect/badsect.c =================================================================== --- head/sbin/badsect/badsect.c (revision 318735) +++ head/sbin/badsect/badsect.c (revision 318736) @@ -1,193 +1,182 @@ /* * Copyright (c) 1981, 1983, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1981, 1983, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static const char sccsid[] = "@(#)badsect.c 8.1 (Berkeley) 6/5/93"; #endif #endif #include __FBSDID("$FreeBSD$"); /* * badsect * * Badsect takes a list of file-system relative sector numbers * and makes files containing the blocks of which these sectors are a part. * It can be used to contain sectors which have problems if these sectors * are not part of the bad file for the pack (see bad144). For instance, * this program can be used if the driver for the file system in question * does not support bad block forwarding. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define sblock disk.d_fs #define acg disk.d_cg static struct uufsd disk; static struct fs *fs = &sblock; static int errs; int chkuse(daddr_t, int); static void usage(void) { fprintf(stderr, "usage: badsect bbdir blkno ...\n"); exit(1); } int main(int argc, char *argv[]) { daddr_t diskbn; daddr_t number; struct stat stbuf, devstat; struct dirent *dp; DIR *dirp; char name[2 * MAXPATHLEN]; char *name_dir_end; if (argc < 3) usage(); if (chdir(argv[1]) < 0 || stat(".", &stbuf) < 0) err(2, "%s", argv[1]); strcpy(name, _PATH_DEV); if ((dirp = opendir(name)) == NULL) err(3, "%s", name); name_dir_end = name + strlen(name); while ((dp = readdir(dirp)) != NULL) { strcpy(name_dir_end, dp->d_name); if (lstat(name, &devstat) < 0) err(4, "%s", name); if (stbuf.st_dev == devstat.st_rdev && (devstat.st_mode & IFMT) == IFCHR) break; } closedir(dirp); if (dp == NULL) { printf("Cannot find dev 0%lo corresponding to %s\n", (u_long)stbuf.st_rdev, argv[1]); exit(5); } if (ufs_disk_fillout(&disk, name) == -1) { if (disk.d_error != NULL) errx(6, "%s: %s", name, disk.d_error); else err(7, "%s", name); } for (argc -= 2, argv += 2; argc > 0; argc--, argv++) { number = strtol(*argv, NULL, 0); if (errno == EINVAL || errno == ERANGE) err(8, "%s", *argv); if (chkuse(number, 1)) continue; - /* - * Print a warning if converting the block number to a dev_t - * will truncate it. badsect was not very useful in versions - * of BSD before 4.4 because dev_t was 16 bits and another - * bit was lost by bogus sign extensions. - */ diskbn = dbtofsb(fs, number); - if ((dev_t)diskbn != diskbn) { - printf("sector %ld cannot be represented as a dev_t\n", - (long)number); - errs++; - } - else if (mknod(*argv, IFMT|0600, (dev_t)diskbn) < 0) { + if (mknod(*argv, IFMT|0600, (dev_t)diskbn) < 0) { warn("%s", *argv); errs++; } } ufs_disk_close(&disk); printf("Don't forget to run ``fsck %s''\n", name); exit(errs); } int chkuse(daddr_t blkno, int cnt) { int cg; daddr_t fsbn, bn; fsbn = dbtofsb(fs, blkno); if ((unsigned)(fsbn+cnt) > fs->fs_size) { printf("block %ld out of range of file system\n", (long)blkno); return (1); } cg = dtog(fs, fsbn); if (fsbn < cgdmin(fs, cg)) { if (cg == 0 || (fsbn+cnt) > cgsblock(fs, cg)) { printf("block %ld in non-data area: cannot attach\n", (long)blkno); return (1); } } else { if ((fsbn+cnt) > cgbase(fs, cg+1)) { printf("block %ld in non-data area: cannot attach\n", (long)blkno); return (1); } } if (cgread1(&disk, cg) != 1) { fprintf(stderr, "cg %d: could not be read\n", cg); errs++; return (1); } if (!cg_chkmagic(&acg)) { fprintf(stderr, "cg %d: bad magic number\n", cg); errs++; return (1); } bn = dtogd(fs, fsbn); if (isclr(cg_blksfree(&acg), bn)) printf("Warning: sector %ld is in use\n", (long)blkno); return (0); } Index: head/sbin/fsck_ffs/suj.c =================================================================== --- head/sbin/fsck_ffs/suj.c (revision 318735) +++ head/sbin/fsck_ffs/suj.c (revision 318736) @@ -1,2802 +1,2802 @@ /*- * Copyright 2009, 2010 Jeffrey W. Roberson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" #define DOTDOT_OFFSET DIRECTSIZ(1) #define SUJ_HASHSIZE 2048 #define SUJ_HASHMASK (SUJ_HASHSIZE - 1) #define SUJ_HASH(x) ((x * 2654435761) & SUJ_HASHMASK) struct suj_seg { TAILQ_ENTRY(suj_seg) ss_next; struct jsegrec ss_rec; uint8_t *ss_blk; }; struct suj_rec { TAILQ_ENTRY(suj_rec) sr_next; union jrec *sr_rec; }; TAILQ_HEAD(srechd, suj_rec); struct suj_ino { LIST_ENTRY(suj_ino) si_next; struct srechd si_recs; struct srechd si_newrecs; struct srechd si_movs; struct jtrncrec *si_trunc; ino_t si_ino; char si_skipparent; char si_hasrecs; char si_blkadj; char si_linkadj; int si_mode; nlink_t si_nlinkadj; nlink_t si_nlink; nlink_t si_dotlinks; }; LIST_HEAD(inohd, suj_ino); struct suj_blk { LIST_ENTRY(suj_blk) sb_next; struct srechd sb_recs; ufs2_daddr_t sb_blk; }; LIST_HEAD(blkhd, suj_blk); struct data_blk { LIST_ENTRY(data_blk) db_next; uint8_t *db_buf; ufs2_daddr_t db_blk; int db_size; int db_dirty; }; struct ino_blk { LIST_ENTRY(ino_blk) ib_next; uint8_t *ib_buf; int ib_dirty; ufs2_daddr_t ib_blk; }; LIST_HEAD(iblkhd, ino_blk); struct suj_cg { LIST_ENTRY(suj_cg) sc_next; struct blkhd sc_blkhash[SUJ_HASHSIZE]; struct inohd sc_inohash[SUJ_HASHSIZE]; struct iblkhd sc_iblkhash[SUJ_HASHSIZE]; struct ino_blk *sc_lastiblk; struct suj_ino *sc_lastino; struct suj_blk *sc_lastblk; uint8_t *sc_cgbuf; struct cg *sc_cgp; int sc_dirty; int sc_cgx; }; static LIST_HEAD(cghd, suj_cg) cghash[SUJ_HASHSIZE]; static LIST_HEAD(dblkhd, data_blk) dbhash[SUJ_HASHSIZE]; static struct suj_cg *lastcg; static struct data_blk *lastblk; static TAILQ_HEAD(seghd, suj_seg) allsegs; static uint64_t oldseq; static struct uufsd *disk = NULL; static struct fs *fs = NULL; static ino_t sujino; /* * Summary statistics. */ static uint64_t freefrags; static uint64_t freeblocks; static uint64_t freeinos; static uint64_t freedir; static uint64_t jbytes; static uint64_t jrecs; static jmp_buf jmpbuf; typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int); static void err_suj(const char *, ...) __dead2; static void ino_trunc(ino_t, off_t); static void ino_decr(ino_t); static void ino_adjust(struct suj_ino *); static void ino_build(struct suj_ino *); static int blk_isfree(ufs2_daddr_t); static void initsuj(void); static void * errmalloc(size_t n) { void *a; a = Malloc(n); if (a == NULL) err(EX_OSERR, "malloc(%zu)", n); return (a); } /* * When hit a fatal error in journalling check, print out * the error and then offer to fallback to normal fsck. */ static void err_suj(const char * restrict fmt, ...) { va_list ap; if (preen) (void)fprintf(stdout, "%s: ", cdevname); va_start(ap, fmt); (void)vfprintf(stdout, fmt, ap); va_end(ap); longjmp(jmpbuf, -1); } /* * Open the given provider, load superblock. */ static void opendisk(const char *devnam) { if (disk != NULL) return; disk = Malloc(sizeof(*disk)); if (disk == NULL) err(EX_OSERR, "malloc(%zu)", sizeof(*disk)); if (ufs_disk_fillout(disk, devnam) == -1) { err(EX_OSERR, "ufs_disk_fillout(%s) failed: %s", devnam, disk->d_error); } fs = &disk->d_fs; if (real_dev_bsize == 0 && ioctl(disk->d_fd, DIOCGSECTORSIZE, &real_dev_bsize) == -1) real_dev_bsize = secsize; if (debug) printf("dev_bsize %u\n", real_dev_bsize); } /* * Mark file system as clean, write the super-block back, close the disk. */ static void closedisk(const char *devnam) { struct csum *cgsum; uint32_t i; /* * Recompute the fs summary info from correct cs summaries. */ bzero(&fs->fs_cstotal, sizeof(struct csum_total)); for (i = 0; i < fs->fs_ncg; i++) { cgsum = &fs->fs_cs(fs, i); fs->fs_cstotal.cs_nffree += cgsum->cs_nffree; fs->fs_cstotal.cs_nbfree += cgsum->cs_nbfree; fs->fs_cstotal.cs_nifree += cgsum->cs_nifree; fs->fs_cstotal.cs_ndir += cgsum->cs_ndir; } fs->fs_pendinginodes = 0; fs->fs_pendingblocks = 0; fs->fs_clean = 1; fs->fs_time = time(NULL); fs->fs_mtime = time(NULL); if (sbwrite(disk, 0) == -1) err(EX_OSERR, "sbwrite(%s)", devnam); if (ufs_disk_close(disk) == -1) err(EX_OSERR, "ufs_disk_close(%s)", devnam); free(disk); disk = NULL; fs = NULL; } /* * Lookup a cg by number in the hash so we can keep track of which cgs * need stats rebuilt. */ static struct suj_cg * cg_lookup(int cgx) { struct cghd *hd; struct suj_cg *sc; if (cgx < 0 || cgx >= fs->fs_ncg) err_suj("Bad cg number %d\n", cgx); if (lastcg && lastcg->sc_cgx == cgx) return (lastcg); hd = &cghash[SUJ_HASH(cgx)]; LIST_FOREACH(sc, hd, sc_next) if (sc->sc_cgx == cgx) { lastcg = sc; return (sc); } sc = errmalloc(sizeof(*sc)); bzero(sc, sizeof(*sc)); sc->sc_cgbuf = errmalloc(fs->fs_bsize); sc->sc_cgp = (struct cg *)sc->sc_cgbuf; sc->sc_cgx = cgx; LIST_INSERT_HEAD(hd, sc, sc_next); if (bread(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf, fs->fs_bsize) == -1) err_suj("Unable to read cylinder group %d\n", sc->sc_cgx); return (sc); } /* * Lookup an inode number in the hash and allocate a suj_ino if it does * not exist. */ static struct suj_ino * ino_lookup(ino_t ino, int creat) { struct suj_ino *sino; struct inohd *hd; struct suj_cg *sc; sc = cg_lookup(ino_to_cg(fs, ino)); if (sc->sc_lastino && sc->sc_lastino->si_ino == ino) return (sc->sc_lastino); hd = &sc->sc_inohash[SUJ_HASH(ino)]; LIST_FOREACH(sino, hd, si_next) if (sino->si_ino == ino) return (sino); if (creat == 0) return (NULL); sino = errmalloc(sizeof(*sino)); bzero(sino, sizeof(*sino)); sino->si_ino = ino; TAILQ_INIT(&sino->si_recs); TAILQ_INIT(&sino->si_newrecs); TAILQ_INIT(&sino->si_movs); LIST_INSERT_HEAD(hd, sino, si_next); return (sino); } /* * Lookup a block number in the hash and allocate a suj_blk if it does * not exist. */ static struct suj_blk * blk_lookup(ufs2_daddr_t blk, int creat) { struct suj_blk *sblk; struct suj_cg *sc; struct blkhd *hd; sc = cg_lookup(dtog(fs, blk)); if (sc->sc_lastblk && sc->sc_lastblk->sb_blk == blk) return (sc->sc_lastblk); hd = &sc->sc_blkhash[SUJ_HASH(fragstoblks(fs, blk))]; LIST_FOREACH(sblk, hd, sb_next) if (sblk->sb_blk == blk) return (sblk); if (creat == 0) return (NULL); sblk = errmalloc(sizeof(*sblk)); bzero(sblk, sizeof(*sblk)); sblk->sb_blk = blk; TAILQ_INIT(&sblk->sb_recs); LIST_INSERT_HEAD(hd, sblk, sb_next); return (sblk); } static struct data_blk * dblk_lookup(ufs2_daddr_t blk) { struct data_blk *dblk; struct dblkhd *hd; hd = &dbhash[SUJ_HASH(fragstoblks(fs, blk))]; if (lastblk && lastblk->db_blk == blk) return (lastblk); LIST_FOREACH(dblk, hd, db_next) if (dblk->db_blk == blk) return (dblk); /* * The inode block wasn't located, allocate a new one. */ dblk = errmalloc(sizeof(*dblk)); bzero(dblk, sizeof(*dblk)); LIST_INSERT_HEAD(hd, dblk, db_next); dblk->db_blk = blk; return (dblk); } static uint8_t * dblk_read(ufs2_daddr_t blk, int size) { struct data_blk *dblk; dblk = dblk_lookup(blk); /* * I doubt size mismatches can happen in practice but it is trivial * to handle. */ if (size != dblk->db_size) { if (dblk->db_buf) free(dblk->db_buf); dblk->db_buf = errmalloc(size); dblk->db_size = size; if (bread(disk, fsbtodb(fs, blk), dblk->db_buf, size) == -1) err_suj("Failed to read data block %jd\n", blk); } return (dblk->db_buf); } static void dblk_dirty(ufs2_daddr_t blk) { struct data_blk *dblk; dblk = dblk_lookup(blk); dblk->db_dirty = 1; } static void dblk_write(void) { struct data_blk *dblk; int i; for (i = 0; i < SUJ_HASHSIZE; i++) { LIST_FOREACH(dblk, &dbhash[i], db_next) { if (dblk->db_dirty == 0 || dblk->db_size == 0) continue; if (bwrite(disk, fsbtodb(fs, dblk->db_blk), dblk->db_buf, dblk->db_size) == -1) err_suj("Unable to write block %jd\n", dblk->db_blk); } } } static union dinode * ino_read(ino_t ino) { struct ino_blk *iblk; struct iblkhd *hd; struct suj_cg *sc; ufs2_daddr_t blk; int off; blk = ino_to_fsba(fs, ino); sc = cg_lookup(ino_to_cg(fs, ino)); iblk = sc->sc_lastiblk; if (iblk && iblk->ib_blk == blk) goto found; hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))]; LIST_FOREACH(iblk, hd, ib_next) if (iblk->ib_blk == blk) goto found; /* * The inode block wasn't located, allocate a new one. */ iblk = errmalloc(sizeof(*iblk)); bzero(iblk, sizeof(*iblk)); iblk->ib_buf = errmalloc(fs->fs_bsize); iblk->ib_blk = blk; LIST_INSERT_HEAD(hd, iblk, ib_next); if (bread(disk, fsbtodb(fs, blk), iblk->ib_buf, fs->fs_bsize) == -1) err_suj("Failed to read inode block %jd\n", blk); found: sc->sc_lastiblk = iblk; off = ino_to_fsbo(fs, ino); if (fs->fs_magic == FS_UFS1_MAGIC) return (union dinode *)&((struct ufs1_dinode *)iblk->ib_buf)[off]; else return (union dinode *)&((struct ufs2_dinode *)iblk->ib_buf)[off]; } static void ino_dirty(ino_t ino) { struct ino_blk *iblk; struct iblkhd *hd; struct suj_cg *sc; ufs2_daddr_t blk; blk = ino_to_fsba(fs, ino); sc = cg_lookup(ino_to_cg(fs, ino)); iblk = sc->sc_lastiblk; if (iblk && iblk->ib_blk == blk) { iblk->ib_dirty = 1; return; } hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))]; LIST_FOREACH(iblk, hd, ib_next) { if (iblk->ib_blk == blk) { iblk->ib_dirty = 1; return; } } ino_read(ino); ino_dirty(ino); } static void iblk_write(struct ino_blk *iblk) { if (iblk->ib_dirty == 0) return; if (bwrite(disk, fsbtodb(fs, iblk->ib_blk), iblk->ib_buf, fs->fs_bsize) == -1) err_suj("Failed to write inode block %jd\n", iblk->ib_blk); } static int blk_overlaps(struct jblkrec *brec, ufs2_daddr_t start, int frags) { ufs2_daddr_t bstart; ufs2_daddr_t bend; ufs2_daddr_t end; end = start + frags; bstart = brec->jb_blkno + brec->jb_oldfrags; bend = bstart + brec->jb_frags; if (start < bend && end > bstart) return (1); return (0); } static int blk_equals(struct jblkrec *brec, ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t start, int frags) { if (brec->jb_ino != ino || brec->jb_lbn != lbn) return (0); if (brec->jb_blkno + brec->jb_oldfrags != start) return (0); if (brec->jb_frags < frags) return (0); return (1); } static void blk_setmask(struct jblkrec *brec, int *mask) { int i; for (i = brec->jb_oldfrags; i < brec->jb_oldfrags + brec->jb_frags; i++) *mask |= 1 << i; } /* * Determine whether a given block has been reallocated to a new location. * Returns a mask of overlapping bits if any frags have been reused or * zero if the block has not been re-used and the contents can be trusted. * * This is used to ensure that an orphaned pointer due to truncate is safe * to be freed. The mask value can be used to free partial blocks. */ static int blk_freemask(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags) { struct suj_blk *sblk; struct suj_rec *srec; struct jblkrec *brec; int mask; int off; /* * To be certain we're not freeing a reallocated block we lookup * this block in the blk hash and see if there is an allocation * journal record that overlaps with any fragments in the block * we're concerned with. If any fragments have ben reallocated * the block has already been freed and re-used for another purpose. */ mask = 0; sblk = blk_lookup(blknum(fs, blk), 0); if (sblk == NULL) return (0); off = blk - sblk->sb_blk; TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { brec = (struct jblkrec *)srec->sr_rec; /* * If the block overlaps but does not match * exactly this record refers to the current * location. */ if (blk_overlaps(brec, blk, frags) == 0) continue; if (blk_equals(brec, ino, lbn, blk, frags) == 1) mask = 0; else blk_setmask(brec, &mask); } if (debug) printf("blk_freemask: blk %jd sblk %jd off %d mask 0x%X\n", blk, sblk->sb_blk, off, mask); return (mask >> off); } /* * Determine whether it is safe to follow an indirect. It is not safe * if any part of the indirect has been reallocated or the last journal * entry was an allocation. Just allocated indirects may not have valid * pointers yet and all of their children will have their own records. * It is also not safe to follow an indirect if the cg bitmap has been * cleared as a new allocation may write to the block prior to the journal * being written. * * Returns 1 if it's safe to follow the indirect and 0 otherwise. */ static int blk_isindir(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn) { struct suj_blk *sblk; struct jblkrec *brec; sblk = blk_lookup(blk, 0); if (sblk == NULL) return (1); if (TAILQ_EMPTY(&sblk->sb_recs)) return (1); brec = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec; if (blk_equals(brec, ino, lbn, blk, fs->fs_frag)) if (brec->jb_op == JOP_FREEBLK) return (!blk_isfree(blk)); return (0); } /* * Clear an inode from the cg bitmap. If the inode was already clear return * 0 so the caller knows it does not have to check the inode contents. */ static int ino_free(ino_t ino, int mode) { struct suj_cg *sc; uint8_t *inosused; struct cg *cgp; int cg; cg = ino_to_cg(fs, ino); ino = ino % fs->fs_ipg; sc = cg_lookup(cg); cgp = sc->sc_cgp; inosused = cg_inosused(cgp); /* * The bitmap may never have made it to the disk so we have to * conditionally clear. We can avoid writing the cg in this case. */ if (isclr(inosused, ino)) return (0); freeinos++; clrbit(inosused, ino); if (ino < cgp->cg_irotor) cgp->cg_irotor = ino; cgp->cg_cs.cs_nifree++; if ((mode & IFMT) == IFDIR) { freedir++; cgp->cg_cs.cs_ndir--; } sc->sc_dirty = 1; return (1); } /* * Free 'frags' frags starting at filesystem block 'bno' skipping any frags * set in the mask. */ static void blk_free(ufs2_daddr_t bno, int mask, int frags) { ufs1_daddr_t fragno, cgbno; struct suj_cg *sc; struct cg *cgp; int i, cg; uint8_t *blksfree; if (debug) printf("Freeing %d frags at blk %jd mask 0x%x\n", frags, bno, mask); cg = dtog(fs, bno); sc = cg_lookup(cg); cgp = sc->sc_cgp; cgbno = dtogd(fs, bno); blksfree = cg_blksfree(cgp); /* * If it's not allocated we only wrote the journal entry * and never the bitmaps. Here we unconditionally clear and * resolve the cg summary later. */ if (frags == fs->fs_frag && mask == 0) { fragno = fragstoblks(fs, cgbno); ffs_setblock(fs, blksfree, fragno); freeblocks++; } else { /* * deallocate the fragment */ for (i = 0; i < frags; i++) if ((mask & (1 << i)) == 0 && isclr(blksfree, cgbno +i)) { freefrags++; setbit(blksfree, cgbno + i); } } sc->sc_dirty = 1; } /* * Returns 1 if the whole block starting at 'bno' is marked free and 0 * otherwise. */ static int blk_isfree(ufs2_daddr_t bno) { struct suj_cg *sc; sc = cg_lookup(dtog(fs, bno)); return ffs_isblock(fs, cg_blksfree(sc->sc_cgp), dtogd(fs, bno)); } /* * Fetch an indirect block to find the block at a given lbn. The lbn * may be negative to fetch a specific indirect block pointer or positive * to fetch a specific block. */ static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn) { ufs2_daddr_t *bap2; ufs2_daddr_t *bap1; ufs_lbn_t lbnadd; ufs_lbn_t base; int level; int i; if (blk == 0) return (0); level = lbn_level(cur); if (level == -1) err_suj("Invalid indir lbn %jd\n", lbn); if (level == 0 && lbn < 0) err_suj("Invalid lbn %jd\n", lbn); bap2 = (void *)dblk_read(blk, fs->fs_bsize); bap1 = (void *)bap2; lbnadd = 1; base = -(cur + level); for (i = level; i > 0; i--) lbnadd *= NINDIR(fs); if (lbn > 0) i = (lbn - base) / lbnadd; else i = (-lbn - base) / lbnadd; if (i < 0 || i >= NINDIR(fs)) err_suj("Invalid indirect index %d produced by lbn %jd\n", i, lbn); if (level == 0) cur = base + (i * lbnadd); else cur = -(base + (i * lbnadd)) - (level - 1); if (fs->fs_magic == FS_UFS1_MAGIC) blk = bap1[i]; else blk = bap2[i]; if (cur == lbn) return (blk); if (level == 0) err_suj("Invalid lbn %jd at level 0\n", lbn); return indir_blkatoff(blk, ino, cur, lbn); } /* * Finds the disk block address at the specified lbn within the inode * specified by ip. This follows the whole tree and honors di_size and * di_extsize so it is a true test of reachability. The lbn may be * negative if an extattr or indirect block is requested. */ static ufs2_daddr_t ino_blkatoff(union dinode *ip, ino_t ino, ufs_lbn_t lbn, int *frags) { ufs_lbn_t tmpval; ufs_lbn_t cur; ufs_lbn_t next; int i; /* * Handle extattr blocks first. */ if (lbn < 0 && lbn >= -UFS_NXADDR) { lbn = -1 - lbn; if (lbn > lblkno(fs, ip->dp2.di_extsize - 1)) return (0); *frags = numfrags(fs, sblksize(fs, ip->dp2.di_extsize, lbn)); return (ip->dp2.di_extb[lbn]); } /* * Now direct and indirect. */ if (DIP(ip, di_mode) == IFLNK && DIP(ip, di_size) < fs->fs_maxsymlinklen) return (0); if (lbn >= 0 && lbn < UFS_NDADDR) { *frags = numfrags(fs, sblksize(fs, DIP(ip, di_size), lbn)); return (DIP(ip, di_db[lbn])); } *frags = fs->fs_frag; for (i = 0, tmpval = NINDIR(fs), cur = UFS_NDADDR; i < UFS_NIADDR; i++, tmpval *= NINDIR(fs), cur = next) { next = cur + tmpval; if (lbn == -cur - i) return (DIP(ip, di_ib[i])); /* * Determine whether the lbn in question is within this tree. */ if (lbn < 0 && -lbn >= next) continue; if (lbn > 0 && lbn >= next) continue; return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn); } err_suj("lbn %jd not in ino\n", lbn); /* NOTREACHED */ } /* * Determine whether a block exists at a particular lbn in an inode. * Returns 1 if found, 0 if not. lbn may be negative for indirects * or ext blocks. */ static int blk_isat(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int *frags) { union dinode *ip; ufs2_daddr_t nblk; ip = ino_read(ino); if (DIP(ip, di_nlink) == 0 || DIP(ip, di_mode) == 0) return (0); nblk = ino_blkatoff(ip, ino, lbn, frags); return (nblk == blk); } /* * Clear the directory entry at diroff that should point to child. Minimal * checking is done and it is assumed that this path was verified with isat. */ static void ino_clrat(ino_t parent, off_t diroff, ino_t child) { union dinode *dip; struct direct *dp; ufs2_daddr_t blk; uint8_t *block; ufs_lbn_t lbn; int blksize; int frags; int doff; if (debug) printf("Clearing inode %ju from parent %ju at offset %jd\n", (uintmax_t)child, (uintmax_t)parent, diroff); lbn = lblkno(fs, diroff); doff = blkoff(fs, diroff); dip = ino_read(parent); blk = ino_blkatoff(dip, parent, lbn, &frags); blksize = sblksize(fs, DIP(dip, di_size), lbn); block = dblk_read(blk, blksize); dp = (struct direct *)&block[doff]; if (dp->d_ino != child) errx(1, "Inode %ju does not exist in %ju at %jd", (uintmax_t)child, (uintmax_t)parent, diroff); dp->d_ino = 0; dblk_dirty(blk); /* * The actual .. reference count will already have been removed * from the parent by the .. remref record. */ } /* * Determines whether a pointer to an inode exists within a directory * at a specified offset. Returns the mode of the found entry. */ static int ino_isat(ino_t parent, off_t diroff, ino_t child, int *mode, int *isdot) { union dinode *dip; struct direct *dp; ufs2_daddr_t blk; uint8_t *block; ufs_lbn_t lbn; int blksize; int frags; int dpoff; int doff; *isdot = 0; dip = ino_read(parent); *mode = DIP(dip, di_mode); if ((*mode & IFMT) != IFDIR) { if (debug) { /* * This can happen if the parent inode * was reallocated. */ if (*mode != 0) printf("Directory %ju has bad mode %o\n", (uintmax_t)parent, *mode); else printf("Directory %ju has zero mode\n", (uintmax_t)parent); } return (0); } lbn = lblkno(fs, diroff); doff = blkoff(fs, diroff); blksize = sblksize(fs, DIP(dip, di_size), lbn); if (diroff + DIRECTSIZ(1) > DIP(dip, di_size) || doff >= blksize) { if (debug) printf("ino %ju absent from %ju due to offset %jd" " exceeding size %jd\n", (uintmax_t)child, (uintmax_t)parent, diroff, DIP(dip, di_size)); return (0); } blk = ino_blkatoff(dip, parent, lbn, &frags); if (blk <= 0) { if (debug) printf("Sparse directory %ju", (uintmax_t)parent); return (0); } block = dblk_read(blk, blksize); /* * Walk through the records from the start of the block to be * certain we hit a valid record and not some junk in the middle * of a file name. Stop when we reach or pass the expected offset. */ dpoff = rounddown(doff, DIRBLKSIZ); do { dp = (struct direct *)&block[dpoff]; if (dpoff == doff) break; if (dp->d_reclen == 0) break; dpoff += dp->d_reclen; } while (dpoff <= doff); if (dpoff > fs->fs_bsize) err_suj("Corrupt directory block in dir ino %ju\n", (uintmax_t)parent); /* Not found. */ if (dpoff != doff) { if (debug) printf("ino %ju not found in %ju, lbn %jd, dpoff %d\n", (uintmax_t)child, (uintmax_t)parent, lbn, dpoff); return (0); } /* * We found the item in question. Record the mode and whether it's * a . or .. link for the caller. */ if (dp->d_ino == child) { if (child == parent) *isdot = 1; else if (dp->d_namlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.') *isdot = 1; *mode = DTTOIF(dp->d_type); return (1); } if (debug) printf("ino %ju doesn't match dirent ino %ju in parent %ju\n", (uintmax_t)child, (uintmax_t)dp->d_ino, (uintmax_t)parent); return (0); } #define VISIT_INDIR 0x0001 #define VISIT_EXT 0x0002 #define VISIT_ROOT 0x0004 /* Operation came via root & valid pointers. */ /* * Read an indirect level which may or may not be linked into an inode. */ static void indir_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, uint64_t *frags, ino_visitor visitor, int flags) { ufs2_daddr_t *bap2; ufs1_daddr_t *bap1; ufs_lbn_t lbnadd; ufs2_daddr_t nblk; ufs_lbn_t nlbn; int level; int i; /* * Don't visit indirect blocks with contents we can't trust. This * should only happen when indir_visit() is called to complete a * truncate that never finished and not when a pointer is found via * an inode. */ if (blk == 0) return; level = lbn_level(lbn); if (level == -1) err_suj("Invalid level for lbn %jd\n", lbn); if ((flags & VISIT_ROOT) == 0 && blk_isindir(blk, ino, lbn) == 0) { if (debug) printf("blk %jd ino %ju lbn %jd(%d) is not indir.\n", blk, (uintmax_t)ino, lbn, level); goto out; } lbnadd = 1; for (i = level; i > 0; i--) lbnadd *= NINDIR(fs); bap1 = (void *)dblk_read(blk, fs->fs_bsize); bap2 = (void *)bap1; for (i = 0; i < NINDIR(fs); i++) { if (fs->fs_magic == FS_UFS1_MAGIC) nblk = *bap1++; else nblk = *bap2++; if (nblk == 0) continue; if (level == 0) { nlbn = -lbn + i * lbnadd; (*frags) += fs->fs_frag; visitor(ino, nlbn, nblk, fs->fs_frag); } else { nlbn = (lbn + 1) - (i * lbnadd); indir_visit(ino, nlbn, nblk, frags, visitor, flags); } } out: if (flags & VISIT_INDIR) { (*frags) += fs->fs_frag; visitor(ino, lbn, blk, fs->fs_frag); } } /* * Visit each block in an inode as specified by 'flags' and call a * callback function. The callback may inspect or free blocks. The * count of frags found according to the size in the file is returned. * This is not valid for sparse files but may be used to determine * the correct di_blocks for a file. */ static uint64_t ino_visit(union dinode *ip, ino_t ino, ino_visitor visitor, int flags) { ufs_lbn_t nextlbn; ufs_lbn_t tmpval; ufs_lbn_t lbn; uint64_t size; uint64_t fragcnt; int mode; int frags; int i; size = DIP(ip, di_size); mode = DIP(ip, di_mode) & IFMT; fragcnt = 0; if ((flags & VISIT_EXT) && fs->fs_magic == FS_UFS2_MAGIC && ip->dp2.di_extsize) { for (i = 0; i < UFS_NXADDR; i++) { if (ip->dp2.di_extb[i] == 0) continue; frags = sblksize(fs, ip->dp2.di_extsize, i); frags = numfrags(fs, frags); fragcnt += frags; visitor(ino, -1 - i, ip->dp2.di_extb[i], frags); } } /* Skip datablocks for short links and devices. */ if (mode == IFBLK || mode == IFCHR || (mode == IFLNK && size < fs->fs_maxsymlinklen)) return (fragcnt); for (i = 0; i < UFS_NDADDR; i++) { if (DIP(ip, di_db[i]) == 0) continue; frags = sblksize(fs, size, i); frags = numfrags(fs, frags); fragcnt += frags; visitor(ino, i, DIP(ip, di_db[i]), frags); } /* * We know the following indirects are real as we're following * real pointers to them. */ flags |= VISIT_ROOT; for (i = 0, tmpval = NINDIR(fs), lbn = UFS_NDADDR; i < UFS_NIADDR; i++, lbn = nextlbn) { nextlbn = lbn + tmpval; tmpval *= NINDIR(fs); if (DIP(ip, di_ib[i]) == 0) continue; indir_visit(ino, -lbn - i, DIP(ip, di_ib[i]), &fragcnt, visitor, flags); } return (fragcnt); } /* * Null visitor function used when we just want to count blocks and * record the lbn. */ ufs_lbn_t visitlbn; static void null_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { if (lbn > 0) visitlbn = lbn; } /* * Recalculate di_blocks when we discover that a block allocation or * free was not successfully completed. The kernel does not roll this back * because it would be too expensive to compute which indirects were * reachable at the time the inode was written. */ static void ino_adjblks(struct suj_ino *sino) { union dinode *ip; uint64_t blocks; uint64_t frags; off_t isize; off_t size; ino_t ino; ino = sino->si_ino; ip = ino_read(ino); /* No need to adjust zero'd inodes. */ if (DIP(ip, di_mode) == 0) return; /* * Visit all blocks and count them as well as recording the last * valid lbn in the file. If the file size doesn't agree with the * last lbn we need to truncate to fix it. Otherwise just adjust * the blocks count. */ visitlbn = 0; frags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT); blocks = fsbtodb(fs, frags); /* * We assume the size and direct block list is kept coherent by * softdep. For files that have extended into indirects we truncate * to the size in the inode or the maximum size permitted by * populated indirects. */ if (visitlbn >= UFS_NDADDR) { isize = DIP(ip, di_size); size = lblktosize(fs, visitlbn + 1); if (isize > size) isize = size; /* Always truncate to free any unpopulated indirects. */ ino_trunc(sino->si_ino, isize); return; } if (blocks == DIP(ip, di_blocks)) return; if (debug) printf("ino %ju adjusting block count from %jd to %jd\n", (uintmax_t)ino, DIP(ip, di_blocks), blocks); DIP_SET(ip, di_blocks, blocks); ino_dirty(ino); } static void blk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { blk_free(blk, blk_freemask(blk, ino, lbn, frags), frags); } /* * Free a block or tree of blocks that was previously rooted in ino at * the given lbn. If the lbn is an indirect all children are freed * recursively. */ static void blk_free_lbn(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags, int follow) { uint64_t resid; int mask; mask = blk_freemask(blk, ino, lbn, frags); resid = 0; if (lbn <= -UFS_NDADDR && follow && mask == 0) indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR); else blk_free(blk, mask, frags); } static void ino_setskip(struct suj_ino *sino, ino_t parent) { int isdot; int mode; if (ino_isat(sino->si_ino, DOTDOT_OFFSET, parent, &mode, &isdot)) sino->si_skipparent = 1; } static void ino_remref(ino_t parent, ino_t child, uint64_t diroff, int isdotdot) { struct suj_ino *sino; struct suj_rec *srec; struct jrefrec *rrec; /* * Lookup this inode to see if we have a record for it. */ sino = ino_lookup(child, 0); /* * Tell any child directories we've already removed their * parent link cnt. Don't try to adjust our link down again. */ if (sino != NULL && isdotdot == 0) ino_setskip(sino, parent); /* * No valid record for this inode. Just drop the on-disk * link by one. */ if (sino == NULL || sino->si_hasrecs == 0) { ino_decr(child); return; } /* * Use ino_adjust() if ino_check() has already processed this * child. If we lose the last non-dot reference to a * directory it will be discarded. */ if (sino->si_linkadj) { sino->si_nlink--; if (isdotdot) sino->si_dotlinks--; ino_adjust(sino); return; } /* * If we haven't yet processed this inode we need to make * sure we will successfully discover the lost path. If not * use nlinkadj to remember. */ TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; if (rrec->jr_parent == parent && rrec->jr_diroff == diroff) return; } sino->si_nlinkadj++; } /* * Free the children of a directory when the directory is discarded. */ static void ino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { struct suj_ino *sino; struct direct *dp; off_t diroff; uint8_t *block; int skipparent; int isdotdot; int dpoff; int size; sino = ino_lookup(ino, 0); if (sino) skipparent = sino->si_skipparent; else skipparent = 0; size = lfragtosize(fs, frags); block = dblk_read(blk, size); dp = (struct direct *)&block[0]; for (dpoff = 0; dpoff < size && dp->d_reclen; dpoff += dp->d_reclen) { dp = (struct direct *)&block[dpoff]; if (dp->d_ino == 0 || dp->d_ino == UFS_WINO) continue; if (dp->d_namlen == 1 && dp->d_name[0] == '.') continue; isdotdot = dp->d_namlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.'; if (isdotdot && skipparent == 1) continue; if (debug) printf("Directory %ju removing ino %ju name %s\n", (uintmax_t)ino, (uintmax_t)dp->d_ino, dp->d_name); diroff = lblktosize(fs, lbn) + dpoff; ino_remref(ino, dp->d_ino, diroff, isdotdot); } } /* * Reclaim an inode, freeing all blocks and decrementing all children's * link counts. Free the inode back to the cg. */ static void ino_reclaim(union dinode *ip, ino_t ino, int mode) { uint32_t gen; if (ino == UFS_ROOTINO) err_suj("Attempting to free UFS_ROOTINO\n"); if (debug) printf("Truncating and freeing ino %ju, nlink %d, mode %o\n", (uintmax_t)ino, DIP(ip, di_nlink), DIP(ip, di_mode)); /* We are freeing an inode or directory. */ if ((DIP(ip, di_mode) & IFMT) == IFDIR) ino_visit(ip, ino, ino_free_children, 0); DIP_SET(ip, di_nlink, 0); ino_visit(ip, ino, blk_free_visit, VISIT_EXT | VISIT_INDIR); /* Here we have to clear the inode and release any blocks it holds. */ gen = DIP(ip, di_gen); if (fs->fs_magic == FS_UFS1_MAGIC) bzero(ip, sizeof(struct ufs1_dinode)); else bzero(ip, sizeof(struct ufs2_dinode)); DIP_SET(ip, di_gen, gen); ino_dirty(ino); ino_free(ino, mode); return; } /* * Adjust an inode's link count down by one when a directory goes away. */ static void ino_decr(ino_t ino) { union dinode *ip; int reqlink; int nlink; int mode; ip = ino_read(ino); nlink = DIP(ip, di_nlink); mode = DIP(ip, di_mode); if (nlink < 1) err_suj("Inode %d link count %d invalid\n", ino, nlink); if (mode == 0) err_suj("Inode %d has a link of %d with 0 mode\n", ino, nlink); nlink--; if ((mode & IFMT) == IFDIR) reqlink = 2; else reqlink = 1; if (nlink < reqlink) { if (debug) printf("ino %ju not enough links to live %d < %d\n", (uintmax_t)ino, nlink, reqlink); ino_reclaim(ip, ino, mode); return; } DIP_SET(ip, di_nlink, nlink); ino_dirty(ino); } /* * Adjust the inode link count to 'nlink'. If the count reaches zero * free it. */ static void ino_adjust(struct suj_ino *sino) { struct jrefrec *rrec; struct suj_rec *srec; struct suj_ino *stmp; union dinode *ip; nlink_t nlink; + nlink_t reqlink; int recmode; - int reqlink; int isdot; int mode; ino_t ino; nlink = sino->si_nlink; ino = sino->si_ino; mode = sino->si_mode & IFMT; /* * If it's a directory with no dot links, it was truncated before * the name was cleared. We need to clear the dirent that * points at it. */ if (mode == IFDIR && nlink == 1 && sino->si_dotlinks == 0) { sino->si_nlink = nlink = 0; TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; if (ino_isat(rrec->jr_parent, rrec->jr_diroff, ino, &recmode, &isdot) == 0) continue; ino_clrat(rrec->jr_parent, rrec->jr_diroff, ino); break; } if (srec == NULL) errx(1, "Directory %ju name not found", (uintmax_t)ino); } /* * If it's a directory with no real names pointing to it go ahead * and truncate it. This will free any children. */ if (mode == IFDIR && nlink - sino->si_dotlinks == 0) { sino->si_nlink = nlink = 0; /* * Mark any .. links so they know not to free this inode * when they are removed. */ TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; if (rrec->jr_diroff == DOTDOT_OFFSET) { stmp = ino_lookup(rrec->jr_parent, 0); if (stmp) ino_setskip(stmp, ino); } } } ip = ino_read(ino); mode = DIP(ip, di_mode) & IFMT; if (nlink > LINK_MAX) err_suj("ino %ju nlink manipulation error, new %ju, old %d\n", (uintmax_t)ino, (uintmax_t)nlink, DIP(ip, di_nlink)); if (debug) printf("Adjusting ino %ju, nlink %ju, old link %d lastmode %o\n", (uintmax_t)ino, (uintmax_t)nlink, DIP(ip, di_nlink), sino->si_mode); if (mode == 0) { if (debug) printf("ino %ju, zero inode freeing bitmap\n", (uintmax_t)ino); ino_free(ino, sino->si_mode); return; } /* XXX Should be an assert? */ if (mode != sino->si_mode && debug) printf("ino %ju, mode %o != %o\n", (uintmax_t)ino, mode, sino->si_mode); if ((mode & IFMT) == IFDIR) reqlink = 2; else reqlink = 1; /* If the inode doesn't have enough links to live, free it. */ if (nlink < reqlink) { if (debug) printf("ino %ju not enough links to live %ju < %ju\n", (uintmax_t)ino, (uintmax_t)nlink, (uintmax_t)reqlink); ino_reclaim(ip, ino, mode); return; } /* If required write the updated link count. */ if (DIP(ip, di_nlink) == nlink) { if (debug) printf("ino %ju, link matches, skipping.\n", (uintmax_t)ino); return; } DIP_SET(ip, di_nlink, nlink); ino_dirty(ino); } /* * Truncate some or all blocks in an indirect, freeing any that are required * and zeroing the indirect. */ static void indir_trunc(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, ufs_lbn_t lastlbn) { ufs2_daddr_t *bap2; ufs1_daddr_t *bap1; ufs_lbn_t lbnadd; ufs2_daddr_t nblk; ufs_lbn_t next; ufs_lbn_t nlbn; int dirty; int level; int i; if (blk == 0) return; dirty = 0; level = lbn_level(lbn); if (level == -1) err_suj("Invalid level for lbn %jd\n", lbn); lbnadd = 1; for (i = level; i > 0; i--) lbnadd *= NINDIR(fs); bap1 = (void *)dblk_read(blk, fs->fs_bsize); bap2 = (void *)bap1; for (i = 0; i < NINDIR(fs); i++) { if (fs->fs_magic == FS_UFS1_MAGIC) nblk = *bap1++; else nblk = *bap2++; if (nblk == 0) continue; if (level != 0) { nlbn = (lbn + 1) - (i * lbnadd); /* * Calculate the lbn of the next indirect to * determine if any of this indirect must be * reclaimed. */ next = -(lbn + level) + ((i+1) * lbnadd); if (next <= lastlbn) continue; indir_trunc(ino, nlbn, nblk, lastlbn); /* If all of this indirect was reclaimed, free it. */ nlbn = next - lbnadd; if (nlbn < lastlbn) continue; } else { nlbn = -lbn + i * lbnadd; if (nlbn < lastlbn) continue; } dirty = 1; blk_free(nblk, 0, fs->fs_frag); if (fs->fs_magic == FS_UFS1_MAGIC) *(bap1 - 1) = 0; else *(bap2 - 1) = 0; } if (dirty) dblk_dirty(blk); } /* * Truncate an inode to the minimum of the given size or the last populated * block after any over size have been discarded. The kernel would allocate * the last block in the file but fsck does not and neither do we. This * code never extends files, only shrinks them. */ static void ino_trunc(ino_t ino, off_t size) { union dinode *ip; ufs2_daddr_t bn; uint64_t totalfrags; ufs_lbn_t nextlbn; ufs_lbn_t lastlbn; ufs_lbn_t tmpval; ufs_lbn_t lbn; ufs_lbn_t i; int frags; off_t cursize; off_t off; int mode; ip = ino_read(ino); mode = DIP(ip, di_mode) & IFMT; cursize = DIP(ip, di_size); if (debug) printf("Truncating ino %ju, mode %o to size %jd from size %jd\n", (uintmax_t)ino, mode, size, cursize); /* Skip datablocks for short links and devices. */ if (mode == 0 || mode == IFBLK || mode == IFCHR || (mode == IFLNK && cursize < fs->fs_maxsymlinklen)) return; /* Don't extend. */ if (size > cursize) size = cursize; lastlbn = lblkno(fs, blkroundup(fs, size)); for (i = lastlbn; i < UFS_NDADDR; i++) { if (DIP(ip, di_db[i]) == 0) continue; frags = sblksize(fs, cursize, i); frags = numfrags(fs, frags); blk_free(DIP(ip, di_db[i]), 0, frags); DIP_SET(ip, di_db[i], 0); } /* * Follow indirect blocks, freeing anything required. */ for (i = 0, tmpval = NINDIR(fs), lbn = UFS_NDADDR; i < UFS_NIADDR; i++, lbn = nextlbn) { nextlbn = lbn + tmpval; tmpval *= NINDIR(fs); /* If we're not freeing any in this indirect range skip it. */ if (lastlbn >= nextlbn) continue; if (DIP(ip, di_ib[i]) == 0) continue; indir_trunc(ino, -lbn - i, DIP(ip, di_ib[i]), lastlbn); /* If we freed everything in this indirect free the indir. */ if (lastlbn > lbn) continue; blk_free(DIP(ip, di_ib[i]), 0, frags); DIP_SET(ip, di_ib[i], 0); } ino_dirty(ino); /* * Now that we've freed any whole blocks that exceed the desired * truncation size, figure out how many blocks remain and what the * last populated lbn is. We will set the size to this last lbn * rather than worrying about allocating the final lbn as the kernel * would've done. This is consistent with normal fsck behavior. */ visitlbn = 0; totalfrags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT); if (size > lblktosize(fs, visitlbn + 1)) size = lblktosize(fs, visitlbn + 1); /* * If we're truncating direct blocks we have to adjust frags * accordingly. */ if (visitlbn < UFS_NDADDR && totalfrags) { long oldspace, newspace; bn = DIP(ip, di_db[visitlbn]); if (bn == 0) err_suj("Bad blk at ino %ju lbn %jd\n", (uintmax_t)ino, visitlbn); oldspace = sblksize(fs, cursize, visitlbn); newspace = sblksize(fs, size, visitlbn); if (oldspace != newspace) { bn += numfrags(fs, newspace); frags = numfrags(fs, oldspace - newspace); blk_free(bn, 0, frags); totalfrags -= frags; } } DIP_SET(ip, di_blocks, fsbtodb(fs, totalfrags)); DIP_SET(ip, di_size, size); /* * If we've truncated into the middle of a block or frag we have * to zero it here. Otherwise the file could extend into * uninitialized space later. */ off = blkoff(fs, size); if (off && DIP(ip, di_mode) != IFDIR) { uint8_t *buf; long clrsize; bn = ino_blkatoff(ip, ino, visitlbn, &frags); if (bn == 0) err_suj("Block missing from ino %ju at lbn %jd\n", (uintmax_t)ino, visitlbn); clrsize = frags * fs->fs_fsize; buf = dblk_read(bn, clrsize); clrsize -= off; buf += off; bzero(buf, clrsize); dblk_dirty(bn); } return; } /* * Process records available for one inode and determine whether the * link count is correct or needs adjusting. */ static void ino_check(struct suj_ino *sino) { struct suj_rec *srec; struct jrefrec *rrec; nlink_t dotlinks; - int newlinks; - int removes; - int nlink; + nlink_t newlinks; + nlink_t removes; + nlink_t nlink; ino_t ino; int isdot; int isat; int mode; if (sino->si_hasrecs == 0) return; ino = sino->si_ino; rrec = (struct jrefrec *)TAILQ_FIRST(&sino->si_recs)->sr_rec; nlink = rrec->jr_nlink; newlinks = 0; dotlinks = 0; removes = sino->si_nlinkadj; TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; isat = ino_isat(rrec->jr_parent, rrec->jr_diroff, rrec->jr_ino, &mode, &isdot); if (isat && (mode & IFMT) != (rrec->jr_mode & IFMT)) err_suj("Inode mode/directory type mismatch %o != %o\n", mode, rrec->jr_mode); if (debug) printf("jrefrec: op %d ino %ju, nlink %ju, parent %ju, " "diroff %jd, mode %o, isat %d, isdot %d\n", rrec->jr_op, (uintmax_t)rrec->jr_ino, (uintmax_t)rrec->jr_nlink, (uintmax_t)rrec->jr_parent, (uintmax_t)rrec->jr_diroff, rrec->jr_mode, isat, isdot); mode = rrec->jr_mode & IFMT; if (rrec->jr_op == JOP_REMREF) removes++; newlinks += isat; if (isdot) dotlinks += isat; } /* * The number of links that remain are the starting link count * subtracted by the total number of removes with the total * links discovered back in. An incomplete remove thus * makes no change to the link count but an add increases * by one. */ if (debug) printf( "ino %ju nlink %ju newlinks %ju removes %ju dotlinks %ju\n", (uintmax_t)ino, (uintmax_t)nlink, (uintmax_t)newlinks, (uintmax_t)removes, (uintmax_t)dotlinks); nlink += newlinks; nlink -= removes; sino->si_linkadj = 1; sino->si_nlink = nlink; sino->si_dotlinks = dotlinks; sino->si_mode = mode; ino_adjust(sino); } /* * Process records available for one block and determine whether it is * still allocated and whether the owning inode needs to be updated or * a free completed. */ static void blk_check(struct suj_blk *sblk) { struct suj_rec *srec; struct jblkrec *brec; struct suj_ino *sino; ufs2_daddr_t blk; int mask; int frags; int isat; /* * Each suj_blk actually contains records for any fragments in that * block. As a result we must evaluate each record individually. */ sino = NULL; TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { brec = (struct jblkrec *)srec->sr_rec; frags = brec->jb_frags; blk = brec->jb_blkno + brec->jb_oldfrags; isat = blk_isat(brec->jb_ino, brec->jb_lbn, blk, &frags); if (sino == NULL || sino->si_ino != brec->jb_ino) { sino = ino_lookup(brec->jb_ino, 1); sino->si_blkadj = 1; } if (debug) printf("op %d blk %jd ino %ju lbn %jd frags %d isat %d (%d)\n", brec->jb_op, blk, (uintmax_t)brec->jb_ino, brec->jb_lbn, brec->jb_frags, isat, frags); /* * If we found the block at this address we still have to * determine if we need to free the tail end that was * added by adding contiguous fragments from the same block. */ if (isat == 1) { if (frags == brec->jb_frags) continue; mask = blk_freemask(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags); mask >>= frags; blk += frags; frags = brec->jb_frags - frags; blk_free(blk, mask, frags); continue; } /* * The block wasn't found, attempt to free it. It won't be * freed if it was actually reallocated. If this was an * allocation we don't want to follow indirects as they * may not be written yet. Any children of the indirect will * have their own records. If it's a free we need to * recursively free children. */ blk_free_lbn(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags, brec->jb_op == JOP_FREEBLK); } } /* * Walk the list of inode records for this cg and resolve moved and duplicate * inode references now that we have a complete picture. */ static void cg_build(struct suj_cg *sc) { struct suj_ino *sino; int i; for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) ino_build(sino); } /* * Handle inodes requiring truncation. This must be done prior to * looking up any inodes in directories. */ static void cg_trunc(struct suj_cg *sc) { struct suj_ino *sino; int i; for (i = 0; i < SUJ_HASHSIZE; i++) { LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { if (sino->si_trunc) { ino_trunc(sino->si_ino, sino->si_trunc->jt_size); sino->si_blkadj = 0; sino->si_trunc = NULL; } if (sino->si_blkadj) ino_adjblks(sino); } } } static void cg_adj_blk(struct suj_cg *sc) { struct suj_ino *sino; int i; for (i = 0; i < SUJ_HASHSIZE; i++) { LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { if (sino->si_blkadj) ino_adjblks(sino); } } } /* * Free any partially allocated blocks and then resolve inode block * counts. */ static void cg_check_blk(struct suj_cg *sc) { struct suj_blk *sblk; int i; for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) blk_check(sblk); } /* * Walk the list of inode records for this cg, recovering any * changes which were not complete at the time of crash. */ static void cg_check_ino(struct suj_cg *sc) { struct suj_ino *sino; int i; for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) ino_check(sino); } /* * Write a potentially dirty cg. Recalculate the summary information and * update the superblock summary. */ static void cg_write(struct suj_cg *sc) { ufs1_daddr_t fragno, cgbno, maxbno; u_int8_t *blksfree; struct cg *cgp; int blk; int i; if (sc->sc_dirty == 0) return; /* * Fix the frag and cluster summary. */ cgp = sc->sc_cgp; cgp->cg_cs.cs_nbfree = 0; cgp->cg_cs.cs_nffree = 0; bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum)); maxbno = fragstoblks(fs, fs->fs_fpg); if (fs->fs_contigsumsize > 0) { for (i = 1; i <= fs->fs_contigsumsize; i++) cg_clustersum(cgp)[i] = 0; bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT)); } blksfree = cg_blksfree(cgp); for (cgbno = 0; cgbno < maxbno; cgbno++) { if (ffs_isfreeblock(fs, blksfree, cgbno)) continue; if (ffs_isblock(fs, blksfree, cgbno)) { ffs_clusteracct(fs, cgp, cgbno, 1); cgp->cg_cs.cs_nbfree++; continue; } fragno = blkstofrags(fs, cgbno); blk = blkmap(fs, blksfree, fragno); ffs_fragacct(fs, blk, cgp->cg_frsum, 1); for (i = 0; i < fs->fs_frag; i++) if (isset(blksfree, fragno + i)) cgp->cg_cs.cs_nffree++; } /* * Update the superblock cg summary from our now correct values * before writing the block. */ fs->fs_cs(fs, sc->sc_cgx) = cgp->cg_cs; if (bwrite(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf, fs->fs_bsize) == -1) err_suj("Unable to write cylinder group %d\n", sc->sc_cgx); } /* * Write out any modified inodes. */ static void cg_write_inos(struct suj_cg *sc) { struct ino_blk *iblk; int i; for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(iblk, &sc->sc_iblkhash[i], ib_next) if (iblk->ib_dirty) iblk_write(iblk); } static void cg_apply(void (*apply)(struct suj_cg *)) { struct suj_cg *scg; int i; for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(scg, &cghash[i], sc_next) apply(scg); } /* * Process the unlinked but referenced file list. Freeing all inodes. */ static void ino_unlinked(void) { union dinode *ip; uint16_t mode; ino_t inon; ino_t ino; ino = fs->fs_sujfree; fs->fs_sujfree = 0; while (ino != 0) { ip = ino_read(ino); mode = DIP(ip, di_mode) & IFMT; inon = DIP(ip, di_freelink); DIP_SET(ip, di_freelink, 0); /* * XXX Should this be an errx? */ if (DIP(ip, di_nlink) == 0) { if (debug) printf("Freeing unlinked ino %ju mode %o\n", (uintmax_t)ino, mode); ino_reclaim(ip, ino, mode); } else if (debug) printf("Skipping ino %ju mode %o with link %d\n", (uintmax_t)ino, mode, DIP(ip, di_nlink)); ino = inon; } } /* * Append a new record to the list of records requiring processing. */ static void ino_append(union jrec *rec) { struct jrefrec *refrec; struct jmvrec *mvrec; struct suj_ino *sino; struct suj_rec *srec; mvrec = &rec->rec_jmvrec; refrec = &rec->rec_jrefrec; if (debug && mvrec->jm_op == JOP_MVREF) printf("ino move: ino %ju, parent %ju, " "diroff %jd, oldoff %jd\n", (uintmax_t)mvrec->jm_ino, (uintmax_t)mvrec->jm_parent, (uintmax_t)mvrec->jm_newoff, (uintmax_t)mvrec->jm_oldoff); else if (debug && (refrec->jr_op == JOP_ADDREF || refrec->jr_op == JOP_REMREF)) printf("ino ref: op %d, ino %ju, nlink %ju, " "parent %ju, diroff %jd\n", refrec->jr_op, (uintmax_t)refrec->jr_ino, (uintmax_t)refrec->jr_nlink, (uintmax_t)refrec->jr_parent, (uintmax_t)refrec->jr_diroff); sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1); sino->si_hasrecs = 1; srec = errmalloc(sizeof(*srec)); srec->sr_rec = rec; TAILQ_INSERT_TAIL(&sino->si_newrecs, srec, sr_next); } /* * Add a reference adjustment to the sino list and eliminate dups. The * primary loop in ino_build_ref() checks for dups but new ones may be * created as a result of offset adjustments. */ static void ino_add_ref(struct suj_ino *sino, struct suj_rec *srec) { struct jrefrec *refrec; struct suj_rec *srn; struct jrefrec *rrn; refrec = (struct jrefrec *)srec->sr_rec; /* * We walk backwards so that the oldest link count is preserved. If * an add record conflicts with a remove keep the remove. Redundant * removes are eliminated in ino_build_ref. Otherwise we keep the * oldest record at a given location. */ for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn; srn = TAILQ_PREV(srn, srechd, sr_next)) { rrn = (struct jrefrec *)srn->sr_rec; if (rrn->jr_parent != refrec->jr_parent || rrn->jr_diroff != refrec->jr_diroff) continue; if (rrn->jr_op == JOP_REMREF || refrec->jr_op == JOP_ADDREF) { rrn->jr_mode = refrec->jr_mode; return; } /* * Adding a remove. * * Replace the record in place with the old nlink in case * we replace the head of the list. Abandon srec as a dup. */ refrec->jr_nlink = rrn->jr_nlink; srn->sr_rec = srec->sr_rec; return; } TAILQ_INSERT_TAIL(&sino->si_recs, srec, sr_next); } /* * Create a duplicate of a reference at a previous location. */ static void ino_dup_ref(struct suj_ino *sino, struct jrefrec *refrec, off_t diroff) { struct jrefrec *rrn; struct suj_rec *srn; rrn = errmalloc(sizeof(*refrec)); *rrn = *refrec; rrn->jr_op = JOP_ADDREF; rrn->jr_diroff = diroff; srn = errmalloc(sizeof(*srn)); srn->sr_rec = (union jrec *)rrn; ino_add_ref(sino, srn); } /* * Add a reference to the list at all known locations. We follow the offset * changes for a single instance and create duplicate add refs at each so * that we can tolerate any version of the directory block. Eliminate * removes which collide with adds that are seen in the journal. They should * not adjust the link count down. */ static void ino_build_ref(struct suj_ino *sino, struct suj_rec *srec) { struct jrefrec *refrec; struct jmvrec *mvrec; struct suj_rec *srp; struct suj_rec *srn; struct jrefrec *rrn; off_t diroff; refrec = (struct jrefrec *)srec->sr_rec; /* * Search for a mvrec that matches this offset. Whether it's an add * or a remove we can delete the mvref after creating a dup record in * the old location. */ if (!TAILQ_EMPTY(&sino->si_movs)) { diroff = refrec->jr_diroff; for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn; srn = srp) { srp = TAILQ_PREV(srn, srechd, sr_next); mvrec = (struct jmvrec *)srn->sr_rec; if (mvrec->jm_parent != refrec->jr_parent || mvrec->jm_newoff != diroff) continue; diroff = mvrec->jm_oldoff; TAILQ_REMOVE(&sino->si_movs, srn, sr_next); free(srn); ino_dup_ref(sino, refrec, diroff); } } /* * If a remove wasn't eliminated by an earlier add just append it to * the list. */ if (refrec->jr_op == JOP_REMREF) { ino_add_ref(sino, srec); return; } /* * Walk the list of records waiting to be added to the list. We * must check for moves that apply to our current offset and remove * them from the list. Remove any duplicates to eliminate removes * with corresponding adds. */ TAILQ_FOREACH_SAFE(srn, &sino->si_newrecs, sr_next, srp) { switch (srn->sr_rec->rec_jrefrec.jr_op) { case JOP_ADDREF: /* * This should actually be an error we should * have a remove for every add journaled. */ rrn = (struct jrefrec *)srn->sr_rec; if (rrn->jr_parent != refrec->jr_parent || rrn->jr_diroff != refrec->jr_diroff) break; TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); break; case JOP_REMREF: /* * Once we remove the current iteration of the * record at this address we're done. */ rrn = (struct jrefrec *)srn->sr_rec; if (rrn->jr_parent != refrec->jr_parent || rrn->jr_diroff != refrec->jr_diroff) break; TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); ino_add_ref(sino, srec); return; case JOP_MVREF: /* * Update our diroff based on any moves that match * and remove the move. */ mvrec = (struct jmvrec *)srn->sr_rec; if (mvrec->jm_parent != refrec->jr_parent || mvrec->jm_oldoff != refrec->jr_diroff) break; ino_dup_ref(sino, refrec, mvrec->jm_oldoff); refrec->jr_diroff = mvrec->jm_newoff; TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); break; default: err_suj("ino_build_ref: Unknown op %d\n", srn->sr_rec->rec_jrefrec.jr_op); } } ino_add_ref(sino, srec); } /* * Walk the list of new records and add them in-order resolving any * dups and adjusted offsets. */ static void ino_build(struct suj_ino *sino) { struct suj_rec *srec; while ((srec = TAILQ_FIRST(&sino->si_newrecs)) != NULL) { TAILQ_REMOVE(&sino->si_newrecs, srec, sr_next); switch (srec->sr_rec->rec_jrefrec.jr_op) { case JOP_ADDREF: case JOP_REMREF: ino_build_ref(sino, srec); break; case JOP_MVREF: /* * Add this mvrec to the queue of pending mvs. */ TAILQ_INSERT_TAIL(&sino->si_movs, srec, sr_next); break; default: err_suj("ino_build: Unknown op %d\n", srec->sr_rec->rec_jrefrec.jr_op); } } if (TAILQ_EMPTY(&sino->si_recs)) sino->si_hasrecs = 0; } /* * Modify journal records so they refer to the base block number * and a start and end frag range. This is to facilitate the discovery * of overlapping fragment allocations. */ static void blk_build(struct jblkrec *blkrec) { struct suj_rec *srec; struct suj_blk *sblk; struct jblkrec *blkrn; ufs2_daddr_t blk; int frag; if (debug) printf("blk_build: op %d blkno %jd frags %d oldfrags %d " "ino %ju lbn %jd\n", blkrec->jb_op, (uintmax_t)blkrec->jb_blkno, blkrec->jb_frags, blkrec->jb_oldfrags, (uintmax_t)blkrec->jb_ino, (uintmax_t)blkrec->jb_lbn); blk = blknum(fs, blkrec->jb_blkno); frag = fragnum(fs, blkrec->jb_blkno); sblk = blk_lookup(blk, 1); /* * Rewrite the record using oldfrags to indicate the offset into * the block. Leave jb_frags as the actual allocated count. */ blkrec->jb_blkno -= frag; blkrec->jb_oldfrags = frag; if (blkrec->jb_oldfrags + blkrec->jb_frags > fs->fs_frag) err_suj("Invalid fragment count %d oldfrags %d\n", blkrec->jb_frags, frag); /* * Detect dups. If we detect a dup we always discard the oldest * record as it is superseded by the new record. This speeds up * later stages but also eliminates free records which are used * to indicate that the contents of indirects can be trusted. */ TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { blkrn = (struct jblkrec *)srec->sr_rec; if (blkrn->jb_ino != blkrec->jb_ino || blkrn->jb_lbn != blkrec->jb_lbn || blkrn->jb_blkno != blkrec->jb_blkno || blkrn->jb_frags != blkrec->jb_frags || blkrn->jb_oldfrags != blkrec->jb_oldfrags) continue; if (debug) printf("Removed dup.\n"); /* Discard the free which is a dup with an alloc. */ if (blkrec->jb_op == JOP_FREEBLK) return; TAILQ_REMOVE(&sblk->sb_recs, srec, sr_next); free(srec); break; } srec = errmalloc(sizeof(*srec)); srec->sr_rec = (union jrec *)blkrec; TAILQ_INSERT_TAIL(&sblk->sb_recs, srec, sr_next); } static void ino_build_trunc(struct jtrncrec *rec) { struct suj_ino *sino; if (debug) printf("ino_build_trunc: op %d ino %ju, size %jd\n", rec->jt_op, (uintmax_t)rec->jt_ino, (uintmax_t)rec->jt_size); sino = ino_lookup(rec->jt_ino, 1); if (rec->jt_op == JOP_SYNC) { sino->si_trunc = NULL; return; } if (sino->si_trunc == NULL || sino->si_trunc->jt_size > rec->jt_size) sino->si_trunc = rec; } /* * Build up tables of the operations we need to recover. */ static void suj_build(void) { struct suj_seg *seg; union jrec *rec; int off; int i; TAILQ_FOREACH(seg, &allsegs, ss_next) { if (debug) printf("seg %jd has %d records, oldseq %jd.\n", seg->ss_rec.jsr_seq, seg->ss_rec.jsr_cnt, seg->ss_rec.jsr_oldest); off = 0; rec = (union jrec *)seg->ss_blk; for (i = 0; i < seg->ss_rec.jsr_cnt; off += JREC_SIZE, rec++) { /* skip the segrec. */ if ((off % real_dev_bsize) == 0) continue; switch (rec->rec_jrefrec.jr_op) { case JOP_ADDREF: case JOP_REMREF: case JOP_MVREF: ino_append(rec); break; case JOP_NEWBLK: case JOP_FREEBLK: blk_build((struct jblkrec *)rec); break; case JOP_TRUNC: case JOP_SYNC: ino_build_trunc((struct jtrncrec *)rec); break; default: err_suj("Unknown journal operation %d (%d)\n", rec->rec_jrefrec.jr_op, off); } i++; } } } /* * Prune the journal segments to those we care about based on the * oldest sequence in the newest segment. Order the segment list * based on sequence number. */ static void suj_prune(void) { struct suj_seg *seg; struct suj_seg *segn; uint64_t newseq; int discard; if (debug) printf("Pruning up to %jd\n", oldseq); /* First free the expired segments. */ TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { if (seg->ss_rec.jsr_seq >= oldseq) continue; TAILQ_REMOVE(&allsegs, seg, ss_next); free(seg->ss_blk); free(seg); } /* Next ensure that segments are ordered properly. */ seg = TAILQ_FIRST(&allsegs); if (seg == NULL) { if (debug) printf("Empty journal\n"); return; } newseq = seg->ss_rec.jsr_seq; for (;;) { seg = TAILQ_LAST(&allsegs, seghd); if (seg->ss_rec.jsr_seq >= newseq) break; TAILQ_REMOVE(&allsegs, seg, ss_next); TAILQ_INSERT_HEAD(&allsegs, seg, ss_next); newseq = seg->ss_rec.jsr_seq; } if (newseq != oldseq) { TAILQ_FOREACH(seg, &allsegs, ss_next) { printf("%jd, ", seg->ss_rec.jsr_seq); } printf("\n"); err_suj("Journal file sequence mismatch %jd != %jd\n", newseq, oldseq); } /* * The kernel may asynchronously write segments which can create * gaps in the sequence space. Throw away any segments after the * gap as the kernel guarantees only those that are contiguously * reachable are marked as completed. */ discard = 0; TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { if (!discard && newseq++ == seg->ss_rec.jsr_seq) { jrecs += seg->ss_rec.jsr_cnt; jbytes += seg->ss_rec.jsr_blocks * real_dev_bsize; continue; } discard = 1; if (debug) printf("Journal order mismatch %jd != %jd pruning\n", newseq-1, seg->ss_rec.jsr_seq); TAILQ_REMOVE(&allsegs, seg, ss_next); free(seg->ss_blk); free(seg); } if (debug) printf("Processing journal segments from %jd to %jd\n", oldseq, newseq-1); } /* * Verify the journal inode before attempting to read records. */ static int suj_verifyino(union dinode *ip) { if (DIP(ip, di_nlink) != 1) { printf("Invalid link count %d for journal inode %ju\n", DIP(ip, di_nlink), (uintmax_t)sujino); return (-1); } if ((DIP(ip, di_flags) & (SF_IMMUTABLE | SF_NOUNLINK)) != (SF_IMMUTABLE | SF_NOUNLINK)) { printf("Invalid flags 0x%X for journal inode %ju\n", DIP(ip, di_flags), (uintmax_t)sujino); return (-1); } if (DIP(ip, di_mode) != (IFREG | IREAD)) { printf("Invalid mode %o for journal inode %ju\n", DIP(ip, di_mode), (uintmax_t)sujino); return (-1); } if (DIP(ip, di_size) < SUJ_MIN) { printf("Invalid size %jd for journal inode %ju\n", DIP(ip, di_size), (uintmax_t)sujino); return (-1); } if (DIP(ip, di_modrev) != fs->fs_mtime) { printf("Journal timestamp does not match fs mount time\n"); return (-1); } return (0); } struct jblocks { struct jextent *jb_extent; /* Extent array. */ int jb_avail; /* Available extents. */ int jb_used; /* Last used extent. */ int jb_head; /* Allocator head. */ int jb_off; /* Allocator extent offset. */ }; struct jextent { ufs2_daddr_t je_daddr; /* Disk block address. */ int je_blocks; /* Disk block count. */ }; static struct jblocks *suj_jblocks; static struct jblocks * jblocks_create(void) { struct jblocks *jblocks; int size; jblocks = errmalloc(sizeof(*jblocks)); jblocks->jb_avail = 10; jblocks->jb_used = 0; jblocks->jb_head = 0; jblocks->jb_off = 0; size = sizeof(struct jextent) * jblocks->jb_avail; jblocks->jb_extent = errmalloc(size); bzero(jblocks->jb_extent, size); return (jblocks); } /* * Return the next available disk block and the amount of contiguous * free space it contains. */ static ufs2_daddr_t jblocks_next(struct jblocks *jblocks, int bytes, int *actual) { struct jextent *jext; ufs2_daddr_t daddr; int freecnt; int blocks; blocks = bytes / disk->d_bsize; jext = &jblocks->jb_extent[jblocks->jb_head]; freecnt = jext->je_blocks - jblocks->jb_off; if (freecnt == 0) { jblocks->jb_off = 0; if (++jblocks->jb_head > jblocks->jb_used) return (0); jext = &jblocks->jb_extent[jblocks->jb_head]; freecnt = jext->je_blocks; } if (freecnt > blocks) freecnt = blocks; *actual = freecnt * disk->d_bsize; daddr = jext->je_daddr + jblocks->jb_off; return (daddr); } /* * Advance the allocation head by a specified number of bytes, consuming * one journal segment. */ static void jblocks_advance(struct jblocks *jblocks, int bytes) { jblocks->jb_off += bytes / disk->d_bsize; } static void jblocks_destroy(struct jblocks *jblocks) { free(jblocks->jb_extent); free(jblocks); } static void jblocks_add(struct jblocks *jblocks, ufs2_daddr_t daddr, int blocks) { struct jextent *jext; int size; jext = &jblocks->jb_extent[jblocks->jb_used]; /* Adding the first block. */ if (jext->je_daddr == 0) { jext->je_daddr = daddr; jext->je_blocks = blocks; return; } /* Extending the last extent. */ if (jext->je_daddr + jext->je_blocks == daddr) { jext->je_blocks += blocks; return; } /* Adding a new extent. */ if (++jblocks->jb_used == jblocks->jb_avail) { jblocks->jb_avail *= 2; size = sizeof(struct jextent) * jblocks->jb_avail; jext = errmalloc(size); bzero(jext, size); bcopy(jblocks->jb_extent, jext, sizeof(struct jextent) * jblocks->jb_used); free(jblocks->jb_extent); jblocks->jb_extent = jext; } jext = &jblocks->jb_extent[jblocks->jb_used]; jext->je_daddr = daddr; jext->je_blocks = blocks; return; } /* * Add a file block from the journal to the extent map. We can't read * each file block individually because the kernel treats it as a circular * buffer and segments may span mutliple contiguous blocks. */ static void suj_add_block(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { jblocks_add(suj_jblocks, fsbtodb(fs, blk), fsbtodb(fs, frags)); } static void suj_read(void) { uint8_t block[1 * 1024 * 1024]; struct suj_seg *seg; struct jsegrec *recn; struct jsegrec *rec; ufs2_daddr_t blk; int readsize; int blocks; int recsize; int size; int i; /* * Read records until we exhaust the journal space. If we find * an invalid record we start searching for a valid segment header * at the next block. This is because we don't have a head/tail * pointer and must recover the information indirectly. At the gap * between the head and tail we won't necessarily have a valid * segment. */ restart: for (;;) { size = sizeof(block); blk = jblocks_next(suj_jblocks, size, &readsize); if (blk == 0) return; size = readsize; /* * Read 1MB at a time and scan for records within this block. */ if (bread(disk, blk, &block, size) == -1) { err_suj("Error reading journal block %jd\n", (intmax_t)blk); } for (rec = (void *)block; size; size -= recsize, rec = (struct jsegrec *)((uintptr_t)rec + recsize)) { recsize = real_dev_bsize; if (rec->jsr_time != fs->fs_mtime) { if (debug) printf("Rec time %jd != fs mtime %jd\n", rec->jsr_time, fs->fs_mtime); jblocks_advance(suj_jblocks, recsize); continue; } if (rec->jsr_cnt == 0) { if (debug) printf("Found illegal count %d\n", rec->jsr_cnt); jblocks_advance(suj_jblocks, recsize); continue; } blocks = rec->jsr_blocks; recsize = blocks * real_dev_bsize; if (recsize > size) { /* * We may just have run out of buffer, restart * the loop to re-read from this spot. */ if (size < fs->fs_bsize && size != readsize && recsize <= fs->fs_bsize) goto restart; if (debug) printf("Found invalid segsize %d > %d\n", recsize, size); recsize = real_dev_bsize; jblocks_advance(suj_jblocks, recsize); continue; } /* * Verify that all blocks in the segment are present. */ for (i = 1; i < blocks; i++) { recn = (void *)((uintptr_t)rec) + i * real_dev_bsize; if (recn->jsr_seq == rec->jsr_seq && recn->jsr_time == rec->jsr_time) continue; if (debug) printf("Incomplete record %jd (%d)\n", rec->jsr_seq, i); recsize = i * real_dev_bsize; jblocks_advance(suj_jblocks, recsize); goto restart; } seg = errmalloc(sizeof(*seg)); seg->ss_blk = errmalloc(recsize); seg->ss_rec = *rec; bcopy((void *)rec, seg->ss_blk, recsize); if (rec->jsr_oldest > oldseq) oldseq = rec->jsr_oldest; TAILQ_INSERT_TAIL(&allsegs, seg, ss_next); jblocks_advance(suj_jblocks, recsize); } } } /* * Search a directory block for the SUJ_FILE. */ static void suj_find(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { char block[MAXBSIZE]; struct direct *dp; int bytes; int off; if (sujino) return; bytes = lfragtosize(fs, frags); if (bread(disk, fsbtodb(fs, blk), block, bytes) <= 0) err_suj("Failed to read UFS_ROOTINO directory block %jd\n", blk); for (off = 0; off < bytes; off += dp->d_reclen) { dp = (struct direct *)&block[off]; if (dp->d_reclen == 0) break; if (dp->d_ino == 0) continue; if (dp->d_namlen != strlen(SUJ_FILE)) continue; if (bcmp(dp->d_name, SUJ_FILE, dp->d_namlen) != 0) continue; sujino = dp->d_ino; return; } } /* * Orchestrate the verification of a filesystem via the softupdates journal. */ int suj_check(const char *filesys) { union dinode *jip; union dinode *ip; uint64_t blocks; int retval; struct suj_seg *seg; struct suj_seg *segn; initsuj(); opendisk(filesys); /* * Set an exit point when SUJ check failed */ retval = setjmp(jmpbuf); if (retval != 0) { pwarn("UNEXPECTED SU+J INCONSISTENCY\n"); TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { TAILQ_REMOVE(&allsegs, seg, ss_next); free(seg->ss_blk); free(seg); } if (reply("FALLBACK TO FULL FSCK") == 0) { ckfini(0); exit(EEXIT); } else return (-1); } /* * Find the journal inode. */ ip = ino_read(UFS_ROOTINO); sujino = 0; ino_visit(ip, UFS_ROOTINO, suj_find, 0); if (sujino == 0) { printf("Journal inode removed. Use tunefs to re-create.\n"); sblock.fs_flags &= ~FS_SUJ; sblock.fs_sujfree = 0; return (-1); } /* * Fetch the journal inode and verify it. */ jip = ino_read(sujino); printf("** SU+J Recovering %s\n", filesys); if (suj_verifyino(jip) != 0) return (-1); /* * Build a list of journal blocks in jblocks before parsing the * available journal blocks in with suj_read(). */ printf("** Reading %jd byte journal from inode %ju.\n", DIP(jip, di_size), (uintmax_t)sujino); suj_jblocks = jblocks_create(); blocks = ino_visit(jip, sujino, suj_add_block, 0); if (blocks != numfrags(fs, DIP(jip, di_size))) { printf("Sparse journal inode %ju.\n", (uintmax_t)sujino); return (-1); } suj_read(); jblocks_destroy(suj_jblocks); suj_jblocks = NULL; if (preen || reply("RECOVER")) { printf("** Building recovery table.\n"); suj_prune(); suj_build(); cg_apply(cg_build); printf("** Resolving unreferenced inode list.\n"); ino_unlinked(); printf("** Processing journal entries.\n"); cg_apply(cg_trunc); cg_apply(cg_check_blk); cg_apply(cg_adj_blk); cg_apply(cg_check_ino); } if (preen == 0 && (jrecs > 0 || jbytes > 0) && reply("WRITE CHANGES") == 0) return (0); /* * To remain idempotent with partial truncations the free bitmaps * must be written followed by indirect blocks and lastly inode * blocks. This preserves access to the modified pointers until * they are freed. */ cg_apply(cg_write); dblk_write(); cg_apply(cg_write_inos); /* Write back superblock. */ closedisk(filesys); if (jrecs > 0 || jbytes > 0) { printf("** %jd journal records in %jd bytes for %.2f%% utilization\n", jrecs, jbytes, ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100); printf("** Freed %jd inodes (%jd dirs) %jd blocks, and %jd frags.\n", freeinos, freedir, freeblocks, freefrags); } return (0); } static void initsuj(void) { int i; for (i = 0; i < SUJ_HASHSIZE; i++) { LIST_INIT(&cghash[i]); LIST_INIT(&dbhash[i]); } lastcg = NULL; lastblk = NULL; TAILQ_INIT(&allsegs); oldseq = 0; disk = NULL; fs = NULL; sujino = 0; freefrags = 0; freeblocks = 0; freeinos = 0; freedir = 0; jbytes = 0; jrecs = 0; suj_jblocks = NULL; } Index: head/share/man/man5/acct.5 =================================================================== --- head/share/man/man5/acct.5 (revision 318735) +++ head/share/man/man5/acct.5 (revision 318736) @@ -1,127 +1,127 @@ .\" Copyright (c) 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)acct.5 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd May 15, 2007 +.Dd February 13, 2017 .Dt ACCT 5 .Os .Sh NAME .Nm acct .Nd execution accounting file .Sh SYNOPSIS .In sys/types.h .In sys/acct.h .Sh DESCRIPTION The kernel maintains the following .Fa acct information structure for all processes. If a process terminates, and accounting is enabled, the kernel calls the .Xr acct 2 function call to prepare and append the record to the accounting file. .Bd -literal #define AC_COMM_LEN 16 /* - * Accounting structure version 2 (current). + * Accounting structure version 3 (current). * The first byte is always zero. * Time units are microseconds. */ -struct acctv2 { +struct acctv3 { uint8_t ac_zero; /* zero identifies new version */ uint8_t ac_version; /* record version number */ uint16_t ac_len; /* record length */ char ac_comm[AC_COMM_LEN]; /* command name */ float ac_utime; /* user time */ float ac_stime; /* system time */ float ac_etime; /* elapsed time */ time_t ac_btime; /* starting time */ uid_t ac_uid; /* user id */ gid_t ac_gid; /* group id */ float ac_mem; /* average memory usage */ float ac_io; /* count of IO blocks */ __dev_t ac_tty; /* controlling tty */ uint16_t ac_len2; /* record length */ union { - __dev_t ac_align; /* force v1 compatible alignment */ + uint32_t ac_align; /* force v1 compatible alignment */ #define AFORK 0x01 /* forked but not exec'ed */ /* ASU is no longer supported */ #define ASU 0x02 /* used super-user permissions */ #define ACOMPAT 0x04 /* used compatibility mode */ #define ACORE 0x08 /* dumped core */ #define AXSIG 0x10 /* killed by a signal */ #define ANVER 0x20 /* new record version */ uint8_t ac_flag; /* accounting flags */ } ac_trailer; #define ac_flagx ac_trailer.ac_flag }; .Ed .Pp If a terminated process was created by an .Xr execve 2 , the name of the executed file (at most ten characters of it) is saved in the field .Fa ac_comm and its status is saved by setting one of more of the following flags in .Fa ac_flag : .Dv AFORK , .Dv ACOMPAT , .Dv ACORE and .Dv ASIG . .Dv ASU is no longer supported. .Dv ANVER is always set in the above structure. .Sh SEE ALSO .Xr lastcomm 1 , .Xr acct 2 , .Xr execve 2 , .Xr sa 8 .Sh HISTORY A .Nm file format appeared in .At v7 . The current record format was introduced on May 2007. It is backwards compatible with the previous format, which is still documented in .In sys/acct.h and supported by .Xr lastcomm 1 and .Xr sa 8 . Index: head/share/man/man5/dir.5 =================================================================== --- head/share/man/man5/dir.5 (revision 318735) +++ head/share/man/man5/dir.5 (revision 318736) @@ -1,156 +1,167 @@ .\" Copyright (c) 1983, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)dir.5 8.3 (Berkeley) 4/19/94 .\" $FreeBSD$ .\" -.Dd April 19, 1994 +.Dd February 13, 2017 .Dt DIR 5 .Os .Sh NAME .Nm dir , .Nm dirent .Nd directory file format .Sh SYNOPSIS .In dirent.h .Sh DESCRIPTION Directories provide a convenient hierarchical method of grouping files while obscuring the underlying details of the storage medium. A directory file is differentiated from a plain file by a flag in its .Xr inode 5 entry. It consists of records (directory entries) each of which contains information about a file and a pointer to the file itself. Directory entries may contain other directories as well as plain files; such nested directories are referred to as subdirectories. A hierarchy of directories and files is formed in this manner and is called a file system (or referred to as a file system tree). .\" An entry in this tree, .\" nested or not nested, .\" is a pathname. .Pp Each directory file contains two special directory entries; one is a pointer to the directory itself called dot .Ql .\& and the other a pointer to its parent directory called dot-dot .Ql \&.. . Dot and dot-dot are valid pathnames, however, the system root directory .Ql / , has no parent and dot-dot points to itself like dot. .Pp File system nodes are ordinary directory files on which has been grafted a file system object, such as a physical disk or a partitioned area of such a disk. (See .Xr mount 2 and .Xr mount 8 . ) .Pp The directory entry format is defined in the file .In sys/dirent.h (which should not be included directly by applications): .Bd -literal #ifndef _SYS_DIRENT_H_ #define _SYS_DIRENT_H_ #include /* * The dirent structure defines the format of directory entries returned by * the getdirentries(2) system call. * * A directory entry has a struct dirent at the front of it, containing its * inode number, the length of the entry, and the length of the name * contained in the entry. These are followed by the name padded to a 4 * byte boundary with null bytes. All names are guaranteed null terminated. * The maximum length of a name in a directory is MAXNAMLEN. + * Explicit pad is added between the last member of the header and + * d_name, to avoid having the ABI padding in the end of dirent on + * LP64 arches. There is code depending on d_name being last. Also, + * keeping this pad for ILP32 architectures simplifies compat32 layer. */ struct dirent { - __uint32_t d_fileno; /* file number of entry */ + ino_t d_fileno; /* file number of entry */ + off_t d_off; /* directory offset of entry */ __uint16_t d_reclen; /* length of this record */ - __uint8_t d_type; /* file type, see below */ + __uint8_t d_type; /* file type, see below */ __uint8_t d_namlen; /* length of string in d_name */ -#ifdef _POSIX_SOURCE - char d_name[255 + 1]; /* name must be no longer than this */ -#else + __uint32_t d_pad0; +#if __BSD_VISIBLE #define MAXNAMLEN 255 char d_name[MAXNAMLEN + 1]; /* name must be no longer than this */ +#else + char d_name[255 + 1]; /* name must be no longer than this */ #endif }; /* * File types */ #define DT_UNKNOWN 0 #define DT_FIFO 1 #define DT_CHR 2 #define DT_DIR 4 #define DT_BLK 6 #define DT_REG 8 #define DT_LNK 10 #define DT_SOCK 12 #define DT_WHT 14 /* * Convert between stat structure types and directory types. */ #define IFTODT(mode) (((mode) & 0170000) >> 12) #define DTTOIF(dirtype) ((dirtype) << 12) /* * The _GENERIC_DIRSIZ macro gives the minimum record length which will hold - * the directory entry. This requires the amount of space in struct direct + * the directory entry. This returns the amount of space in struct direct * without the d_name field, plus enough space for the name with a terminating - * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary. + * null byte (dp->d_namlen+1), rounded up to a 8 byte boundary. + * + * XXX although this macro is in the implementation namespace, it requires + * a manifest constant that is not. */ -#define _GENERIC_DIRSIZ(dp) \ - ((sizeof (struct dirent) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3)) +#define _GENERIC_DIRLEN(namlen) \ + ((__offsetof(struct dirent, d_name) + (namlen) + 1 + 7) & ~7) +#define _GENERIC_DIRSIZ(dp) _GENERIC_DIRLEN((dp)->d_namlen) +#endif /* __BSD_VISIBLE */ #ifdef _KERNEL #define GENERIC_DIRSIZ(dp) _GENERIC_DIRSIZ(dp) #endif #endif /* !_SYS_DIRENT_H_ */ .Ed .Sh SEE ALSO .Xr fs 5 , .Xr inode 5 .Sh HISTORY A .Nm file format appeared in .At v7 . .Sh BUGS The usage of the member d_type of struct dirent is unportable as it is .Fx Ns -specific . It also may fail on certain file systems, for example the cd9660 file system. Index: head/sys/bsm/audit.h =================================================================== --- head/sys/bsm/audit.h (revision 318735) +++ head/sys/bsm/audit.h (revision 318736) @@ -1,345 +1,345 @@ /*- * Copyright (c) 2005-2009 Apple Inc. * Copyright (c) 2016 Robert N. M. Watson * All rights reserved. * * Portions of this software were developed by BAE Systems, the University of * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent * Computing (TC) research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Apple Inc. ("Apple") nor the names of * its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _BSM_AUDIT_H #define _BSM_AUDIT_H #include #include #define AUDIT_RECORD_MAGIC 0x828a0f1b #define MAX_AUDIT_RECORDS 20 #define MAXAUDITDATA (0x8000 - 1) #define MAX_AUDIT_RECORD_SIZE MAXAUDITDATA #define MIN_AUDIT_FILE_SIZE (512 * 1024) /* * Minimum noumber of free blocks on the filesystem containing the audit * log necessary to avoid a hard log rotation. DO NOT SET THIS VALUE TO 0 * as the kernel does an unsigned compare, plus we want to leave a few blocks * free so userspace can terminate the log, etc. */ #define AUDIT_HARD_LIMIT_FREE_BLOCKS 4 /* * Triggers for the audit daemon. */ #define AUDIT_TRIGGER_MIN 1 #define AUDIT_TRIGGER_LOW_SPACE 1 /* Below low watermark. */ #define AUDIT_TRIGGER_ROTATE_KERNEL 2 /* Kernel requests rotate. */ #define AUDIT_TRIGGER_READ_FILE 3 /* Re-read config file. */ #define AUDIT_TRIGGER_CLOSE_AND_DIE 4 /* Terminate audit. */ #define AUDIT_TRIGGER_NO_SPACE 5 /* Below min free space. */ #define AUDIT_TRIGGER_ROTATE_USER 6 /* User requests rotate. */ #define AUDIT_TRIGGER_INITIALIZE 7 /* User initialize of auditd. */ #define AUDIT_TRIGGER_EXPIRE_TRAILS 8 /* User expiration of trails. */ #define AUDIT_TRIGGER_MAX 8 /* * The special device filename (FreeBSD). */ #define AUDITDEV_FILENAME "audit" #define AUDIT_TRIGGER_FILE ("/dev/" AUDITDEV_FILENAME) /* * Pre-defined audit IDs */ #define AU_DEFAUDITID (uid_t)(-1) #define AU_DEFAUDITSID 0 #define AU_ASSIGN_ASID -1 /* * IPC types. */ #define AT_IPC_MSG ((u_char)1) /* Message IPC id. */ #define AT_IPC_SEM ((u_char)2) /* Semaphore IPC id. */ #define AT_IPC_SHM ((u_char)3) /* Shared mem IPC id. */ /* * Audit conditions. */ #define AUC_UNSET 0 #define AUC_AUDITING 1 #define AUC_NOAUDIT 2 #define AUC_DISABLED -1 /* * auditon(2) commands. */ #define A_OLDGETPOLICY 2 #define A_OLDSETPOLICY 3 #define A_GETKMASK 4 #define A_SETKMASK 5 #define A_OLDGETQCTRL 6 #define A_OLDSETQCTRL 7 #define A_GETCWD 8 #define A_GETCAR 9 #define A_GETSTAT 12 #define A_SETSTAT 13 #define A_SETUMASK 14 #define A_SETSMASK 15 #define A_OLDGETCOND 20 #define A_OLDSETCOND 21 #define A_GETCLASS 22 #define A_SETCLASS 23 #define A_GETPINFO 24 #define A_SETPMASK 25 #define A_SETFSIZE 26 #define A_GETFSIZE 27 #define A_GETPINFO_ADDR 28 #define A_GETKAUDIT 29 #define A_SETKAUDIT 30 #define A_SENDTRIGGER 31 #define A_GETSINFO_ADDR 32 #define A_GETPOLICY 33 #define A_SETPOLICY 34 #define A_GETQCTRL 35 #define A_SETQCTRL 36 #define A_GETCOND 37 #define A_SETCOND 38 #define A_GETEVENT 39 /* Get audit event-to-name mapping. */ #define A_SETEVENT 40 /* Set audit event-to-name mapping. */ /* * Audit policy controls. */ #define AUDIT_CNT 0x0001 #define AUDIT_AHLT 0x0002 #define AUDIT_ARGV 0x0004 #define AUDIT_ARGE 0x0008 #define AUDIT_SEQ 0x0010 #define AUDIT_WINDATA 0x0020 #define AUDIT_USER 0x0040 #define AUDIT_GROUP 0x0080 #define AUDIT_TRAIL 0x0100 #define AUDIT_PATH 0x0200 #define AUDIT_SCNT 0x0400 #define AUDIT_PUBLIC 0x0800 #define AUDIT_ZONENAME 0x1000 #define AUDIT_PERZONE 0x2000 /* * Default audit queue control parameters. */ #define AQ_HIWATER 100 #define AQ_MAXHIGH 10000 #define AQ_LOWATER 10 #define AQ_BUFSZ MAXAUDITDATA #define AQ_MAXBUFSZ 1048576 /* * Default minimum percentage free space on file system. */ #define AU_FS_MINFREE 20 /* * Type definitions used indicating the length of variable length addresses * in tokens containing addresses, such as header fields. */ #define AU_IPv4 4 #define AU_IPv6 16 __BEGIN_DECLS typedef uid_t au_id_t; typedef pid_t au_asid_t; typedef u_int16_t au_event_t; typedef u_int16_t au_emod_t; typedef u_int32_t au_class_t; typedef u_int64_t au_asflgs_t __attribute__ ((aligned (8))); struct au_tid { - dev_t port; + u_int32_t port; /* XXX dev_t compatibility */ u_int32_t machine; }; typedef struct au_tid au_tid_t; struct au_tid_addr { - dev_t at_port; + u_int32_t at_port; /* XXX dev_t compatibility */ u_int32_t at_type; u_int32_t at_addr[4]; }; typedef struct au_tid_addr au_tid_addr_t; struct au_mask { unsigned int am_success; /* Success bits. */ unsigned int am_failure; /* Failure bits. */ }; typedef struct au_mask au_mask_t; struct auditinfo { au_id_t ai_auid; /* Audit user ID. */ au_mask_t ai_mask; /* Audit masks. */ au_tid_t ai_termid; /* Terminal ID. */ au_asid_t ai_asid; /* Audit session ID. */ }; typedef struct auditinfo auditinfo_t; struct auditinfo_addr { au_id_t ai_auid; /* Audit user ID. */ au_mask_t ai_mask; /* Audit masks. */ au_tid_addr_t ai_termid; /* Terminal ID. */ au_asid_t ai_asid; /* Audit session ID. */ au_asflgs_t ai_flags; /* Audit session flags. */ }; typedef struct auditinfo_addr auditinfo_addr_t; struct auditpinfo { pid_t ap_pid; /* ID of target process. */ au_id_t ap_auid; /* Audit user ID. */ au_mask_t ap_mask; /* Audit masks. */ au_tid_t ap_termid; /* Terminal ID. */ au_asid_t ap_asid; /* Audit session ID. */ }; typedef struct auditpinfo auditpinfo_t; struct auditpinfo_addr { pid_t ap_pid; /* ID of target process. */ au_id_t ap_auid; /* Audit user ID. */ au_mask_t ap_mask; /* Audit masks. */ au_tid_addr_t ap_termid; /* Terminal ID. */ au_asid_t ap_asid; /* Audit session ID. */ au_asflgs_t ap_flags; /* Audit session flags. */ }; typedef struct auditpinfo_addr auditpinfo_addr_t; struct au_session { auditinfo_addr_t *as_aia_p; /* Ptr to full audit info. */ au_mask_t as_mask; /* Process Audit Masks. */ }; typedef struct au_session au_session_t; /* * Contents of token_t are opaque outside of libbsm. */ typedef struct au_token token_t; /* * Kernel audit queue control parameters: * Default: Maximum: * aq_hiwater: AQ_HIWATER (100) AQ_MAXHIGH (10000) * aq_lowater: AQ_LOWATER (10) mach_port_name_t audit_session_self(void); au_asid_t audit_session_join(mach_port_name_t port); #endif /* __APPLE_API_PRIVATE */ #endif /* defined(_KERNEL) || defined(KERNEL) */ __END_DECLS #endif /* !_BSM_AUDIT_H */ Index: head/sys/cddl/compat/opensolaris/sys/dirent.h =================================================================== --- head/sys/cddl/compat/opensolaris/sys/dirent.h (revision 318735) +++ head/sys/cddl/compat/opensolaris/sys/dirent.h (revision 318736) @@ -1,47 +1,45 @@ /*- * Copyright (c) 2007 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _OPENSOLARIS_SYS_DIRENT_H_ #define _OPENSOLARIS_SYS_DIRENT_H_ #include #include_next typedef struct dirent dirent64_t; typedef ino_t ino64_t; #define dirent64 dirent #define d_ino d_fileno -#define DIRENT64_RECLEN(len) ((sizeof(struct dirent) - \ - sizeof(((struct dirent *)NULL)->d_name) + \ - (len) + 1 + 3) & ~3) +#define DIRENT64_RECLEN(len) _GENERIC_DIRLEN(len) #endif /* !_OPENSOLARIS_SYS_DIRENT_H_ */ Index: head/sys/compat/freebsd32/capabilities.conf =================================================================== --- head/sys/compat/freebsd32/capabilities.conf (revision 318735) +++ head/sys/compat/freebsd32/capabilities.conf (revision 318736) @@ -1,283 +1,288 @@ ## ## Copyright (c) 2008-2010 Robert N. M. Watson ## Copyright (c) 2016 The FreeBSD Foundation ## All rights reserved. ## ## This software was developed at the University of Cambridge Computer ## Laboratory with support from a grant from Google, Inc. ## ## Portions of this software were developed by Konstantin Belousov ## under sponsorship from the FreeBSD Foundation. ## ## Redistribution and use in source and binary forms, with or without ## modification, are permitted provided that the following conditions ## are met: ## 1. Redistributions of source code must retain the above copyright ## notice, this list of conditions and the following disclaimer. ## 2. Redistributions in binary form must reproduce the above copyright ## notice, this list of conditions and the following disclaimer in the ## documentation and/or other materials provided with the distribution. ## ## THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ## ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ## ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ## FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ## LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ## OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ## SUCH DAMAGE. ## ## List of system calls enabled in freebsd32 capability mode, one name ## per line. See the original list in the sys/kern/capabilities.conf. ## Position of the compat syscall in this file must be identical to ## the master, to facilitate comparision and diagnostic. ## ## $FreeBSD$ ## __acl_aclcheck_fd __acl_delete_fd __acl_get_fd __acl_set_fd __mac_get_fd #__mac_get_pid __mac_get_proc __mac_set_fd __mac_set_proc freebsd32_sysctl freebsd32_umtx_op abort2 accept accept4 aio_cancel freebsd32_aio_error aio_fsync freebsd32_aio_read freebsd32_aio_return freebsd32_aio_suspend freebsd32_aio_waitcomplete freebsd32_aio_write #audit bindat cap_enter cap_fcntls_get cap_fcntls_limit cap_getmode freebsd32_cap_ioctls_get freebsd32_cap_ioctls_limit __cap_rights_get cap_rights_limit freebsd32_clock_getres freebsd32_clock_gettime close closefrom connectat #cpuset #freebsd32_cpuset_getaffinity #freebsd32_cpuset_getid #freebsd32_cpuset_setaffinity #freebsd32_cpuset_setid dup dup2 extattr_delete_fd extattr_get_fd extattr_list_fd extattr_set_fd fchflags fchmod fchown freebsd32_fcntl freebsd32_fexecve flock fork fpathconf +freebsd11_freebsd32_fstat +freebsd11_freebsd32_fstatat +freebsd11_freebsd32_getdirentries +freebsd11_freebsd32_fstatfs +freebsd11_freebsd32_mknodat freebsd6_freebsd32_ftruncate freebsd6_freebsd32_lseek freebsd6_freebsd32_mmap freebsd6_freebsd32_pread freebsd6_freebsd32_pwrite freebsd32_fstat fstatfs fsync ftruncate freebsd32_futimens freebsd32_futimes getaudit getaudit_addr getauid freebsd32_getcontext getdents freebsd32_getdirentries getdomainname getdtablesize getegid geteuid gethostid gethostname freebsd32_getitimer getgid getgroups getlogin freebsd32_getpagesize getpeername getpgid getpgrp getpid getppid getpriority getresgid getresuid getrlimit freebsd32_getrusage getsid getsockname getsockopt freebsd32_gettimeofday getuid freebsd32_ioctl issetugid freebsd32_kevent kill freebsd32_kmq_notify freebsd32_kmq_setattr freebsd32_kmq_timedreceive freebsd32_kmq_timedsend kqueue freebsd32_ktimer_create ktimer_delete ktimer_getoverrun freebsd32_ktimer_gettime freebsd32_ktimer_settime #ktrace freebsd32_lio_listio listen freebsd32_lseek madvise mincore minherit mlock mlockall freebsd32_mmap freebsd32_mprotect msync munlock munlockall munmap freebsd32_nanosleep ntp_gettime freebsd6_freebsd32_aio_read freebsd6_freebsd32_aio_write obreak freebsd6_freebsd32_lio_listio chflagsat faccessat fchmodat fchownat freebsd32_fstatat freebsd32_futimesat linkat mkdirat mkfifoat mknodat openat readlinkat renameat symlinkat unlinkat freebsd32_utimensat pdfork pdgetpid pdkill #pdwait4 # not yet implemented freebsd32_pipe pipe2 poll freebsd32_pread freebsd32_preadv profil #ptrace freebsd32_pwrite freebsd32_pwritev read freebsd32_readv freebsd6_freebsd32_recv freebsd32_recvfrom freebsd32_recvmsg rtprio rtprio_thread sbrk sched_get_priority_max sched_get_priority_min sched_getparam sched_getscheduler sched_rr_get_interval sched_setparam sched_setscheduler sched_yield sctp_generic_recvmsg sctp_generic_sendmsg sctp_generic_sendmsg_iov sctp_peeloff freebsd32_pselect freebsd32_select freebsd6_freebsd32_send freebsd32_sendfile freebsd32_sendmsg sendto setaudit setaudit_addr setauid freebsd32_setcontext setegid seteuid setgid freebsd32_setitimer setpriority setregid setresgid setresuid setreuid setrlimit setsid setsockopt setuid shm_open shutdown freebsd32_sigaction freebsd32_sigaltstack freebsd32_sigblock freebsd32_sigpending sigprocmask sigqueue freebsd32_sigreturn freebsd32_sigsetmask ofreebsd32_sigstack sigsuspend freebsd32_sigtimedwait freebsd32_sigvec freebsd32_sigwaitinfo sigwait socket socketpair sstk sync sys_exit freebsd32_sysarch thr_create thr_exit thr_kill #thr_kill2 freebsd32_thr_new thr_self thr_set_name freebsd32_thr_suspend thr_wake umask utrace uuidgen write freebsd32_writev yield Index: head/sys/compat/freebsd32/freebsd32.h =================================================================== --- head/sys/compat/freebsd32/freebsd32.h (revision 318735) +++ head/sys/compat/freebsd32/freebsd32.h (revision 318736) @@ -1,374 +1,413 @@ /*- * Copyright (c) 2001 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _COMPAT_FREEBSD32_FREEBSD32_H_ #define _COMPAT_FREEBSD32_FREEBSD32_H_ #include #include #include #define PTRIN(v) (void *)(uintptr_t) (v) #define PTROUT(v) (u_int32_t)(uintptr_t) (v) #define CP(src,dst,fld) do { (dst).fld = (src).fld; } while (0) #define PTRIN_CP(src,dst,fld) \ do { (dst).fld = PTRIN((src).fld); } while (0) #define PTROUT_CP(src,dst,fld) \ do { (dst).fld = PTROUT((src).fld); } while (0) /* * Being a newer port, 32-bit FreeBSD/MIPS uses 64-bit time_t. */ #ifdef __mips__ typedef int64_t time32_t; #else typedef int32_t time32_t; #endif struct timeval32 { time32_t tv_sec; int32_t tv_usec; }; #define TV_CP(src,dst,fld) do { \ CP((src).fld,(dst).fld,tv_sec); \ CP((src).fld,(dst).fld,tv_usec); \ } while (0) struct timespec32 { time32_t tv_sec; int32_t tv_nsec; }; #define TS_CP(src,dst,fld) do { \ CP((src).fld,(dst).fld,tv_sec); \ CP((src).fld,(dst).fld,tv_nsec); \ } while (0) struct itimerspec32 { struct timespec32 it_interval; struct timespec32 it_value; }; #define ITS_CP(src, dst) do { \ TS_CP((src), (dst), it_interval); \ TS_CP((src), (dst), it_value); \ } while (0) struct rusage32 { struct timeval32 ru_utime; struct timeval32 ru_stime; int32_t ru_maxrss; int32_t ru_ixrss; int32_t ru_idrss; int32_t ru_isrss; int32_t ru_minflt; int32_t ru_majflt; int32_t ru_nswap; int32_t ru_inblock; int32_t ru_oublock; int32_t ru_msgsnd; int32_t ru_msgrcv; int32_t ru_nsignals; int32_t ru_nvcsw; int32_t ru_nivcsw; }; struct wrusage32 { struct rusage32 wru_self; struct rusage32 wru_children; }; struct itimerval32 { struct timeval32 it_interval; struct timeval32 it_value; }; -#define FREEBSD4_MNAMELEN (88 - 2 * sizeof(int32_t)) /* size of on/from name bufs */ +#define FREEBSD4_MFSNAMELEN 16 +#define FREEBSD4_MNAMELEN (88 - 2 * sizeof(int32_t)) /* 4.x version */ struct statfs32 { int32_t f_spare2; int32_t f_bsize; int32_t f_iosize; int32_t f_blocks; int32_t f_bfree; int32_t f_bavail; int32_t f_files; int32_t f_ffree; fsid_t f_fsid; uid_t f_owner; int32_t f_type; int32_t f_flags; int32_t f_syncwrites; int32_t f_asyncwrites; - char f_fstypename[MFSNAMELEN]; + char f_fstypename[FREEBSD4_MFSNAMELEN]; char f_mntonname[FREEBSD4_MNAMELEN]; int32_t f_syncreads; int32_t f_asyncreads; int16_t f_spares1; char f_mntfromname[FREEBSD4_MNAMELEN]; int16_t f_spares2 __packed; int32_t f_spare[2]; }; struct kevent32 { u_int32_t ident; /* identifier for this event */ short filter; /* filter for event */ u_short flags; u_int fflags; int32_t data; u_int32_t udata; /* opaque user data identifier */ }; struct iovec32 { u_int32_t iov_base; int iov_len; }; struct msghdr32 { u_int32_t msg_name; socklen_t msg_namelen; u_int32_t msg_iov; int msg_iovlen; u_int32_t msg_control; socklen_t msg_controllen; int msg_flags; }; +#if defined(__amd64__) +#define __STAT32_TIME_T_EXT 1 +#endif + struct stat32 { - dev_t st_dev; - ino_t st_ino; + dev_t st_dev; + ino_t st_ino; + nlink_t st_nlink; mode_t st_mode; - nlink_t st_nlink; + u_int16_t st_padding0; uid_t st_uid; gid_t st_gid; - dev_t st_rdev; + u_int32_t st_padding1; + dev_t st_rdev; +#ifdef __STAT32_TIME_T_EXT + __int32_t st_atim_ext; +#endif struct timespec32 st_atim; +#ifdef __STAT32_TIME_T_EXT + __int32_t st_mtim_ext; +#endif struct timespec32 st_mtim; +#ifdef __STAT32_TIME_T_EXT + __int32_t st_ctim_ext; +#endif struct timespec32 st_ctim; +#ifdef __STAT32_TIME_T_EXT + __int32_t st_btim_ext; +#endif + struct timespec32 st_birthtim; off_t st_size; int64_t st_blocks; u_int32_t st_blksize; u_int32_t st_flags; + u_int64_t st_gen; + u_int64_t st_spare[10]; +}; +struct freebsd11_stat32 { + u_int32_t st_dev; + u_int32_t st_ino; + mode_t st_mode; + u_int16_t st_nlink; + uid_t st_uid; + gid_t st_gid; + u_int32_t st_rdev; + struct timespec32 st_atim; + struct timespec32 st_mtim; + struct timespec32 st_ctim; + off_t st_size; + int64_t st_blocks; + u_int32_t st_blksize; + u_int32_t st_flags; u_int32_t st_gen; int32_t st_lspare; struct timespec32 st_birthtim; unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec32)); unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec32)); }; struct ostat32 { __uint16_t st_dev; - ino_t st_ino; + __uint32_t st_ino; mode_t st_mode; - nlink_t st_nlink; + __uint16_t st_nlink; __uint16_t st_uid; __uint16_t st_gid; __uint16_t st_rdev; __int32_t st_size; struct timespec32 st_atim; struct timespec32 st_mtim; struct timespec32 st_ctim; __int32_t st_blksize; __int32_t st_blocks; u_int32_t st_flags; __uint32_t st_gen; }; struct jail32_v0 { u_int32_t version; uint32_t path; uint32_t hostname; u_int32_t ip_number; }; struct jail32 { uint32_t version; uint32_t path; uint32_t hostname; uint32_t jailname; uint32_t ip4s; uint32_t ip6s; uint32_t ip4; uint32_t ip6; }; struct sigaction32 { u_int32_t sa_u; int sa_flags; sigset_t sa_mask; }; struct thr_param32 { uint32_t start_func; uint32_t arg; uint32_t stack_base; uint32_t stack_size; uint32_t tls_base; uint32_t tls_size; uint32_t child_tid; uint32_t parent_tid; int32_t flags; uint32_t rtp; uint32_t spare[3]; }; struct i386_ldt_args32 { uint32_t start; uint32_t descs; uint32_t num; }; struct mq_attr32 { int mq_flags; int mq_maxmsg; int mq_msgsize; int mq_curmsgs; int __reserved[4]; }; struct kinfo_proc32 { int ki_structsize; int ki_layout; uint32_t ki_args; uint32_t ki_paddr; uint32_t ki_addr; uint32_t ki_tracep; uint32_t ki_textvp; uint32_t ki_fd; uint32_t ki_vmspace; uint32_t ki_wchan; pid_t ki_pid; pid_t ki_ppid; pid_t ki_pgid; pid_t ki_tpgid; pid_t ki_sid; pid_t ki_tsid; short ki_jobc; short ki_spare_short1; - dev_t ki_tdev; + uint32_t ki_tdev_freebsd11; sigset_t ki_siglist; sigset_t ki_sigmask; sigset_t ki_sigignore; sigset_t ki_sigcatch; uid_t ki_uid; uid_t ki_ruid; uid_t ki_svuid; gid_t ki_rgid; gid_t ki_svgid; short ki_ngroups; short ki_spare_short2; gid_t ki_groups[KI_NGROUPS]; uint32_t ki_size; int32_t ki_rssize; int32_t ki_swrss; int32_t ki_tsize; int32_t ki_dsize; int32_t ki_ssize; u_short ki_xstat; u_short ki_acflag; fixpt_t ki_pctcpu; u_int ki_estcpu; u_int ki_slptime; u_int ki_swtime; u_int ki_cow; u_int64_t ki_runtime; struct timeval32 ki_start; struct timeval32 ki_childtime; int ki_flag; int ki_kiflag; int ki_traceflag; char ki_stat; signed char ki_nice; char ki_lock; char ki_rqindex; u_char ki_oncpu_old; u_char ki_lastcpu_old; char ki_tdname[TDNAMLEN+1]; char ki_wmesg[WMESGLEN+1]; char ki_login[LOGNAMELEN+1]; char ki_lockname[LOCKNAMELEN+1]; char ki_comm[COMMLEN+1]; char ki_emul[KI_EMULNAMELEN+1]; char ki_loginclass[LOGINCLASSLEN+1]; char ki_moretdname[MAXCOMLEN-TDNAMLEN+1]; char ki_sparestrings[46]; int ki_spareints[KI_NSPARE_INT]; + uint64_t ki_tdev; int ki_oncpu; int ki_lastcpu; int ki_tracer; int ki_flag2; int ki_fibnum; u_int ki_cr_flags; int ki_jid; int ki_numthreads; lwpid_t ki_tid; struct priority ki_pri; struct rusage32 ki_rusage; struct rusage32 ki_rusage_ch; uint32_t ki_pcb; uint32_t ki_kstack; uint32_t ki_udata; uint32_t ki_tdaddr; uint32_t ki_spareptrs[KI_NSPARE_PTR]; /* spare room for growth */ int ki_sparelongs[KI_NSPARE_LONG]; int ki_sflag; int ki_tdflags; }; struct kinfo_sigtramp32 { uint32_t ksigtramp_start; uint32_t ksigtramp_end; uint32_t ksigtramp_spare[4]; }; struct kld32_file_stat_1 { int version; /* set to sizeof(struct kld_file_stat_1) */ char name[MAXPATHLEN]; int refs; int id; uint32_t address; /* load address */ uint32_t size; /* size in bytes */ }; struct kld32_file_stat { int version; /* set to sizeof(struct kld_file_stat) */ char name[MAXPATHLEN]; int refs; int id; uint32_t address; /* load address */ uint32_t size; /* size in bytes */ char pathname[MAXPATHLEN]; }; struct procctl_reaper_pids32 { u_int rp_count; u_int rp_pad0[15]; uint32_t rp_pids; }; #endif /* !_COMPAT_FREEBSD32_FREEBSD32_H_ */ Index: head/sys/compat/freebsd32/freebsd32_misc.c =================================================================== --- head/sys/compat/freebsd32/freebsd32_misc.c (revision 318735) +++ head/sys/compat/freebsd32/freebsd32_misc.c (revision 318736) @@ -1,3153 +1,3321 @@ /*- * Copyright (c) 2002 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/selinfo.h */ #include /* Must come after sys/selinfo.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(compat_freebsd_32bit, "Compatible with 32-bit FreeBSD"); #ifndef __mips__ CTASSERT(sizeof(struct timeval32) == 8); CTASSERT(sizeof(struct timespec32) == 8); CTASSERT(sizeof(struct itimerval32) == 16); #endif CTASSERT(sizeof(struct statfs32) == 256); #ifndef __mips__ CTASSERT(sizeof(struct rusage32) == 72); #endif CTASSERT(sizeof(struct sigaltstack32) == 12); CTASSERT(sizeof(struct kevent32) == 20); CTASSERT(sizeof(struct iovec32) == 8); CTASSERT(sizeof(struct msghdr32) == 28); +#ifdef __amd64__ +CTASSERT(sizeof(struct stat32) == 208); +#endif #ifndef __mips__ -CTASSERT(sizeof(struct stat32) == 96); +CTASSERT(sizeof(struct freebsd11_stat32) == 96); #endif CTASSERT(sizeof(struct sigaction32) == 24); static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count); static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count); static int freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp); void freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32) { TV_CP(*s, *s32, ru_utime); TV_CP(*s, *s32, ru_stime); CP(*s, *s32, ru_maxrss); CP(*s, *s32, ru_ixrss); CP(*s, *s32, ru_idrss); CP(*s, *s32, ru_isrss); CP(*s, *s32, ru_minflt); CP(*s, *s32, ru_majflt); CP(*s, *s32, ru_nswap); CP(*s, *s32, ru_inblock); CP(*s, *s32, ru_oublock); CP(*s, *s32, ru_msgsnd); CP(*s, *s32, ru_msgrcv); CP(*s, *s32, ru_nsignals); CP(*s, *s32, ru_nvcsw); CP(*s, *s32, ru_nivcsw); } int freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap) { int error, status; struct rusage32 ru32; struct rusage ru, *rup; if (uap->rusage != NULL) rup = &ru; else rup = NULL; error = kern_wait(td, uap->pid, &status, uap->options, rup); if (error) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0) { freebsd32_rusage_out(&ru, &ru32); error = copyout(&ru32, uap->rusage, sizeof(ru32)); } return (error); } int freebsd32_wait6(struct thread *td, struct freebsd32_wait6_args *uap) { struct wrusage32 wru32; struct __wrusage wru, *wrup; struct siginfo32 si32; struct __siginfo si, *sip; int error, status; if (uap->wrusage != NULL) wrup = &wru; else wrup = NULL; if (uap->info != NULL) { sip = &si; bzero(sip, sizeof(*sip)); } else sip = NULL; error = kern_wait6(td, uap->idtype, PAIR32TO64(id_t, uap->id), &status, uap->options, wrup, sip); if (error != 0) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->wrusage != NULL && error == 0) { freebsd32_rusage_out(&wru.wru_self, &wru32.wru_self); freebsd32_rusage_out(&wru.wru_children, &wru32.wru_children); error = copyout(&wru32, uap->wrusage, sizeof(wru32)); } if (uap->info != NULL && error == 0) { siginfo_to_siginfo32 (&si, &si32); error = copyout(&si32, uap->info, sizeof(si32)); } return (error); } #ifdef COMPAT_FREEBSD4 static void copy_statfs(struct statfs *in, struct statfs32 *out) { statfs_scale_blocks(in, INT32_MAX); bzero(out, sizeof(*out)); CP(*in, *out, f_bsize); out->f_iosize = MIN(in->f_iosize, INT32_MAX); CP(*in, *out, f_blocks); CP(*in, *out, f_bfree); CP(*in, *out, f_bavail); out->f_files = MIN(in->f_files, INT32_MAX); out->f_ffree = MIN(in->f_ffree, INT32_MAX); CP(*in, *out, f_fsid); CP(*in, *out, f_owner); CP(*in, *out, f_type); CP(*in, *out, f_flags); out->f_syncwrites = MIN(in->f_syncwrites, INT32_MAX); out->f_asyncwrites = MIN(in->f_asyncwrites, INT32_MAX); strlcpy(out->f_fstypename, in->f_fstypename, MFSNAMELEN); strlcpy(out->f_mntonname, in->f_mntonname, min(MNAMELEN, FREEBSD4_MNAMELEN)); out->f_syncreads = MIN(in->f_syncreads, INT32_MAX); out->f_asyncreads = MIN(in->f_asyncreads, INT32_MAX); strlcpy(out->f_mntfromname, in->f_mntfromname, min(MNAMELEN, FREEBSD4_MNAMELEN)); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_getfsstat(struct thread *td, struct freebsd4_freebsd32_getfsstat_args *uap) { struct statfs *buf, *sp; struct statfs32 stat32; size_t count, size, copycount; int error; count = uap->bufsize / sizeof(struct statfs32); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, uap->mode); if (size > 0) { sp = buf; copycount = count; while (copycount > 0 && error == 0) { copy_statfs(sp, &stat32); error = copyout(&stat32, uap->buf, sizeof(stat32)); sp++; uap->buf++; copycount--; } free(buf, M_STATFS); } if (error == 0) td->td_retval[0] = count; return (error); } #endif #ifdef COMPAT_FREEBSD10 int freebsd10_freebsd32_pipe(struct thread *td, struct freebsd10_freebsd32_pipe_args *uap) { return (freebsd10_pipe(td, (struct freebsd10_pipe_args*)uap)); } #endif int freebsd32_sigaltstack(struct thread *td, struct freebsd32_sigaltstack_args *uap) { struct sigaltstack32 s32; struct sigaltstack ss, oss, *ssp; int error; if (uap->ss != NULL) { error = copyin(uap->ss, &s32, sizeof(s32)); if (error) return (error); PTRIN_CP(s32, ss, ss_sp); CP(s32, ss, ss_size); CP(s32, ss, ss_flags); ssp = &ss; } else ssp = NULL; error = kern_sigaltstack(td, ssp, &oss); if (error == 0 && uap->oss != NULL) { PTROUT_CP(oss, s32, ss_sp); CP(oss, s32, ss_size); CP(oss, s32, ss_flags); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } /* * Custom version of exec_copyin_args() so that we can translate * the pointers. */ int freebsd32_exec_copyin_args(struct image_args *args, char *fname, enum uio_seg segflg, u_int32_t *argv, u_int32_t *envv) { char *argp, *envp; u_int32_t *p32, arg; size_t length; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ if (fname != NULL) { args->fname = args->buf; error = (segflg == UIO_SYSSPACE) ? copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) goto err_exit; } else length = 0; args->begin_argv = args->buf + length; args->endp = args->begin_argv; args->stringspace = ARG_MAX; /* * extract arguments first */ p32 = argv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; argp = PTRIN(arg); error = copyinstr(argp, args->endp, args->stringspace, &length); if (error) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->argc++; } args->begin_envv = args->endp; /* * extract environment strings */ if (envv) { p32 = envv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; envp = PTRIN(arg); error = copyinstr(envp, args->endp, args->stringspace, &length); if (error) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->envc++; } } return (0); err_exit: exec_free_args(args); return (error); } int freebsd32_execve(struct thread *td, struct freebsd32_execve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = freebsd32_exec_copyin_args(&eargs, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &eargs, NULL); post_execve(td, error, oldvmspace); return (error); } int freebsd32_fexecve(struct thread *td, struct freebsd32_fexecve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = freebsd32_exec_copyin_args(&eargs, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { eargs.fd = uap->fd; error = kern_execve(td, &eargs, NULL); } post_execve(td, error, oldvmspace); return (error); } +#if defined(COMPAT_FREEBSD11) int +freebsd11_freebsd32_mknod(struct thread *td, + struct freebsd11_freebsd32_mknod_args *uap) +{ + + return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode, + uap->dev)); +} + +int +freebsd11_freebsd32_mknodat(struct thread *td, + struct freebsd11_freebsd32_mknodat_args *uap) +{ + + return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, + uap->dev)); +} +#endif /* COMPAT_FREEBSD11 */ + +int freebsd32_mprotect(struct thread *td, struct freebsd32_mprotect_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ) != 0) prot |= PROT_EXEC; #endif return (kern_mprotect(td, (uintptr_t)PTRIN(uap->addr), uap->len, prot)); } int freebsd32_mmap(struct thread *td, struct freebsd32_mmap_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, uap->flags, uap->fd, PAIR32TO64(off_t, uap->pos))); } #ifdef COMPAT_FREEBSD6 int freebsd6_freebsd32_mmap(struct thread *td, struct freebsd6_freebsd32_mmap_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, uap->flags, uap->fd, PAIR32TO64(off_t, uap->pos))); } #endif int freebsd32_setitimer(struct thread *td, struct freebsd32_setitimer_args *uap) { struct itimerval itv, oitv, *itvp; struct itimerval32 i32; int error; if (uap->itv != NULL) { error = copyin(uap->itv, &i32, sizeof(i32)); if (error) return (error); TV_CP(i32, itv, it_interval); TV_CP(i32, itv, it_value); itvp = &itv; } else itvp = NULL; error = kern_setitimer(td, uap->which, itvp, &oitv); if (error || uap->oitv == NULL) return (error); TV_CP(oitv, i32, it_interval); TV_CP(oitv, i32, it_value); return (copyout(&i32, uap->oitv, sizeof(i32))); } int freebsd32_getitimer(struct thread *td, struct freebsd32_getitimer_args *uap) { struct itimerval itv; struct itimerval32 i32; int error; error = kern_getitimer(td, uap->which, &itv); if (error || uap->itv == NULL) return (error); TV_CP(itv, i32, it_interval); TV_CP(itv, i32, it_value); return (copyout(&i32, uap->itv, sizeof(i32))); } int freebsd32_select(struct thread *td, struct freebsd32_select_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; int error; if (uap->tv != NULL) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; /* * XXX Do pointers need PTRIN()? */ return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, sizeof(int32_t) * 8)); } int freebsd32_pselect(struct thread *td, struct freebsd32_pselect_args *uap) { struct timespec32 ts32; struct timespec ts; struct timeval tv, *tvp; sigset_t set, *uset; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); TIMESPEC_TO_TIMEVAL(&tv, &ts); tvp = &tv; } else tvp = NULL; if (uap->sm != NULL) { error = copyin(uap->sm, &set, sizeof(set)); if (error != 0) return (error); uset = &set; } else uset = NULL; /* * XXX Do pointers need PTRIN()? */ error = kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, uset, sizeof(int32_t) * 8); return (error); } /* * Copy 'count' items into the destination list pointed to by uap->eventlist. */ static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; int i, error = 0; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; for (i = 0; i < count; i++) { CP(kevp[i], ks32[i], ident); CP(kevp[i], ks32[i], filter); CP(kevp[i], ks32[i], flags); CP(kevp[i], ks32[i], fflags); CP(kevp[i], ks32[i], data); PTROUT_CP(kevp[i], ks32[i], udata); } error = copyout(ks32, uap->eventlist, count * sizeof *ks32); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. */ static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; int i, error = 0; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; error = copyin(uap->changelist, ks32, count * sizeof *ks32); if (error) goto done; uap->changelist += count; for (i = 0; i < count; i++) { CP(ks32[i], kevp[i], ident); CP(ks32[i], kevp[i], filter); CP(ks32[i], kevp[i], flags); CP(ks32[i], kevp[i], fflags); CP(ks32[i], kevp[i], data); PTRIN_CP(ks32[i], kevp[i], udata); } done: return (error); } int freebsd32_kevent(struct thread *td, struct freebsd32_kevent_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct kevent_copyops k_ops = { .arg = uap, .k_copyout = freebsd32_kevent_copyout, .k_copyin = freebsd32_kevent_copyin, }; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); return (error); } int freebsd32_gettimeofday(struct thread *td, struct freebsd32_gettimeofday_args *uap) { struct timeval atv; struct timeval32 atv32; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); CP(atv, atv32, tv_sec); CP(atv, atv32, tv_usec); error = copyout(&atv32, uap->tp, sizeof (atv32)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = tz_minuteswest; rtz.tz_dsttime = tz_dsttime; error = copyout(&rtz, uap->tzp, sizeof (rtz)); } return (error); } int freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap) { struct rusage32 s32; struct rusage s; int error; error = kern_getrusage(td, uap->who, &s); if (error) return (error); if (uap->rusage != NULL) { freebsd32_rusage_out(&s, &s32); error = copyout(&s32, uap->rusage, sizeof(s32)); } return (error); } static int freebsd32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop) { struct iovec32 iov32; struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof(struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp[i], &iov32, sizeof(struct iovec32)); if (error) { free(uio, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } int freebsd32_readv(struct thread *td, struct freebsd32_readv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_readv(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_writev(struct thread *td, struct freebsd32_writev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_writev(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_preadv(struct thread *td, struct freebsd32_preadv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_preadv(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_pwritev(struct thread *td, struct freebsd32_pwritev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_pwritev(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_copyiniov(struct iovec32 *iovp32, u_int iovcnt, struct iovec **iovp, int error) { struct iovec32 iov32; struct iovec *iov; u_int iovlen; int i; *iovp = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof(struct iovec); iov = malloc(iovlen, M_IOV, M_WAITOK); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp32[i], &iov32, sizeof(struct iovec32)); if (error) { free(iov, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } *iovp = iov; return (0); } static int freebsd32_copyinmsghdr(struct msghdr32 *msg32, struct msghdr *msg) { struct msghdr32 m32; int error; error = copyin(msg32, &m32, sizeof(m32)); if (error) return (error); msg->msg_name = PTRIN(m32.msg_name); msg->msg_namelen = m32.msg_namelen; msg->msg_iov = PTRIN(m32.msg_iov); msg->msg_iovlen = m32.msg_iovlen; msg->msg_control = PTRIN(m32.msg_control); msg->msg_controllen = m32.msg_controllen; msg->msg_flags = m32.msg_flags; return (0); } static int freebsd32_copyoutmsghdr(struct msghdr *msg, struct msghdr32 *msg32) { struct msghdr32 m32; int error; m32.msg_name = PTROUT(msg->msg_name); m32.msg_namelen = msg->msg_namelen; m32.msg_iov = PTROUT(msg->msg_iov); m32.msg_iovlen = msg->msg_iovlen; m32.msg_control = PTROUT(msg->msg_control); m32.msg_controllen = msg->msg_controllen; m32.msg_flags = msg->msg_flags; error = copyout(&m32, msg32, sizeof(m32)); return (error); } #ifndef __mips__ #define FREEBSD32_ALIGNBYTES (sizeof(int) - 1) #else #define FREEBSD32_ALIGNBYTES (sizeof(long) - 1) #endif #define FREEBSD32_ALIGN(p) \ (((u_long)(p) + FREEBSD32_ALIGNBYTES) & ~FREEBSD32_ALIGNBYTES) #define FREEBSD32_CMSG_SPACE(l) \ (FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + FREEBSD32_ALIGN(l)) #define FREEBSD32_CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ FREEBSD32_ALIGN(sizeof(struct cmsghdr))) static int freebsd32_copy_msg_out(struct msghdr *msg, struct mbuf *control) { struct cmsghdr *cm; void *data; socklen_t clen, datalen; int error; caddr_t ctlbuf; int len, maxlen, copylen; struct mbuf *m; error = 0; len = msg->msg_controllen; maxlen = msg->msg_controllen; msg->msg_controllen = 0; m = control; ctlbuf = msg->msg_control; while (m && len > 0) { cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (cm != NULL) { if (sizeof(struct cmsghdr) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; /* Adjust message length */ cm->cmsg_len = FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + datalen; /* Copy cmsghdr */ copylen = sizeof(struct cmsghdr); if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; copylen = len; } error = copyout(cm,ctlbuf,copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (len <= 0) break; /* Copy data */ copylen = datalen; if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; copylen = len; } error = copyout(data,ctlbuf,copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } m = m->m_next; } msg->msg_controllen = (len <= 0) ? maxlen : ctlbuf - (caddr_t)msg->msg_control; exit: return (error); } int freebsd32_recvmsg(td, uap) struct thread *td; struct freebsd32_recvmsg_args /* { int s; struct msghdr32 *msg; int flags; } */ *uap; { struct msghdr msg; struct msghdr32 m32; struct iovec *uiov, *iov; struct mbuf *control = NULL; struct mbuf **controlp; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_flags = uap->flags; uiov = msg.msg_iov; msg.msg_iov = iov; controlp = (msg.msg_control != NULL) ? &control : NULL; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, controlp); if (error == 0) { msg.msg_iov = uiov; if (control != NULL) error = freebsd32_copy_msg_out(&msg, control); else msg.msg_controllen = 0; if (error == 0) error = freebsd32_copyoutmsghdr(&msg, uap->msg); } free(iov, M_IOV); if (control != NULL) m_freem(control); return (error); } /* * Copy-in the array of control messages constructed using alignment * and padding suitable for a 32-bit environment and construct an * mbuf using alignment and padding suitable for a 64-bit kernel. * The alignment and padding are defined indirectly by CMSG_DATA(), * CMSG_SPACE() and CMSG_LEN(). */ static int freebsd32_copyin_control(struct mbuf **mp, caddr_t buf, u_int buflen) { struct mbuf *m; void *md; u_int idx, len, msglen; int error; buflen = FREEBSD32_ALIGN(buflen); if (buflen > MCLBYTES) return (EINVAL); /* * Iterate over the buffer and get the length of each message * in there. This has 32-bit alignment and padding. Use it to * determine the length of these messages when using 64-bit * alignment and padding. */ idx = 0; len = 0; while (idx < buflen) { error = copyin(buf + idx, &msglen, sizeof(msglen)); if (error) return (error); if (msglen < sizeof(struct cmsghdr)) return (EINVAL); msglen = FREEBSD32_ALIGN(msglen); if (idx + msglen > buflen) return (EINVAL); idx += msglen; msglen += CMSG_ALIGN(sizeof(struct cmsghdr)) - FREEBSD32_ALIGN(sizeof(struct cmsghdr)); len += CMSG_ALIGN(msglen); } if (len > MCLBYTES) return (EINVAL); m = m_get(M_WAITOK, MT_CONTROL); if (len > MLEN) MCLGET(m, M_WAITOK); m->m_len = len; md = mtod(m, void *); while (buflen > 0) { error = copyin(buf, md, sizeof(struct cmsghdr)); if (error) break; msglen = *(u_int *)md; msglen = FREEBSD32_ALIGN(msglen); /* Modify the message length to account for alignment. */ *(u_int *)md = msglen + CMSG_ALIGN(sizeof(struct cmsghdr)) - FREEBSD32_ALIGN(sizeof(struct cmsghdr)); md = (char *)md + CMSG_ALIGN(sizeof(struct cmsghdr)); buf += FREEBSD32_ALIGN(sizeof(struct cmsghdr)); buflen -= FREEBSD32_ALIGN(sizeof(struct cmsghdr)); msglen -= FREEBSD32_ALIGN(sizeof(struct cmsghdr)); if (msglen > 0) { error = copyin(buf, md, msglen); if (error) break; md = (char *)md + CMSG_ALIGN(msglen); buf += msglen; buflen -= msglen; } } if (error) m_free(m); else *mp = m; return (error); } int freebsd32_sendmsg(struct thread *td, struct freebsd32_sendmsg_args *uap) { struct msghdr msg; struct msghdr32 m32; struct iovec *iov; struct mbuf *control = NULL; struct sockaddr *to = NULL; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_iov = iov; if (msg.msg_name != NULL) { error = getsockaddr(&to, msg.msg_name, msg.msg_namelen); if (error) { to = NULL; goto out; } msg.msg_name = to; } if (msg.msg_control) { if (msg.msg_controllen < sizeof(struct cmsghdr)) { error = EINVAL; goto out; } error = freebsd32_copyin_control(&control, msg.msg_control, msg.msg_controllen); if (error) goto out; msg.msg_control = NULL; msg.msg_controllen = 0; } error = kern_sendit(td, uap->s, &msg, uap->flags, control, UIO_USERSPACE); out: free(iov, M_IOV); if (to) free(to, M_SONAME); return (error); } int freebsd32_recvfrom(struct thread *td, struct freebsd32_recvfrom_args *uap) { struct msghdr msg; struct iovec aiov; int error; if (uap->fromlenaddr) { error = copyin(PTRIN(uap->fromlenaddr), &msg.msg_namelen, sizeof(msg.msg_namelen)); if (error) return (error); } else { msg.msg_namelen = 0; } msg.msg_name = PTRIN(uap->from); msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = PTRIN(uap->buf); aiov.iov_len = uap->len; msg.msg_control = NULL; msg.msg_flags = uap->flags; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, NULL); if (error == 0 && uap->fromlenaddr) error = copyout(&msg.msg_namelen, PTRIN(uap->fromlenaddr), sizeof (msg.msg_namelen)); return (error); } int freebsd32_settimeofday(struct thread *td, struct freebsd32_settimeofday_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; struct timezone tz, *tzp; int error; if (uap->tv) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &tz, sizeof(tz)); if (error) return (error); tzp = &tz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int freebsd32_utimes(struct thread *td, struct freebsd32_utimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_lutimes(struct thread *td, struct freebsd32_lutimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_lutimes(td, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimes(struct thread *td, struct freebsd32_futimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_futimes(td, uap->fd, sp, UIO_SYSSPACE)); } int freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->times != NULL) { error = copyin(uap->times, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimens(struct thread *td, struct freebsd32_futimens_args *uap) { struct timespec32 ts32[2]; struct timespec ts[2], *tsp; int error; if (uap->times != NULL) { error = copyin(uap->times, ts32, sizeof(ts32)); if (error) return (error); CP(ts32[0], ts[0], tv_sec); CP(ts32[0], ts[0], tv_nsec); CP(ts32[1], ts[1], tv_sec); CP(ts32[1], ts[1], tv_nsec); tsp = ts; } else tsp = NULL; return (kern_futimens(td, uap->fd, tsp, UIO_SYSSPACE)); } int freebsd32_utimensat(struct thread *td, struct freebsd32_utimensat_args *uap) { struct timespec32 ts32[2]; struct timespec ts[2], *tsp; int error; if (uap->times != NULL) { error = copyin(uap->times, ts32, sizeof(ts32)); if (error) return (error); CP(ts32[0], ts[0], tv_sec); CP(ts32[0], ts[0], tv_nsec); CP(ts32[1], ts[1], tv_sec); CP(ts32[1], ts[1], tv_nsec); tsp = ts; } else tsp = NULL; return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, tsp, UIO_SYSSPACE, uap->flag)); } int freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap) { struct timeval32 tv32; struct timeval delta, olddelta, *deltap; int error; if (uap->delta) { error = copyin(uap->delta, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, delta, tv_sec); CP(tv32, delta, tv_usec); deltap = δ } else deltap = NULL; error = kern_adjtime(td, deltap, &olddelta); if (uap->olddelta && error == 0) { CP(olddelta, tv32, tv_sec); CP(olddelta, tv32, tv_usec); error = copyout(&tv32, uap->olddelta, sizeof(tv32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_statfs(struct thread *td, struct freebsd4_freebsd32_statfs_args *uap) { struct statfs32 s32; struct statfs *sp; int error; sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, uap->path, UIO_USERSPACE, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fstatfs(struct thread *td, struct freebsd4_freebsd32_fstatfs_args *uap) { struct statfs32 s32; struct statfs *sp; int error; sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, uap->fd, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fhstatfs(struct thread *td, struct freebsd4_freebsd32_fhstatfs_args *uap) { struct statfs32 s32; struct statfs *sp; fhandle_t fh; int error; if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) return (error); sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fhstatfs(td, fh, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif int freebsd32_pread(struct thread *td, struct freebsd32_pread_args *uap) { return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd32_pwrite(struct thread *td, struct freebsd32_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } #ifdef COMPAT_43 int ofreebsd32_lseek(struct thread *td, struct ofreebsd32_lseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } #endif int freebsd32_lseek(struct thread *td, struct freebsd32_lseek_args *uap) { int error; off_t pos; error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset), uap->whence); /* Expand the quad return into two parts for eax and edx */ pos = td->td_uretoff.tdu_off; td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd32_truncate(struct thread *td, struct freebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, PAIR32TO64(off_t, uap->length))); } int freebsd32_ftruncate(struct thread *td, struct freebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length))); } #ifdef COMPAT_43 int ofreebsd32_getdirentries(struct thread *td, struct ofreebsd32_getdirentries_args *uap) { struct ogetdirentries_args ap; int error; long loff; int32_t loff_cut; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; error = kern_ogetdirentries(td, &ap, &loff); if (error == 0) { loff_cut = loff; error = copyout(&loff_cut, uap->basep, sizeof(int32_t)); } return (error); } #endif +#if defined(COMPAT_FREEBSD11) int +freebsd11_freebsd32_getdirentries(struct thread *td, + struct freebsd11_freebsd32_getdirentries_args *uap) +{ + long base; + int32_t base32; + int error; + + error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, + &base, NULL); + if (error) + return (error); + if (uap->basep != NULL) { + base32 = base; + error = copyout(&base32, uap->basep, sizeof(int32_t)); + } + return (error); +} + +int +freebsd11_freebsd32_getdents(struct thread *td, + struct freebsd11_freebsd32_getdents_args *uap) +{ + struct freebsd11_freebsd32_getdirentries_args ap; + + ap.fd = uap->fd; + ap.buf = uap->buf; + ap.count = uap->count; + ap.basep = NULL; + return (freebsd11_freebsd32_getdirentries(td, &ap)); +} +#endif /* COMPAT_FREEBSD11 */ + +int freebsd32_getdirentries(struct thread *td, struct freebsd32_getdirentries_args *uap) { long base; int32_t base32; int error; error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, NULL, UIO_USERSPACE); if (error) return (error); if (uap->basep != NULL) { base32 = base; error = copyout(&base32, uap->basep, sizeof(int32_t)); } return (error); } #ifdef COMPAT_FREEBSD6 /* versions with the 'int pad' argument */ int freebsd6_freebsd32_pread(struct thread *td, struct freebsd6_freebsd32_pread_args *uap) { return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd6_freebsd32_pwrite(struct thread *td, struct freebsd6_freebsd32_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd6_freebsd32_lseek(struct thread *td, struct freebsd6_freebsd32_lseek_args *uap) { int error; off_t pos; error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset), uap->whence); /* Expand the quad return into two parts for eax and edx */ pos = *(off_t *)(td->td_retval); td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd6_freebsd32_truncate(struct thread *td, struct freebsd6_freebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, PAIR32TO64(off_t, uap->length))); } int freebsd6_freebsd32_ftruncate(struct thread *td, struct freebsd6_freebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length))); } #endif /* COMPAT_FREEBSD6 */ struct sf_hdtr32 { uint32_t headers; int hdr_cnt; uint32_t trailers; int trl_cnt; }; static int freebsd32_do_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap, int compat) { struct sf_hdtr32 hdtr32; struct sf_hdtr hdtr; struct uio *hdr_uio, *trl_uio; struct file *fp; cap_rights_t rights; struct iovec32 *iov32; off_t offset, sbytes; int error; offset = PAIR32TO64(off_t, uap->offset); if (offset < 0) return (EINVAL); hdr_uio = trl_uio = NULL; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr32, sizeof(hdtr32)); if (error) goto out; PTRIN_CP(hdtr32, hdtr, headers); CP(hdtr32, hdtr, hdr_cnt); PTRIN_CP(hdtr32, hdtr, trailers); CP(hdtr32, hdtr, trl_cnt); if (hdtr.headers != NULL) { iov32 = PTRIN(hdtr32.headers); error = freebsd32_copyinuio(iov32, hdtr32.hdr_cnt, &hdr_uio); if (error) goto out; #ifdef COMPAT_FREEBSD4 /* * In FreeBSD < 5.0 the nbytes to send also included * the header. If compat is specified subtract the * header size from nbytes. */ if (compat) { if (uap->nbytes > hdr_uio->uio_resid) uap->nbytes -= hdr_uio->uio_resid; else uap->nbytes = 0; } #endif } if (hdtr.trailers != NULL) { iov32 = PTRIN(hdtr32.trailers); error = freebsd32_copyinuio(iov32, hdtr32.trl_cnt, &trl_uio); if (error) goto out; } } AUDIT_ARG_FD(uap->fd); if ((error = fget_read(td, uap->fd, cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) goto out; error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset, uap->nbytes, &sbytes, uap->flags, td); fdrop(fp, td); if (uap->sbytes != NULL) copyout(&sbytes, uap->sbytes, sizeof(off_t)); out: if (hdr_uio) free(hdr_uio, M_IOV); if (trl_uio) free(trl_uio, M_IOV); return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sendfile(struct thread *td, struct freebsd4_freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, (struct freebsd32_sendfile_args *)uap, 1)); } #endif int freebsd32_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, uap, 0)); } static void copy_stat(struct stat *in, struct stat32 *out) { CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); TS_CP(*in, *out, st_birthtim); + out->st_padding0 = 0; + out->st_padding1 = 0; +#ifdef __STAT32_TIME_T_EXT + out->st_atim_ext = 0; + out->st_mtim_ext = 0; + out->st_ctim_ext = 0; + out->st_btim_ext = 0; +#endif + bzero(out->st_spare, sizeof(out->st_spare)); } #ifdef COMPAT_43 static void copy_ostat(struct stat *in, struct ostat32 *out) { CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); CP(*in, *out, st_size); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_blksize); CP(*in, *out, st_blocks); CP(*in, *out, st_flags); CP(*in, *out, st_gen); } #endif -int -freebsd32_stat(struct thread *td, struct freebsd32_stat_args *uap) -{ - struct stat sb; - struct stat32 sb32; - int error; - - error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, - &sb, NULL); - if (error) - return (error); - copy_stat(&sb, &sb32); - error = copyout(&sb32, uap->ub, sizeof (sb32)); - return (error); -} - #ifdef COMPAT_43 int ofreebsd32_stat(struct thread *td, struct ofreebsd32_stat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_fstat(struct thread *td, struct freebsd32_fstat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_fstat(struct thread *td, struct ofreebsd32_fstat_args *uap) { struct stat ub; struct ostat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_ostat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } #endif int freebsd32_fstatat(struct thread *td, struct freebsd32_fstatat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &ub, NULL); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->buf, sizeof(ub32)); return (error); } +#ifdef COMPAT_43 int -freebsd32_lstat(struct thread *td, struct freebsd32_lstat_args *uap) +ofreebsd32_lstat(struct thread *td, struct ofreebsd32_lstat_args *uap) { struct stat sb; - struct stat32 sb32; + struct ostat32 sb32; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); + copy_ostat(&sb, &sb32); + error = copyout(&sb32, uap->ub, sizeof (sb32)); + return (error); +} +#endif + +int +freebsd32_fhstat(struct thread *td, struct freebsd32_fhstat_args *uap) +{ + struct stat sb; + struct stat32 sb32; + struct fhandle fh; + int error; + + error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); + if (error != 0) + return (error); + error = kern_fhstat(td, fh, &sb); + if (error != 0) + return (error); copy_stat(&sb, &sb32); + error = copyout(&sb32, uap->sb, sizeof (sb32)); + return (error); +} + +#if defined(COMPAT_FREEBSD11) +static void +freebsd11_cvtstat32(struct stat *in, struct freebsd11_stat32 *out) +{ + + CP(*in, *out, st_ino); + CP(*in, *out, st_nlink); + CP(*in, *out, st_dev); + CP(*in, *out, st_mode); + CP(*in, *out, st_uid); + CP(*in, *out, st_gid); + CP(*in, *out, st_rdev); + TS_CP(*in, *out, st_atim); + TS_CP(*in, *out, st_mtim); + TS_CP(*in, *out, st_ctim); + CP(*in, *out, st_size); + CP(*in, *out, st_blocks); + CP(*in, *out, st_blksize); + CP(*in, *out, st_flags); + CP(*in, *out, st_gen); + TS_CP(*in, *out, st_birthtim); + out->st_lspare = 0; + bzero((char *)&out->st_birthtim + sizeof(out->st_birthtim), + sizeof(*out) - offsetof(struct freebsd11_stat32, + st_birthtim) - sizeof(out->st_birthtim)); +} + +int +freebsd11_freebsd32_stat(struct thread *td, + struct freebsd11_freebsd32_stat_args *uap) +{ + struct stat sb; + struct freebsd11_stat32 sb32; + int error; + + error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, + &sb, NULL); + if (error != 0) + return (error); + freebsd11_cvtstat32(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } -#ifdef COMPAT_43 int -ofreebsd32_lstat(struct thread *td, struct ofreebsd32_lstat_args *uap) +freebsd11_freebsd32_fstat(struct thread *td, + struct freebsd11_freebsd32_fstat_args *uap) { struct stat sb; - struct ostat32 sb32; + struct freebsd11_stat32 sb32; int error; + error = kern_fstat(td, uap->fd, &sb); + if (error != 0) + return (error); + freebsd11_cvtstat32(&sb, &sb32); + error = copyout(&sb32, uap->ub, sizeof (sb32)); + return (error); +} + +int +freebsd11_freebsd32_fstatat(struct thread *td, + struct freebsd11_freebsd32_fstatat_args *uap) +{ + struct stat sb; + struct freebsd11_stat32 sb32; + int error; + + error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, + &sb, NULL); + if (error != 0) + return (error); + freebsd11_cvtstat32(&sb, &sb32); + error = copyout(&sb32, uap->buf, sizeof (sb32)); + return (error); +} + +int +freebsd11_freebsd32_lstat(struct thread *td, + struct freebsd11_freebsd32_lstat_args *uap) +{ + struct stat sb; + struct freebsd11_stat32 sb32; + int error; + error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); - copy_ostat(&sb, &sb32); + freebsd11_cvtstat32(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); + return (error); +} + +int +freebsd11_freebsd32_fhstat(struct thread *td, + struct freebsd11_freebsd32_fhstat_args *uap) +{ + struct stat sb; + struct freebsd11_stat32 sb32; + struct fhandle fh; + int error; + + error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); + if (error != 0) + return (error); + error = kern_fhstat(td, fh, &sb); + if (error != 0) + return (error); + freebsd11_cvtstat32(&sb, &sb32); + error = copyout(&sb32, uap->sb, sizeof (sb32)); return (error); } #endif int freebsd32_sysctl(struct thread *td, struct freebsd32_sysctl_args *uap) { int error, name[CTL_MAXNAME]; size_t j, oldlen; uint32_t tmp; if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); error = copyin(uap->name, name, uap->namelen * sizeof(int)); if (error) return (error); if (uap->oldlenp) { error = fueword32(uap->oldlenp, &tmp); oldlen = tmp; } else { oldlen = 0; } if (error != 0) return (EFAULT); error = userland_sysctl(td, name, uap->namelen, uap->old, &oldlen, 1, uap->new, uap->newlen, &j, SCTL_MASK32); if (error && error != ENOMEM) return (error); if (uap->oldlenp) suword32(uap->oldlenp, j); return (0); } int freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap) { uint32_t version; int error; struct jail j; error = copyin(uap->jail, &version, sizeof(uint32_t)); if (error) return (error); switch (version) { case 0: { /* FreeBSD single IPv4 jails. */ struct jail32_v0 j32_v0; bzero(&j, sizeof(struct jail)); error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0)); if (error) return (error); CP(j32_v0, j, version); PTRIN_CP(j32_v0, j, path); PTRIN_CP(j32_v0, j, hostname); j.ip4s = htonl(j32_v0.ip_number); /* jail_v0 is host order */ break; } case 1: /* * Version 1 was used by multi-IPv4 jail implementations * that never made it into the official kernel. */ return (EINVAL); case 2: /* JAIL_API_VERSION */ { /* FreeBSD multi-IPv4/IPv6,noIP jails. */ struct jail32 j32; error = copyin(uap->jail, &j32, sizeof(struct jail32)); if (error) return (error); CP(j32, j, version); PTRIN_CP(j32, j, path); PTRIN_CP(j32, j, hostname); PTRIN_CP(j32, j, jailname); CP(j32, j, ip4s); CP(j32, j, ip6s); PTRIN_CP(j32, j, ip4); PTRIN_CP(j32, j, ip6); break; } default: /* Sci-Fi jails are not supported, sorry. */ return (EINVAL); } return (kern_jail(td, &j)); } int freebsd32_jail_set(struct thread *td, struct freebsd32_jail_set_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_set(td, auio, uap->flags); free(auio, M_IOV); return (error); } int freebsd32_jail_get(struct thread *td, struct freebsd32_jail_get_args *uap) { struct iovec32 iov32; struct uio *auio; int error, i; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_get(td, auio, uap->flags); if (error == 0) for (i = 0; i < uap->iovcnt; i++) { PTROUT_CP(auio->uio_iov[i], iov32, iov_base); CP(auio->uio_iov[i], iov32, iov_len); error = copyout(&iov32, uap->iovp + i, sizeof(iov32)); if (error != 0) break; } free(auio, M_IOV); return (error); } int freebsd32_sigaction(struct thread *td, struct freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, 0); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sigaction(struct thread *td, struct freebsd4_freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, KSA_FREEBSD4); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #endif #ifdef COMPAT_43 struct osigaction32 { u_int32_t sa_u; osigset_t sa_mask; int sa_flags; }; #define ONSIG 32 int ofreebsd32_sigaction(struct thread *td, struct ofreebsd32_sigaction_args *uap) { struct osigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsa) { error = copyin(uap->nsa, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); OSIG2SIG(s32.sa_mask, sa.sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osa != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); SIG2OSIG(osa.sa_mask, s32.sa_mask); error = copyout(&s32, uap->osa, sizeof(s32)); } return (error); } int ofreebsd32_sigprocmask(struct thread *td, struct ofreebsd32_sigprocmask_args *uap) { sigset_t set, oset; int error; OSIG2SIG(uap->mask, set); error = kern_sigprocmask(td, uap->how, &set, &oset, SIGPROCMASK_OLD); SIG2OSIG(oset, td->td_retval[0]); return (error); } int ofreebsd32_sigpending(struct thread *td, struct ofreebsd32_sigpending_args *uap) { struct proc *p = td->td_proc; sigset_t siglist; PROC_LOCK(p); siglist = p->p_siglist; SIGSETOR(siglist, td->td_siglist); PROC_UNLOCK(p); SIG2OSIG(siglist, td->td_retval[0]); return (0); } struct sigvec32 { u_int32_t sv_handler; int sv_mask; int sv_flags; }; int ofreebsd32_sigvec(struct thread *td, struct ofreebsd32_sigvec_args *uap) { struct sigvec32 vec; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsv) { error = copyin(uap->nsv, &vec, sizeof(vec)); if (error) return (error); sa.sa_handler = PTRIN(vec.sv_handler); OSIG2SIG(vec.sv_mask, sa.sa_mask); sa.sa_flags = vec.sv_flags; sa.sa_flags ^= SA_RESTART; sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osv != NULL) { vec.sv_handler = PTROUT(osa.sa_handler); SIG2OSIG(osa.sa_mask, vec.sv_mask); vec.sv_flags = osa.sa_flags; vec.sv_flags &= ~SA_NOCLDWAIT; vec.sv_flags ^= SA_RESTART; error = copyout(&vec, uap->osv, sizeof(vec)); } return (error); } int ofreebsd32_sigblock(struct thread *td, struct ofreebsd32_sigblock_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_BLOCK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsetmask(struct thread *td, struct ofreebsd32_sigsetmask_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_SETMASK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsuspend(struct thread *td, struct ofreebsd32_sigsuspend_args *uap) { sigset_t mask; OSIG2SIG(uap->mask, mask); return (kern_sigsuspend(td, mask)); } struct sigstack32 { u_int32_t ss_sp; int ss_onstack; }; int ofreebsd32_sigstack(struct thread *td, struct ofreebsd32_sigstack_args *uap) { struct sigstack32 s32; struct sigstack nss, oss; int error = 0, unss; if (uap->nss != NULL) { error = copyin(uap->nss, &s32, sizeof(s32)); if (error) return (error); nss.ss_sp = PTRIN(s32.ss_sp); CP(s32, nss, ss_onstack); unss = 1; } else { unss = 0; } oss.ss_sp = td->td_sigstk.ss_sp; oss.ss_onstack = sigonstack(cpu_getstack(td)); if (unss) { td->td_sigstk.ss_sp = nss.ss_sp; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_flags |= (nss.ss_onstack & SS_ONSTACK); td->td_pflags |= TDP_ALTSTACK; } if (uap->oss != NULL) { s32.ss_sp = PTROUT(oss.ss_sp); CP(oss, s32, ss_onstack); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } #endif int freebsd32_nanosleep(struct thread *td, struct freebsd32_nanosleep_args *uap) { return (freebsd32_user_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, uap->rqtp, uap->rmtp)); } int freebsd32_clock_nanosleep(struct thread *td, struct freebsd32_clock_nanosleep_args *uap) { int error; error = freebsd32_user_clock_nanosleep(td, uap->clock_id, uap->flags, uap->rqtp, uap->rmtp); return (kern_posix_error(td, error)); } static int freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp) { struct timespec32 rmt32, rqt32; struct timespec rmt, rqt; int error; error = copyin(ua_rqtp, &rqt32, sizeof(rqt32)); if (error) return (error); CP(rqt32, rqt, tv_sec); CP(rqt32, rqt, tv_nsec); if (ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0 && !useracc(ua_rmtp, sizeof(rmt32), VM_PROT_WRITE)) return (EFAULT); error = kern_clock_nanosleep(td, clock_id, flags, &rqt, &rmt); if (error == EINTR && ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0) { int error2; CP(rmt, rmt32, tv_sec); CP(rmt, rmt32, tv_nsec); error2 = copyout(&rmt32, ua_rmtp, sizeof(rmt32)); if (error2) error = error2; } return (error); } int freebsd32_clock_gettime(struct thread *td, struct freebsd32_clock_gettime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = kern_clock_gettime(td, uap->clock_id, &ats); if (error == 0) { CP(ats, ats32, tv_sec); CP(ats, ats32, tv_nsec); error = copyout(&ats32, uap->tp, sizeof(ats32)); } return (error); } int freebsd32_clock_settime(struct thread *td, struct freebsd32_clock_settime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = copyin(uap->tp, &ats32, sizeof(ats32)); if (error) return (error); CP(ats32, ats, tv_sec); CP(ats32, ats, tv_nsec); return (kern_clock_settime(td, uap->clock_id, &ats)); } int freebsd32_clock_getres(struct thread *td, struct freebsd32_clock_getres_args *uap) { struct timespec ts; struct timespec32 ts32; int error; if (uap->tp == NULL) return (0); error = kern_clock_getres(td, uap->clock_id, &ts); if (error == 0) { CP(ts, ts32, tv_sec); CP(ts, ts32, tv_nsec); error = copyout(&ts32, uap->tp, sizeof(ts32)); } return (error); } int freebsd32_ktimer_create(struct thread *td, struct freebsd32_ktimer_create_args *uap) { struct sigevent32 ev32; struct sigevent ev, *evp; int error, id; if (uap->evp == NULL) { evp = NULL; } else { evp = &ev; error = copyin(uap->evp, &ev32, sizeof(ev32)); if (error != 0) return (error); error = convert_sigevent32(&ev32, &ev); if (error != 0) return (error); } error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1); if (error == 0) { error = copyout(&id, uap->timerid, sizeof(int)); if (error != 0) kern_ktimer_delete(td, id); } return (error); } int freebsd32_ktimer_settime(struct thread *td, struct freebsd32_ktimer_settime_args *uap) { struct itimerspec32 val32, oval32; struct itimerspec val, oval, *ovalp; int error; error = copyin(uap->value, &val32, sizeof(val32)); if (error != 0) return (error); ITS_CP(val32, val); ovalp = uap->ovalue != NULL ? &oval : NULL; error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp); if (error == 0 && uap->ovalue != NULL) { ITS_CP(oval, oval32); error = copyout(&oval32, uap->ovalue, sizeof(oval32)); } return (error); } int freebsd32_ktimer_gettime(struct thread *td, struct freebsd32_ktimer_gettime_args *uap) { struct itimerspec32 val32; struct itimerspec val; int error; error = kern_ktimer_gettime(td, uap->timerid, &val); if (error == 0) { ITS_CP(val, val32); error = copyout(&val32, uap->value, sizeof(val32)); } return (error); } int freebsd32_clock_getcpuclockid2(struct thread *td, struct freebsd32_clock_getcpuclockid2_args *uap) { clockid_t clk_id; int error; error = kern_clock_getcpuclockid2(td, PAIR32TO64(id_t, uap->id), uap->which, &clk_id); if (error == 0) error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t)); return (error); } int freebsd32_thr_new(struct thread *td, struct freebsd32_thr_new_args *uap) { struct thr_param32 param32; struct thr_param param; int error; if (uap->param_size < 0 || uap->param_size > sizeof(struct thr_param32)) return (EINVAL); bzero(¶m, sizeof(struct thr_param)); bzero(¶m32, sizeof(struct thr_param32)); error = copyin(uap->param, ¶m32, uap->param_size); if (error != 0) return (error); param.start_func = PTRIN(param32.start_func); param.arg = PTRIN(param32.arg); param.stack_base = PTRIN(param32.stack_base); param.stack_size = param32.stack_size; param.tls_base = PTRIN(param32.tls_base); param.tls_size = param32.tls_size; param.child_tid = PTRIN(param32.child_tid); param.parent_tid = PTRIN(param32.parent_tid); param.flags = param32.flags; param.rtp = PTRIN(param32.rtp); param.spare[0] = PTRIN(param32.spare[0]); param.spare[1] = PTRIN(param32.spare[1]); param.spare[2] = PTRIN(param32.spare[2]); return (kern_thr_new(td, ¶m)); } int freebsd32_thr_suspend(struct thread *td, struct freebsd32_thr_suspend_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; int error; error = 0; tsp = NULL; if (uap->timeout != NULL) { error = copyin((const void *)uap->timeout, (void *)&ts32, sizeof(struct timespec32)); if (error != 0) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; tsp = &ts; } return (kern_thr_suspend(td, tsp)); } void siginfo_to_siginfo32(const siginfo_t *src, struct siginfo32 *dst) { bzero(dst, sizeof(*dst)); dst->si_signo = src->si_signo; dst->si_errno = src->si_errno; dst->si_code = src->si_code; dst->si_pid = src->si_pid; dst->si_uid = src->si_uid; dst->si_status = src->si_status; dst->si_addr = (uintptr_t)src->si_addr; dst->si_value.sival_int = src->si_value.sival_int; dst->si_timerid = src->si_timerid; dst->si_overrun = src->si_overrun; } #ifndef _FREEBSD32_SYSPROTO_H_ struct freebsd32_sigqueue_args { pid_t pid; int signum; /* union sigval32 */ int value; }; #endif int freebsd32_sigqueue(struct thread *td, struct freebsd32_sigqueue_args *uap) { union sigval sv; /* * On 32-bit ABIs, sival_int and sival_ptr are the same. * On 64-bit little-endian ABIs, the low bits are the same. * In 64-bit big-endian ABIs, sival_int overlaps with * sival_ptr's HIGH bits. We choose to support sival_int * rather than sival_ptr in this case as it seems to be * more common. */ bzero(&sv, sizeof(sv)); sv.sival_int = uap->value; return (kern_sigqueue(td, uap->pid, uap->signum, &sv)); } int freebsd32_sigtimedwait(struct thread *td, struct freebsd32_sigtimedwait_args *uap) { struct timespec32 ts32; struct timespec ts; struct timespec *timeout; sigset_t set; ksiginfo_t ksi; struct siginfo32 si32; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; timeout = &ts; } else timeout = NULL; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, timeout); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } /* * MPSAFE */ int freebsd32_sigwaitinfo(struct thread *td, struct freebsd32_sigwaitinfo_args *uap) { ksiginfo_t ksi; struct siginfo32 si32; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } int freebsd32_cpuset_setid(struct thread *td, struct freebsd32_cpuset_setid_args *uap) { return (kern_cpuset_setid(td, uap->which, PAIR32TO64(id_t, uap->id), uap->setid)); } int freebsd32_cpuset_getid(struct thread *td, struct freebsd32_cpuset_getid_args *uap) { return (kern_cpuset_getid(td, uap->level, uap->which, PAIR32TO64(id_t, uap->id), uap->setid)); } int freebsd32_cpuset_getaffinity(struct thread *td, struct freebsd32_cpuset_getaffinity_args *uap) { return (kern_cpuset_getaffinity(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask)); } int freebsd32_cpuset_setaffinity(struct thread *td, struct freebsd32_cpuset_setaffinity_args *uap) { return (kern_cpuset_setaffinity(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask)); } int freebsd32_nmount(struct thread *td, struct freebsd32_nmount_args /* { struct iovec *iovp; unsigned int iovcnt; int flags; } */ *uap) { struct uio *auio; uint64_t flags; int error; /* * Mount flags are now 64-bits. On 32-bit archtectures only * 32-bits are passed in, but from here on everything handles * 64-bit flags correctly. */ flags = uap->flags; AUDIT_ARG_FFLAGS(flags); /* * Filter out MNT_ROOTFS. We do not want clients of nmount() in * userspace to set this flag, but we must filter it out if we want * MNT_UPDATE on the root file system to work. * MNT_ROOTFS should only be set by the kernel when mounting its * root file system. */ flags &= ~MNT_ROOTFS; /* * check that we have an even number of iovec's * and that we have at least two options. */ if ((uap->iovcnt & 1) || (uap->iovcnt < 4)) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = vfs_donmount(td, flags, auio); free(auio, M_IOV); return error; } #if 0 int freebsd32_xxx(struct thread *td, struct freebsd32_xxx_args *uap) { struct yyy32 *p32, s32; struct yyy *p = NULL, s; struct xxx_arg ap; int error; if (uap->zzz) { error = copyin(uap->zzz, &s32, sizeof(s32)); if (error) return (error); /* translate in */ p = &s; } error = kern_xxx(td, p); if (error) return (error); if (uap->zzz) { /* translate out */ error = copyout(&s32, p32, sizeof(s32)); } return (error); } #endif int syscall32_register(int *offset, struct sysent *new_sysent, struct sysent *old_sysent, int flags) { if ((flags & ~SY_THR_STATIC) != 0) return (EINVAL); if (*offset == NO_SYSCALL) { int i; for (i = 1; i < SYS_MAXSYSCALL; ++i) if (freebsd32_sysent[i].sy_call == (sy_call_t *)lkmnosys) break; if (i == SYS_MAXSYSCALL) return (ENFILE); *offset = i; } else if (*offset < 0 || *offset >= SYS_MAXSYSCALL) return (EINVAL); else if (freebsd32_sysent[*offset].sy_call != (sy_call_t *)lkmnosys && freebsd32_sysent[*offset].sy_call != (sy_call_t *)lkmressys) return (EEXIST); *old_sysent = freebsd32_sysent[*offset]; freebsd32_sysent[*offset] = *new_sysent; atomic_store_rel_32(&freebsd32_sysent[*offset].sy_thrcnt, flags); return (0); } int syscall32_deregister(int *offset, struct sysent *old_sysent) { if (*offset == 0) return (0); freebsd32_sysent[*offset] = *old_sysent; return (0); } int syscall32_module_handler(struct module *mod, int what, void *arg) { struct syscall_module_data *data = (struct syscall_module_data*)arg; modspecific_t ms; int error; switch (what) { case MOD_LOAD: error = syscall32_register(data->offset, data->new_sysent, &data->old_sysent, SY_THR_STATIC_KLD); if (error) { /* Leave a mark so we know to safely unload below. */ data->offset = NULL; return error; } ms.intval = *data->offset; MOD_XLOCK; module_setspecific(mod, &ms); MOD_XUNLOCK; if (data->chainevh) error = data->chainevh(mod, what, data->chainarg); return (error); case MOD_UNLOAD: /* * MOD_LOAD failed, so just return without calling the * chained handler since we didn't pass along the MOD_LOAD * event. */ if (data->offset == NULL) return (0); if (data->chainevh) { error = data->chainevh(mod, what, data->chainarg); if (error) return (error); } error = syscall32_deregister(data->offset, &data->old_sysent); return (error); default: error = EOPNOTSUPP; if (data->chainevh) error = data->chainevh(mod, what, data->chainarg); return (error); } } int syscall32_helper_register(struct syscall_helper_data *sd, int flags) { struct syscall_helper_data *sd1; int error; for (sd1 = sd; sd1->syscall_no != NO_SYSCALL; sd1++) { error = syscall32_register(&sd1->syscall_no, &sd1->new_sysent, &sd1->old_sysent, flags); if (error != 0) { syscall32_helper_unregister(sd); return (error); } sd1->registered = 1; } return (0); } int syscall32_helper_unregister(struct syscall_helper_data *sd) { struct syscall_helper_data *sd1; for (sd1 = sd; sd1->registered != 0; sd1++) { syscall32_deregister(&sd1->syscall_no, &sd1->old_sysent); sd1->registered = 0; } return (0); } register_t * freebsd32_copyout_strings(struct image_params *imgp) { int argc, envc, i; u_int32_t *vectp; char *stringp; uintptr_t destp; u_int32_t *stack_base; struct freebsd32_ps_strings *arginfo; char canary[sizeof(long) * 8]; int32_t pagesizes32[MAXPAGESIZES]; size_t execpath_len; int szsigcode; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct freebsd32_ps_strings *)curproc->p_sysent-> sv_psstrings; if (imgp->proc->p_sysent->sv_sigcode_base == 0) szsigcode = *(imgp->proc->p_sysent->sv_szsigcode); else szsigcode = 0; destp = (uintptr_t)arginfo; /* * install sigcode */ if (szsigcode != 0) { destp -= szsigcode; destp = rounddown2(destp, sizeof(uint32_t)); copyout(imgp->proc->p_sysent->sv_sigcode, (void *)destp, szsigcode); } /* * Copy the image path for the rtld. */ if (execpath_len != 0) { destp -= execpath_len; imgp->execpathp = destp; copyout(imgp->execpath, (void *)destp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = destp; copyout(canary, (void *)destp, sizeof(canary)); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ for (i = 0; i < MAXPAGESIZES; i++) pagesizes32[i] = (uint32_t)pagesizes[i]; destp -= sizeof(pagesizes32); destp = rounddown2(destp, sizeof(uint32_t)); imgp->pagesizes = destp; copyout(pagesizes32, (void *)destp, sizeof(pagesizes32)); imgp->pagesizeslen = sizeof(pagesizes32); destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(uint32_t)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. */ vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size + execpath_len) * sizeof(u_int32_t)); } else { /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (u_int32_t *)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t)); } /* * vectp also becomes our initial stack base */ stack_base = vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, (void *)destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword32(vectp++, (u_int32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword32(vectp++, 0); suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword32(vectp++, (u_int32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword32(vectp, 0); return ((register_t *)stack_base); } int freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap) { struct kld_file_stat stat; struct kld32_file_stat stat32; int error, version; if ((error = copyin(&uap->stat->version, &version, sizeof(version))) != 0) return (error); if (version != sizeof(struct kld32_file_stat_1) && version != sizeof(struct kld32_file_stat)) return (EINVAL); error = kern_kldstat(td, uap->fileid, &stat); if (error != 0) return (error); bcopy(&stat.name[0], &stat32.name[0], sizeof(stat.name)); CP(stat, stat32, refs); CP(stat, stat32, id); PTROUT_CP(stat, stat32, address); CP(stat, stat32, size); bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname)); return (copyout(&stat32, uap->stat, version)); } int freebsd32_posix_fallocate(struct thread *td, struct freebsd32_posix_fallocate_args *uap) { int error; error = kern_posix_fallocate(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len)); return (kern_posix_error(td, error)); } int freebsd32_posix_fadvise(struct thread *td, struct freebsd32_posix_fadvise_args *uap) { int error; error = kern_posix_fadvise(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len), uap->advice); return (kern_posix_error(td, error)); } int convert_sigevent32(struct sigevent32 *sig32, struct sigevent *sig) { CP(*sig32, *sig, sigev_notify); switch (sig->sigev_notify) { case SIGEV_NONE: break; case SIGEV_THREAD_ID: CP(*sig32, *sig, sigev_notify_thread_id); /* FALLTHROUGH */ case SIGEV_SIGNAL: CP(*sig32, *sig, sigev_signo); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; case SIGEV_KEVENT: CP(*sig32, *sig, sigev_notify_kqueue); CP(*sig32, *sig, sigev_notify_kevent_flags); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; default: return (EINVAL); } return (0); } int freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) { void *data; union { struct procctl_reaper_status rs; struct procctl_reaper_pids rp; struct procctl_reaper_kill rk; } x; union { struct procctl_reaper_pids32 rp; } x32; int error, error1, flags; switch (uap->com) { case PROC_SPROTECT: case PROC_TRACE_CTL: case PROC_TRAPCAP_CTL: error = copyin(PTRIN(uap->data), &flags, sizeof(flags)); if (error != 0) return (error); data = &flags; break; case PROC_REAP_ACQUIRE: case PROC_REAP_RELEASE: if (uap->data != NULL) return (EINVAL); data = NULL; break; case PROC_REAP_STATUS: data = &x.rs; break; case PROC_REAP_GETPIDS: error = copyin(uap->data, &x32.rp, sizeof(x32.rp)); if (error != 0) return (error); CP(x32.rp, x.rp, rp_count); PTRIN_CP(x32.rp, x.rp, rp_pids); data = &x.rp; break; case PROC_REAP_KILL: error = copyin(uap->data, &x.rk, sizeof(x.rk)); if (error != 0) return (error); data = &x.rk; break; case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: data = &flags; break; default: return (EINVAL); } error = kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id), uap->com, data); switch (uap->com) { case PROC_REAP_STATUS: if (error == 0) error = copyout(&x.rs, uap->data, sizeof(x.rs)); break; case PROC_REAP_KILL: error1 = copyout(&x.rk, uap->data, sizeof(x.rk)); if (error == 0) error = error1; break; case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: if (error == 0) error = copyout(&flags, uap->data, sizeof(flags)); break; } return (error); } int freebsd32_fcntl(struct thread *td, struct freebsd32_fcntl_args *uap) { long tmp; switch (uap->cmd) { /* * Do unsigned conversion for arg when operation * interprets it as flags or pointer. */ case F_SETLK_REMOTE: case F_SETLKW: case F_SETLK: case F_GETLK: case F_SETFD: case F_SETFL: case F_OGETLK: case F_OSETLK: case F_OSETLKW: tmp = (unsigned int)(uap->arg); break; default: tmp = uap->arg; break; } return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, tmp)); } int freebsd32_ppoll(struct thread *td, struct freebsd32_ppoll_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; sigset_t set, *ssp; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; if (uap->set != NULL) { error = copyin(uap->set, &set, sizeof(set)); if (error != 0) return (error); ssp = &set; } else ssp = NULL; return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp)); } Index: head/sys/compat/freebsd32/syscalls.master =================================================================== --- head/sys/compat/freebsd32/syscalls.master (revision 318735) +++ head/sys/compat/freebsd32/syscalls.master (revision 318736) @@ -1,1086 +1,1113 @@ $FreeBSD$ ; from: @(#)syscalls.master 8.2 (Berkeley) 1/13/94 ; from: src/sys/kern/syscalls.master 1.107 ; ; System call name/number master file. ; Processed to created init_sysent.c, syscalls.c and syscall.h. ; Columns: number audit type name alt{name,tag,rtyp}/comments ; number system call number, must be in order ; audit the audit event associated with the system call ; A value of AUE_NULL means no auditing, but it also means that ; there is no audit event for the call at this time. For the ; case where the event exists, but we don't want auditing, the ; event should be #defined to AUE_NULL in audit_kevents.h. ; type one of STD, OBSOL, UNIMPL, COMPAT, COMPAT4, COMPAT6, -; COMPAT7, NODEF, NOARGS, NOPROTO, NOSTD +; COMPAT7, COMPAT11, NODEF, NOARGS, NOPROTO, NOSTD ; The COMPAT* options may be combined with one or more NO* ; options separated by '|' with no spaces (e.g. COMPAT|NOARGS) ; name psuedo-prototype of syscall routine ; If one of the following alts is different, then all appear: ; altname name of system call if different ; alttag name of args struct tag if different from [o]`name'"_args" ; altrtyp return type if not int (bogus - syscalls always return int) ; for UNIMPL/OBSOL, name continues with comments ; types: ; STD always included ; COMPAT included on COMPAT #ifdef ; COMPAT4 included on COMPAT4 #ifdef (FreeBSD 4 compat) ; COMPAT6 included on COMPAT6 #ifdef (FreeBSD 6 compat) ; COMPAT7 included on COMPAT7 #ifdef (FreeBSD 7 compat) ; COMPAT10 included on COMPAT10 #ifdef (FreeBSD 10 compat) +; COMPAT11 included on COMPAT11 #ifdef (FreeBSD 11 compat) ; OBSOL obsolete, not included in system, only specifies name ; UNIMPL not implemented, placeholder only ; NOSTD implemented but as a lkm that can be statically ; compiled in; sysent entry will be filled with lkmressys ; so the SYSCALL_MODULE macro works ; NOARGS same as STD except do not create structure in sys/sysproto.h ; NODEF same as STD except only have the entry in the syscall table ; added. Meaning - do not create structure or function ; prototype in sys/sysproto.h ; NOPROTO same as STD except do not create structure or ; function prototype in sys/sysproto.h. Does add a ; definition to syscall.h besides adding a sysent. ; #ifdef's, etc. may be included, and are copied to the output files. #include #include #include #include #include #include #include #if !defined(PAD64_REQUIRED) && (defined(__powerpc__) || defined(__mips__)) #define PAD64_REQUIRED #endif ; Reserved/unimplemented system calls in the range 0-150 inclusive ; are reserved for use in future Berkeley releases. ; Additional system calls implemented in vendor and other ; redistributions should be placed in the reserved range at the end ; of the current calls. 0 AUE_NULL NOPROTO { int nosys(void); } syscall nosys_args int 1 AUE_EXIT NOPROTO { void sys_exit(int rval); } exit \ sys_exit_args void 2 AUE_FORK NOPROTO { int fork(void); } 3 AUE_READ NOPROTO { ssize_t read(int fd, void *buf, \ size_t nbyte); } 4 AUE_WRITE NOPROTO { ssize_t write(int fd, const void *buf, \ size_t nbyte); } 5 AUE_OPEN_RWTC NOPROTO { int open(char *path, int flags, \ int mode); } 6 AUE_CLOSE NOPROTO { int close(int fd); } 7 AUE_WAIT4 STD { int freebsd32_wait4(int pid, int *status, \ int options, struct rusage32 *rusage); } 8 AUE_CREAT OBSOL old creat 9 AUE_LINK NOPROTO { int link(char *path, char *link); } 10 AUE_UNLINK NOPROTO { int unlink(char *path); } 11 AUE_NULL OBSOL execv 12 AUE_CHDIR NOPROTO { int chdir(char *path); } 13 AUE_FCHDIR NOPROTO { int fchdir(int fd); } -14 AUE_MKNOD NOPROTO { int mknod(char *path, int mode, int dev); } +14 AUE_MKNOD COMPAT11 { int freebsd32_mknod(char *path, \ + int mode, int dev); } 15 AUE_CHMOD NOPROTO { int chmod(char *path, int mode); } 16 AUE_CHOWN NOPROTO { int chown(char *path, int uid, int gid); } 17 AUE_NULL NOPROTO { int obreak(char *nsize); } break \ obreak_args int 18 AUE_GETFSSTAT COMPAT4 { int freebsd32_getfsstat( \ struct statfs32 *buf, long bufsize, \ int mode); } 19 AUE_LSEEK COMPAT { int freebsd32_lseek(int fd, int offset, \ int whence); } 20 AUE_GETPID NOPROTO { pid_t getpid(void); } 21 AUE_MOUNT NOPROTO { int mount(char *type, char *path, \ int flags, caddr_t data); } 22 AUE_UMOUNT NOPROTO { int unmount(char *path, int flags); } 23 AUE_SETUID NOPROTO { int setuid(uid_t uid); } 24 AUE_GETUID NOPROTO { uid_t getuid(void); } 25 AUE_GETEUID NOPROTO { uid_t geteuid(void); } 26 AUE_PTRACE NOPROTO { int ptrace(int req, pid_t pid, \ caddr_t addr, int data); } 27 AUE_RECVMSG STD { int freebsd32_recvmsg(int s, struct msghdr32 *msg, \ int flags); } 28 AUE_SENDMSG STD { int freebsd32_sendmsg(int s, struct msghdr32 *msg, \ int flags); } 29 AUE_RECVFROM STD { int freebsd32_recvfrom(int s, uint32_t buf, \ uint32_t len, int flags, uint32_t from, \ uint32_t fromlenaddr); } 30 AUE_ACCEPT NOPROTO { int accept(int s, caddr_t name, \ int *anamelen); } 31 AUE_GETPEERNAME NOPROTO { int getpeername(int fdes, caddr_t asa, \ int *alen); } 32 AUE_GETSOCKNAME NOPROTO { int getsockname(int fdes, caddr_t asa, \ int *alen); } 33 AUE_ACCESS NOPROTO { int access(char *path, int amode); } 34 AUE_CHFLAGS NOPROTO { int chflags(const char *path, u_long flags); } 35 AUE_FCHFLAGS NOPROTO { int fchflags(int fd, u_long flags); } 36 AUE_SYNC NOPROTO { int sync(void); } 37 AUE_KILL NOPROTO { int kill(int pid, int signum); } 38 AUE_STAT COMPAT { int freebsd32_stat(char *path, \ struct ostat32 *ub); } 39 AUE_GETPPID NOPROTO { pid_t getppid(void); } 40 AUE_LSTAT COMPAT { int freebsd32_lstat(char *path, \ struct ostat *ub); } 41 AUE_DUP NOPROTO { int dup(u_int fd); } 42 AUE_PIPE COMPAT10 { int freebsd32_pipe(void); } 43 AUE_GETEGID NOPROTO { gid_t getegid(void); } 44 AUE_PROFILE NOPROTO { int profil(caddr_t samples, size_t size, \ size_t offset, u_int scale); } 45 AUE_KTRACE NOPROTO { int ktrace(const char *fname, int ops, \ int facs, int pid); } 46 AUE_SIGACTION COMPAT { int freebsd32_sigaction( int signum, \ struct osigaction32 *nsa, \ struct osigaction32 *osa); } 47 AUE_GETGID NOPROTO { gid_t getgid(void); } 48 AUE_SIGPROCMASK COMPAT { int freebsd32_sigprocmask(int how, \ osigset_t mask); } 49 AUE_GETLOGIN NOPROTO { int getlogin(char *namebuf, \ u_int namelen); } 50 AUE_SETLOGIN NOPROTO { int setlogin(char *namebuf); } 51 AUE_ACCT NOPROTO { int acct(char *path); } 52 AUE_SIGPENDING COMPAT { int freebsd32_sigpending(void); } 53 AUE_SIGALTSTACK STD { int freebsd32_sigaltstack( \ struct sigaltstack32 *ss, \ struct sigaltstack32 *oss); } 54 AUE_IOCTL STD { int freebsd32_ioctl(int fd, uint32_t com, \ struct md_ioctl32 *data); } 55 AUE_REBOOT NOPROTO { int reboot(int opt); } 56 AUE_REVOKE NOPROTO { int revoke(char *path); } 57 AUE_SYMLINK NOPROTO { int symlink(char *path, char *link); } 58 AUE_READLINK NOPROTO { ssize_t readlink(char *path, char *buf, \ size_t count); } 59 AUE_EXECVE STD { int freebsd32_execve(char *fname, \ uint32_t *argv, uint32_t *envv); } 60 AUE_UMASK NOPROTO { int umask(int newmask); } umask \ umask_args int 61 AUE_CHROOT NOPROTO { int chroot(char *path); } 62 AUE_FSTAT COMPAT { int freebsd32_fstat(int fd, \ struct ostat32 *ub); } 63 AUE_NULL OBSOL ogetkerninfo 64 AUE_NULL COMPAT { int freebsd32_getpagesize( \ int32_t dummy); } 65 AUE_MSYNC NOPROTO { int msync(void *addr, size_t len, \ int flags); } 66 AUE_VFORK NOPROTO { int vfork(void); } 67 AUE_NULL OBSOL vread 68 AUE_NULL OBSOL vwrite 69 AUE_SBRK NOPROTO { int sbrk(int incr); } 70 AUE_SSTK NOPROTO { int sstk(int incr); } 71 AUE_MMAP COMPAT|NOPROTO { int mmap(void *addr, int len, \ int prot, int flags, int fd, int pos); } 72 AUE_O_VADVISE NOPROTO { int ovadvise(int anom); } vadvise \ ovadvise_args int 73 AUE_MUNMAP NOPROTO { int munmap(void *addr, size_t len); } 74 AUE_MPROTECT STD { int freebsd32_mprotect(void *addr, \ size_t len, int prot); } 75 AUE_MADVISE NOPROTO { int madvise(void *addr, size_t len, \ int behav); } 76 AUE_NULL OBSOL vhangup 77 AUE_NULL OBSOL vlimit 78 AUE_MINCORE NOPROTO { int mincore(const void *addr, size_t len, \ char *vec); } 79 AUE_GETGROUPS NOPROTO { int getgroups(u_int gidsetsize, \ gid_t *gidset); } 80 AUE_SETGROUPS NOPROTO { int setgroups(u_int gidsetsize, \ gid_t *gidset); } 81 AUE_GETPGRP NOPROTO { int getpgrp(void); } 82 AUE_SETPGRP NOPROTO { int setpgid(int pid, int pgid); } 83 AUE_SETITIMER STD { int freebsd32_setitimer(u_int which, \ struct itimerval32 *itv, \ struct itimerval32 *oitv); } 84 AUE_NULL OBSOL owait ; XXX implement 85 AUE_SWAPON NOPROTO { int swapon(char *name); } 86 AUE_GETITIMER STD { int freebsd32_getitimer(u_int which, \ struct itimerval32 *itv); } 87 AUE_O_GETHOSTNAME OBSOL ogethostname 88 AUE_O_SETHOSTNAME OBSOL osethostname 89 AUE_GETDTABLESIZE NOPROTO { int getdtablesize(void); } 90 AUE_DUP2 NOPROTO { int dup2(u_int from, u_int to); } 91 AUE_NULL UNIMPL getdopt 92 AUE_FCNTL STD { int freebsd32_fcntl(int fd, int cmd, \ int arg); } 93 AUE_SELECT STD { int freebsd32_select(int nd, fd_set *in, \ fd_set *ou, fd_set *ex, \ struct timeval32 *tv); } 94 AUE_NULL UNIMPL setdopt 95 AUE_FSYNC NOPROTO { int fsync(int fd); } 96 AUE_SETPRIORITY NOPROTO { int setpriority(int which, int who, \ int prio); } 97 AUE_SOCKET NOPROTO { int socket(int domain, int type, \ int protocol); } 98 AUE_CONNECT NOPROTO { int connect(int s, caddr_t name, \ int namelen); } 99 AUE_NULL OBSOL oaccept 100 AUE_GETPRIORITY NOPROTO { int getpriority(int which, int who); } 101 AUE_NULL OBSOL osend 102 AUE_NULL OBSOL orecv 103 AUE_SIGRETURN COMPAT { int freebsd32_sigreturn( \ struct ia32_sigcontext3 *sigcntxp); } 104 AUE_BIND NOPROTO { int bind(int s, caddr_t name, \ int namelen); } 105 AUE_SETSOCKOPT NOPROTO { int setsockopt(int s, int level, \ int name, caddr_t val, int valsize); } 106 AUE_LISTEN NOPROTO { int listen(int s, int backlog); } 107 AUE_NULL OBSOL vtimes 108 AUE_O_SIGVEC COMPAT { int freebsd32_sigvec(int signum, \ struct sigvec32 *nsv, \ struct sigvec32 *osv); } 109 AUE_O_SIGBLOCK COMPAT { int freebsd32_sigblock(int mask); } 110 AUE_O_SIGSETMASK COMPAT { int freebsd32_sigsetmask( int mask); } 111 AUE_SIGSUSPEND COMPAT { int freebsd32_sigsuspend( int mask); } 112 AUE_O_SIGSTACK COMPAT { int freebsd32_sigstack( \ struct sigstack32 *nss, \ struct sigstack32 *oss); } 113 AUE_NULL OBSOL orecvmsg 114 AUE_NULL OBSOL osendmsg 115 AUE_NULL OBSOL vtrace 116 AUE_GETTIMEOFDAY STD { int freebsd32_gettimeofday( \ struct timeval32 *tp, \ struct timezone *tzp); } 117 AUE_GETRUSAGE STD { int freebsd32_getrusage(int who, \ struct rusage32 *rusage); } 118 AUE_GETSOCKOPT NOPROTO { int getsockopt(int s, int level, \ int name, caddr_t val, int *avalsize); } 119 AUE_NULL UNIMPL resuba (BSD/OS 2.x) 120 AUE_READV STD { int freebsd32_readv(int fd, \ struct iovec32 *iovp, u_int iovcnt); } 121 AUE_WRITEV STD { int freebsd32_writev(int fd, \ struct iovec32 *iovp, u_int iovcnt); } 122 AUE_SETTIMEOFDAY STD { int freebsd32_settimeofday( \ struct timeval32 *tv, \ struct timezone *tzp); } 123 AUE_FCHOWN NOPROTO { int fchown(int fd, int uid, int gid); } 124 AUE_FCHMOD NOPROTO { int fchmod(int fd, int mode); } 125 AUE_RECVFROM OBSOL orecvfrom 126 AUE_SETREUID NOPROTO { int setreuid(int ruid, int euid); } 127 AUE_SETREGID NOPROTO { int setregid(int rgid, int egid); } 128 AUE_RENAME NOPROTO { int rename(char *from, char *to); } 129 AUE_TRUNCATE COMPAT|NOPROTO { int truncate(char *path, \ int length); } 130 AUE_FTRUNCATE COMPAT|NOPROTO { int ftruncate(int fd, int length); } 131 AUE_FLOCK NOPROTO { int flock(int fd, int how); } 132 AUE_MKFIFO NOPROTO { int mkfifo(char *path, int mode); } 133 AUE_SENDTO NOPROTO { int sendto(int s, caddr_t buf, \ size_t len, int flags, caddr_t to, \ int tolen); } 134 AUE_SHUTDOWN NOPROTO { int shutdown(int s, int how); } 135 AUE_SOCKETPAIR NOPROTO { int socketpair(int domain, int type, \ int protocol, int *rsv); } 136 AUE_MKDIR NOPROTO { int mkdir(char *path, int mode); } 137 AUE_RMDIR NOPROTO { int rmdir(char *path); } 138 AUE_UTIMES STD { int freebsd32_utimes(char *path, \ struct timeval32 *tptr); } 139 AUE_NULL OBSOL 4.2 sigreturn 140 AUE_ADJTIME STD { int freebsd32_adjtime( \ struct timeval32 *delta, \ struct timeval32 *olddelta); } 141 AUE_GETPEERNAME OBSOL ogetpeername 142 AUE_SYSCTL OBSOL ogethostid 143 AUE_SYSCTL OBSOL sethostid 144 AUE_GETRLIMIT OBSOL getrlimit 145 AUE_SETRLIMIT OBSOL setrlimit 146 AUE_KILLPG OBSOL killpg 147 AUE_SETSID NOPROTO { int setsid(void); } 148 AUE_QUOTACTL NOPROTO { int quotactl(char *path, int cmd, int uid, \ caddr_t arg); } 149 AUE_O_QUOTA OBSOL oquota 150 AUE_GETSOCKNAME OBSOL ogetsockname ; Syscalls 151-180 inclusive are reserved for vendor-specific ; system calls. (This includes various calls added for compatibity ; with other Unix variants.) ; Some of these calls are now supported by BSD... 151 AUE_NULL UNIMPL sem_lock (BSD/OS 2.x) 152 AUE_NULL UNIMPL sem_wakeup (BSD/OS 2.x) 153 AUE_NULL UNIMPL asyncdaemon (BSD/OS 2.x) ; 154 is initialised by the NLM code, if present. 154 AUE_NULL UNIMPL nlm_syscall ; 155 is initialized by the NFS code, if present. ; XXX this is a problem!!! 155 AUE_NFS_SVC UNIMPL nfssvc 156 AUE_GETDIRENTRIES COMPAT { int freebsd32_getdirentries(int fd, \ char *buf, u_int count, uint32_t *basep); } 157 AUE_STATFS COMPAT4 { int freebsd32_statfs(char *path, \ struct statfs32 *buf); } 158 AUE_FSTATFS COMPAT4 { int freebsd32_fstatfs(int fd, \ struct statfs32 *buf); } 159 AUE_NULL UNIMPL nosys 160 AUE_LGETFH UNIMPL lgetfh 161 AUE_NFS_GETFH NOPROTO { int getfh(char *fname, \ struct fhandle *fhp); } 162 AUE_SYSCTL OBSOL getdomainname 163 AUE_SYSCTL OBSOL setdomainname 164 AUE_NULL OBSOL uname 165 AUE_SYSARCH STD { int freebsd32_sysarch(int op, char *parms); } 166 AUE_RTPRIO NOPROTO { int rtprio(int function, pid_t pid, \ struct rtprio *rtp); } 167 AUE_NULL UNIMPL nosys 168 AUE_NULL UNIMPL nosys 169 AUE_SEMSYS NOSTD { int freebsd32_semsys(int which, int a2, \ int a3, int a4, int a5); } 170 AUE_MSGSYS NOSTD { int freebsd32_msgsys(int which, int a2, \ int a3, int a4, int a5, int a6); } 171 AUE_SHMSYS NOSTD { int freebsd32_shmsys(uint32_t which, uint32_t a2, \ uint32_t a3, uint32_t a4); } 172 AUE_NULL UNIMPL nosys 173 AUE_PREAD COMPAT6 { ssize_t freebsd32_pread(int fd, void *buf, \ size_t nbyte, int pad, \ uint32_t offset1, uint32_t offset2); } 174 AUE_PWRITE COMPAT6 { ssize_t freebsd32_pwrite(int fd, \ const void *buf, size_t nbyte, int pad, \ uint32_t offset1, uint32_t offset2); } 175 AUE_NULL UNIMPL nosys 176 AUE_NTP_ADJTIME NOPROTO { int ntp_adjtime(struct timex *tp); } 177 AUE_NULL UNIMPL sfork (BSD/OS 2.x) 178 AUE_NULL UNIMPL getdescriptor (BSD/OS 2.x) 179 AUE_NULL UNIMPL setdescriptor (BSD/OS 2.x) 180 AUE_NULL UNIMPL nosys ; Syscalls 181-199 are used by/reserved for BSD 181 AUE_SETGID NOPROTO { int setgid(gid_t gid); } 182 AUE_SETEGID NOPROTO { int setegid(gid_t egid); } 183 AUE_SETEUID NOPROTO { int seteuid(uid_t euid); } 184 AUE_NULL UNIMPL lfs_bmapv 185 AUE_NULL UNIMPL lfs_markv 186 AUE_NULL UNIMPL lfs_segclean 187 AUE_NULL UNIMPL lfs_segwait -188 AUE_STAT STD { int freebsd32_stat(char *path, \ - struct stat32 *ub); } -189 AUE_FSTAT STD { int freebsd32_fstat(int fd, \ - struct stat32 *ub); } -190 AUE_LSTAT STD { int freebsd32_lstat(char *path, \ - struct stat32 *ub); } +188 AUE_STAT COMPAT11 { int freebsd32_stat(char *path, \ + struct freebsd11_stat32 *ub); } +189 AUE_FSTAT COMPAT11 { int freebsd32_fstat(int fd, \ + struct freebsd11_stat32 *ub); } +190 AUE_LSTAT COMPAT11 { int freebsd32_lstat(char *path, \ + struct freebsd11_stat32 *ub); } 191 AUE_PATHCONF NOPROTO { int pathconf(char *path, int name); } 192 AUE_FPATHCONF NOPROTO { int fpathconf(int fd, int name); } 193 AUE_NULL UNIMPL nosys 194 AUE_GETRLIMIT NOPROTO { int getrlimit(u_int which, \ struct rlimit *rlp); } getrlimit \ __getrlimit_args int 195 AUE_SETRLIMIT NOPROTO { int setrlimit(u_int which, \ struct rlimit *rlp); } setrlimit \ __setrlimit_args int -196 AUE_GETDIRENTRIES STD { int freebsd32_getdirentries(int fd, \ +196 AUE_GETDIRENTRIES COMPAT11 { int freebsd32_getdirentries(int fd, \ char *buf, u_int count, int32_t *basep); } 197 AUE_MMAP COMPAT6 { caddr_t freebsd32_mmap(caddr_t addr, \ size_t len, int prot, int flags, int fd, \ int pad, uint32_t pos1, uint32_t pos2); } 198 AUE_NULL NOPROTO { int nosys(void); } __syscall \ __syscall_args int 199 AUE_LSEEK COMPAT6 { off_t freebsd32_lseek(int fd, int pad, \ uint32_t offset1, uint32_t offset2, \ int whence); } 200 AUE_TRUNCATE COMPAT6 { int freebsd32_truncate(char *path, \ int pad, uint32_t length1, \ uint32_t length2); } 201 AUE_FTRUNCATE COMPAT6 { int freebsd32_ftruncate(int fd, int pad, \ uint32_t length1, uint32_t length2); } 202 AUE_SYSCTL STD { int freebsd32_sysctl(int *name, \ u_int namelen, void *old, \ uint32_t *oldlenp, void *new, \ uint32_t newlen); } 203 AUE_MLOCK NOPROTO { int mlock(const void *addr, \ size_t len); } 204 AUE_MUNLOCK NOPROTO { int munlock(const void *addr, \ size_t len); } 205 AUE_UNDELETE NOPROTO { int undelete(char *path); } 206 AUE_FUTIMES STD { int freebsd32_futimes(int fd, \ struct timeval32 *tptr); } 207 AUE_GETPGID NOPROTO { int getpgid(pid_t pid); } 208 AUE_NULL UNIMPL newreboot (NetBSD) 209 AUE_POLL NOPROTO { int poll(struct pollfd *fds, u_int nfds, \ int timeout); } ; ; The following are reserved for loadable syscalls ; 210 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 211 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 212 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 213 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 214 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 215 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 216 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 217 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 218 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 219 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int ; ; The following were introduced with NetBSD/4.4Lite-2 ; They are initialized by their respective modules/sysinits ; XXX PROBLEM!! 220 AUE_SEMCTL COMPAT7|NOSTD { int freebsd32_semctl( \ int semid, int semnum, \ int cmd, union semun32 *arg); } 221 AUE_SEMGET NOSTD|NOPROTO { int semget(key_t key, int nsems, \ int semflg); } 222 AUE_SEMOP NOSTD|NOPROTO { int semop(int semid, \ struct sembuf *sops, u_int nsops); } 223 AUE_NULL UNIMPL semconfig 224 AUE_MSGCTL COMPAT7|NOSTD { int freebsd32_msgctl( \ int msqid, int cmd, \ struct msqid_ds32_old *buf); } 225 AUE_MSGGET NOSTD|NOPROTO { int msgget(key_t key, int msgflg); } 226 AUE_MSGSND NOSTD { int freebsd32_msgsnd(int msqid, void *msgp, \ size_t msgsz, int msgflg); } 227 AUE_MSGRCV NOSTD { int freebsd32_msgrcv(int msqid, void *msgp, \ size_t msgsz, long msgtyp, int msgflg); } 228 AUE_SHMAT NOSTD|NOPROTO { int shmat(int shmid, void *shmaddr, \ int shmflg); } 229 AUE_SHMCTL COMPAT7|NOSTD { int freebsd32_shmctl( \ int shmid, int cmd, \ struct shmid_ds32_old *buf); } 230 AUE_SHMDT NOSTD|NOPROTO { int shmdt(void *shmaddr); } 231 AUE_SHMGET NOSTD|NOPROTO { int shmget(key_t key, int size, \ int shmflg); } ; 232 AUE_NULL STD { int freebsd32_clock_gettime(clockid_t clock_id, \ struct timespec32 *tp); } 233 AUE_CLOCK_SETTIME STD { int freebsd32_clock_settime(clockid_t clock_id, \ const struct timespec32 *tp); } 234 AUE_NULL STD { int freebsd32_clock_getres(clockid_t clock_id, \ struct timespec32 *tp); } 235 AUE_NULL STD { int freebsd32_ktimer_create(\ clockid_t clock_id, \ struct sigevent32 *evp, int *timerid); } 236 AUE_NULL NOPROTO { int ktimer_delete(int timerid); } 237 AUE_NULL STD { int freebsd32_ktimer_settime(int timerid,\ int flags, \ const struct itimerspec32 *value, \ struct itimerspec32 *ovalue); } 238 AUE_NULL STD { int freebsd32_ktimer_gettime(int timerid,\ struct itimerspec32 *value); } 239 AUE_NULL NOPROTO { int ktimer_getoverrun(int timerid); } 240 AUE_NULL STD { int freebsd32_nanosleep( \ const struct timespec32 *rqtp, \ struct timespec32 *rmtp); } 241 AUE_NULL NOPROTO { int ffclock_getcounter(ffcounter *ffcount); } 242 AUE_NULL NOPROTO { int ffclock_setestimate( \ struct ffclock_estimate *cest); } 243 AUE_NULL NOPROTO { int ffclock_getestimate( \ struct ffclock_estimate *cest); } 244 AUE_NULL STD { int freebsd32_clock_nanosleep( \ clockid_t clock_id, int flags, \ const struct timespec32 *rqtp, \ struct timespec32 *rmtp); } 245 AUE_NULL UNIMPL nosys 246 AUE_NULL UNIMPL nosys 247 AUE_NULL STD { int freebsd32_clock_getcpuclockid2(\ uint32_t id1, uint32_t id2,\ int which, clockid_t *clock_id); } 248 AUE_NULL UNIMPL ntp_gettime 249 AUE_NULL UNIMPL nosys ; syscall numbers initially used in OpenBSD 250 AUE_MINHERIT NOPROTO { int minherit(void *addr, size_t len, \ int inherit); } 251 AUE_RFORK NOPROTO { int rfork(int flags); } 252 AUE_POLL OBSOL openbsd_poll 253 AUE_ISSETUGID NOPROTO { int issetugid(void); } 254 AUE_LCHOWN NOPROTO { int lchown(char *path, int uid, int gid); } 255 AUE_AIO_READ STD { int freebsd32_aio_read( \ struct aiocb32 *aiocbp); } 256 AUE_AIO_WRITE STD { int freebsd32_aio_write( \ struct aiocb32 *aiocbp); } 257 AUE_LIO_LISTIO STD { int freebsd32_lio_listio(int mode, \ struct aiocb32 * const *acb_list, \ int nent, struct sigevent32 *sig); } 258 AUE_NULL UNIMPL nosys 259 AUE_NULL UNIMPL nosys 260 AUE_NULL UNIMPL nosys 261 AUE_NULL UNIMPL nosys 262 AUE_NULL UNIMPL nosys 263 AUE_NULL UNIMPL nosys 264 AUE_NULL UNIMPL nosys 265 AUE_NULL UNIMPL nosys 266 AUE_NULL UNIMPL nosys 267 AUE_NULL UNIMPL nosys 268 AUE_NULL UNIMPL nosys 269 AUE_NULL UNIMPL nosys 270 AUE_NULL UNIMPL nosys 271 AUE_NULL UNIMPL nosys -272 AUE_O_GETDENTS NOPROTO { int getdents(int fd, char *buf, \ - size_t count); } +272 AUE_O_GETDENTS COMPAT11 { int freebsd32_getdents(int fd, char *buf, \ + int count); } 273 AUE_NULL UNIMPL nosys 274 AUE_LCHMOD NOPROTO { int lchmod(char *path, mode_t mode); } 275 AUE_LCHOWN NOPROTO { int lchown(char *path, uid_t uid, \ gid_t gid); } netbsd_lchown \ lchown_args int 276 AUE_LUTIMES STD { int freebsd32_lutimes(char *path, \ struct timeval32 *tptr); } 277 AUE_MSYNC NOPROTO { int msync(void *addr, size_t len, \ int flags); } netbsd_msync msync_args int -278 AUE_STAT NOPROTO { int nstat(char *path, struct nstat *ub); } -279 AUE_FSTAT NOPROTO { int nfstat(int fd, struct nstat *sb); } -280 AUE_LSTAT NOPROTO { int nlstat(char *path, struct nstat *ub); } +278 AUE_STAT COMPAT11|NOPROTO { int nstat(char *path, struct nstat *ub); } +279 AUE_FSTAT COMPAT11|NOPROTO { int nfstat(int fd, struct nstat *sb); } +280 AUE_LSTAT COMPAT11|NOPROTO { int nlstat(char *path, struct nstat *ub); } 281 AUE_NULL UNIMPL nosys 282 AUE_NULL UNIMPL nosys 283 AUE_NULL UNIMPL nosys 284 AUE_NULL UNIMPL nosys 285 AUE_NULL UNIMPL nosys 286 AUE_NULL UNIMPL nosys 287 AUE_NULL UNIMPL nosys 288 AUE_NULL UNIMPL nosys ; 289 and 290 from NetBSD (OpenBSD: 267 and 268) 289 AUE_PREADV STD { ssize_t freebsd32_preadv(int fd, \ struct iovec32 *iovp, \ u_int iovcnt, \ uint32_t offset1, uint32_t offset2); } 290 AUE_PWRITEV STD { ssize_t freebsd32_pwritev(int fd, \ struct iovec32 *iovp, \ u_int iovcnt, \ uint32_t offset1, uint32_t offset2); } 291 AUE_NULL UNIMPL nosys 292 AUE_NULL UNIMPL nosys 293 AUE_NULL UNIMPL nosys 294 AUE_NULL UNIMPL nosys 295 AUE_NULL UNIMPL nosys 296 AUE_NULL UNIMPL nosys ; XXX 297 is 300 in NetBSD 297 AUE_FHSTATFS COMPAT4 { int freebsd32_fhstatfs( \ const struct fhandle *u_fhp, \ struct statfs32 *buf); } 298 AUE_FHOPEN NOPROTO { int fhopen(const struct fhandle *u_fhp, \ int flags); } -299 AUE_FHSTAT NOPROTO { int fhstat(const struct fhandle *u_fhp, \ - struct stat *sb); } +299 AUE_FHSTAT COMPAT11 { int freebsd32_fhstat( \ + const struct fhandle *u_fhp, \ + struct freebsd11_stat32 *sb); } ; syscall numbers for FreeBSD 300 AUE_NULL NOPROTO { int modnext(int modid); } 301 AUE_NULL STD { int freebsd32_modstat(int modid, \ struct module_stat32* stat); } 302 AUE_NULL NOPROTO { int modfnext(int modid); } 303 AUE_NULL NOPROTO { int modfind(const char *name); } 304 AUE_MODLOAD NOPROTO { int kldload(const char *file); } 305 AUE_MODUNLOAD NOPROTO { int kldunload(int fileid); } 306 AUE_NULL NOPROTO { int kldfind(const char *file); } 307 AUE_NULL NOPROTO { int kldnext(int fileid); } 308 AUE_NULL STD { int freebsd32_kldstat(int fileid, \ struct kld32_file_stat* stat); } 309 AUE_NULL NOPROTO { int kldfirstmod(int fileid); } 310 AUE_GETSID NOPROTO { int getsid(pid_t pid); } 311 AUE_SETRESUID NOPROTO { int setresuid(uid_t ruid, uid_t euid, \ uid_t suid); } 312 AUE_SETRESGID NOPROTO { int setresgid(gid_t rgid, gid_t egid, \ gid_t sgid); } 313 AUE_NULL OBSOL signanosleep 314 AUE_AIO_RETURN STD { int freebsd32_aio_return( \ struct aiocb32 *aiocbp); } 315 AUE_AIO_SUSPEND STD { int freebsd32_aio_suspend( \ struct aiocb32 * const * aiocbp, int nent, \ const struct timespec32 *timeout); } 316 AUE_AIO_CANCEL NOPROTO { int aio_cancel(int fd, \ struct aiocb *aiocbp); } 317 AUE_AIO_ERROR STD { int freebsd32_aio_error( \ struct aiocb32 *aiocbp); } 318 AUE_AIO_READ COMPAT6 { int freebsd32_aio_read( \ struct oaiocb32 *aiocbp); } 319 AUE_AIO_WRITE COMPAT6 { int freebsd32_aio_write( \ struct oaiocb32 *aiocbp); } 320 AUE_LIO_LISTIO COMPAT6 { int freebsd32_lio_listio(int mode, \ struct oaiocb32 * const *acb_list, \ int nent, struct osigevent32 *sig); } 321 AUE_NULL NOPROTO { int yield(void); } 322 AUE_NULL OBSOL thr_sleep 323 AUE_NULL OBSOL thr_wakeup 324 AUE_MLOCKALL NOPROTO { int mlockall(int how); } 325 AUE_MUNLOCKALL NOPROTO { int munlockall(void); } 326 AUE_GETCWD NOPROTO { int __getcwd(char *buf, size_t buflen); } 327 AUE_NULL NOPROTO { int sched_setparam (pid_t pid, \ const struct sched_param *param); } 328 AUE_NULL NOPROTO { int sched_getparam (pid_t pid, \ struct sched_param *param); } 329 AUE_NULL NOPROTO { int sched_setscheduler (pid_t pid, \ int policy, \ const struct sched_param *param); } 330 AUE_NULL NOPROTO { int sched_getscheduler (pid_t pid); } 331 AUE_NULL NOPROTO { int sched_yield (void); } 332 AUE_NULL NOPROTO { int sched_get_priority_max (int policy); } 333 AUE_NULL NOPROTO { int sched_get_priority_min (int policy); } 334 AUE_NULL NOPROTO { int sched_rr_get_interval (pid_t pid, \ struct timespec *interval); } 335 AUE_NULL NOPROTO { int utrace(const void *addr, size_t len); } 336 AUE_SENDFILE COMPAT4 { int freebsd32_sendfile(int fd, int s, \ uint32_t offset1, uint32_t offset2, \ size_t nbytes, struct sf_hdtr32 *hdtr, \ off_t *sbytes, int flags); } 337 AUE_NULL NOPROTO { int kldsym(int fileid, int cmd, \ void *data); } 338 AUE_JAIL STD { int freebsd32_jail(struct jail32 *jail); } 339 AUE_NULL UNIMPL pioctl 340 AUE_SIGPROCMASK NOPROTO { int sigprocmask(int how, \ const sigset_t *set, sigset_t *oset); } 341 AUE_SIGSUSPEND NOPROTO { int sigsuspend(const sigset_t *sigmask); } 342 AUE_SIGACTION COMPAT4 { int freebsd32_sigaction(int sig, \ struct sigaction32 *act, \ struct sigaction32 *oact); } 343 AUE_SIGPENDING NOPROTO { int sigpending(sigset_t *set); } 344 AUE_SIGRETURN COMPAT4 { int freebsd32_sigreturn( \ const struct freebsd4_freebsd32_ucontext *sigcntxp); } 345 AUE_SIGWAIT STD { int freebsd32_sigtimedwait(const sigset_t *set, \ siginfo_t *info, \ const struct timespec *timeout); } 346 AUE_NULL STD { int freebsd32_sigwaitinfo(const sigset_t *set, \ siginfo_t *info); } 347 AUE_ACL_GET_FILE NOPROTO { int __acl_get_file(const char *path, \ acl_type_t type, struct acl *aclp); } 348 AUE_ACL_SET_FILE NOPROTO { int __acl_set_file(const char *path, \ acl_type_t type, struct acl *aclp); } 349 AUE_ACL_GET_FD NOPROTO { int __acl_get_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 350 AUE_ACL_SET_FD NOPROTO { int __acl_set_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 351 AUE_ACL_DELETE_FILE NOPROTO { int __acl_delete_file(const char *path, \ acl_type_t type); } 352 AUE_ACL_DELETE_FD NOPROTO { int __acl_delete_fd(int filedes, \ acl_type_t type); } 353 AUE_ACL_CHECK_FILE NOPROTO { int __acl_aclcheck_file(const char *path, \ acl_type_t type, struct acl *aclp); } 354 AUE_ACL_CHECK_FD NOPROTO { int __acl_aclcheck_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 355 AUE_EXTATTRCTL NOPROTO { int extattrctl(const char *path, int cmd, \ const char *filename, int attrnamespace, \ const char *attrname); } 356 AUE_EXTATTR_SET_FILE NOPROTO { ssize_t extattr_set_file( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 357 AUE_EXTATTR_GET_FILE NOPROTO { ssize_t extattr_get_file( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 358 AUE_EXTATTR_DELETE_FILE NOPROTO { int extattr_delete_file( \ const char *path, int attrnamespace, \ const char *attrname); } 359 AUE_AIO_WAITCOMPLETE STD { int freebsd32_aio_waitcomplete( \ struct aiocb32 **aiocbp, \ struct timespec32 *timeout); } 360 AUE_GETRESUID NOPROTO { int getresuid(uid_t *ruid, uid_t *euid, \ uid_t *suid); } 361 AUE_GETRESGID NOPROTO { int getresgid(gid_t *rgid, gid_t *egid, \ gid_t *sgid); } 362 AUE_KQUEUE NOPROTO { int kqueue(void); } 363 AUE_KEVENT STD { int freebsd32_kevent(int fd, \ const struct kevent32 *changelist, \ int nchanges, \ struct kevent32 *eventlist, int nevents, \ const struct timespec32 *timeout); } 364 AUE_NULL UNIMPL __cap_get_proc 365 AUE_NULL UNIMPL __cap_set_proc 366 AUE_NULL UNIMPL __cap_get_fd 367 AUE_NULL UNIMPL __cap_get_file 368 AUE_NULL UNIMPL __cap_set_fd 369 AUE_NULL UNIMPL __cap_set_file 370 AUE_NULL UNIMPL nosys 371 AUE_EXTATTR_SET_FD NOPROTO { ssize_t extattr_set_fd(int fd, \ int attrnamespace, const char *attrname, \ void *data, size_t nbytes); } 372 AUE_EXTATTR_GET_FD NOPROTO { ssize_t extattr_get_fd(int fd, \ int attrnamespace, const char *attrname, \ void *data, size_t nbytes); } 373 AUE_EXTATTR_DELETE_FD NOPROTO { int extattr_delete_fd(int fd, \ int attrnamespace, \ const char *attrname); } 374 AUE_SETUGID NOPROTO { int __setugid(int flag); } 375 AUE_NULL UNIMPL nfsclnt 376 AUE_EACCESS NOPROTO { int eaccess(char *path, int amode); } 377 AUE_NULL UNIMPL afs_syscall 378 AUE_NMOUNT STD { int freebsd32_nmount(struct iovec32 *iovp, \ unsigned int iovcnt, int flags); } 379 AUE_NULL UNIMPL kse_exit 380 AUE_NULL UNIMPL kse_wakeup 381 AUE_NULL UNIMPL kse_create 382 AUE_NULL UNIMPL kse_thr_interrupt 383 AUE_NULL UNIMPL kse_release 384 AUE_NULL UNIMPL __mac_get_proc 385 AUE_NULL UNIMPL __mac_set_proc 386 AUE_NULL UNIMPL __mac_get_fd 387 AUE_NULL UNIMPL __mac_get_file 388 AUE_NULL UNIMPL __mac_set_fd 389 AUE_NULL UNIMPL __mac_set_file 390 AUE_NULL NOPROTO { int kenv(int what, const char *name, \ char *value, int len); } 391 AUE_LCHFLAGS NOPROTO { int lchflags(const char *path, \ u_long flags); } 392 AUE_NULL NOPROTO { int uuidgen(struct uuid *store, \ int count); } 393 AUE_SENDFILE STD { int freebsd32_sendfile(int fd, int s, \ uint32_t offset1, uint32_t offset2, \ size_t nbytes, struct sf_hdtr32 *hdtr, \ off_t *sbytes, int flags); } 394 AUE_NULL UNIMPL mac_syscall -395 AUE_GETFSSTAT NOPROTO { int getfsstat(struct statfs *buf, \ +395 AUE_GETFSSTAT COMPAT11|NOPROTO { int getfsstat( \ + struct freebsd11_statfs *buf, \ long bufsize, int mode); } -396 AUE_STATFS NOPROTO { int statfs(char *path, \ +396 AUE_STATFS COMPAT11|NOPROTO { int statfs(char *path, \ struct statfs *buf); } -397 AUE_FSTATFS NOPROTO { int fstatfs(int fd, struct statfs *buf); } -398 AUE_FHSTATFS NOPROTO { int fhstatfs(const struct fhandle *u_fhp, \ - struct statfs *buf); } +397 AUE_FSTATFS COMPAT11|NOPROTO { int fstatfs(int fd, \ + struct freebsd11_statfs *buf); } +398 AUE_FHSTATFS COMPAT11|NOPROTO { int fhstatfs( \ + const struct fhandle *u_fhp, \ + struct freebsd11_statfs *buf); } 399 AUE_NULL UNIMPL nosys 400 AUE_SEMCLOSE NOSTD|NOPROTO { int ksem_close(semid_t id); } 401 AUE_SEMPOST NOSTD|NOPROTO { int ksem_post(semid_t id); } 402 AUE_SEMWAIT NOSTD|NOPROTO { int ksem_wait(semid_t id); } 403 AUE_SEMTRYWAIT NOSTD|NOPROTO { int ksem_trywait(semid_t id); } 404 AUE_SEMINIT NOSTD { int freebsd32_ksem_init(semid_t *idp, \ unsigned int value); } 405 AUE_SEMOPEN NOSTD { int freebsd32_ksem_open(semid_t *idp, \ const char *name, int oflag, \ mode_t mode, unsigned int value); } 406 AUE_SEMUNLINK NOSTD|NOPROTO { int ksem_unlink(const char *name); } 407 AUE_SEMGETVALUE NOSTD|NOPROTO { int ksem_getvalue(semid_t id, \ int *val); } 408 AUE_SEMDESTROY NOSTD|NOPROTO { int ksem_destroy(semid_t id); } 409 AUE_NULL UNIMPL __mac_get_pid 410 AUE_NULL UNIMPL __mac_get_link 411 AUE_NULL UNIMPL __mac_set_link 412 AUE_EXTATTR_SET_LINK NOPROTO { ssize_t extattr_set_link( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 413 AUE_EXTATTR_GET_LINK NOPROTO { ssize_t extattr_get_link( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 414 AUE_EXTATTR_DELETE_LINK NOPROTO { int extattr_delete_link( \ const char *path, int attrnamespace, \ const char *attrname); } 415 AUE_NULL UNIMPL __mac_execve 416 AUE_SIGACTION STD { int freebsd32_sigaction(int sig, \ struct sigaction32 *act, \ struct sigaction32 *oact); } 417 AUE_SIGRETURN STD { int freebsd32_sigreturn( \ const struct freebsd32_ucontext *sigcntxp); } 418 AUE_NULL UNIMPL __xstat 419 AUE_NULL UNIMPL __xfstat 420 AUE_NULL UNIMPL __xlstat 421 AUE_NULL STD { int freebsd32_getcontext( \ struct freebsd32_ucontext *ucp); } 422 AUE_NULL STD { int freebsd32_setcontext( \ const struct freebsd32_ucontext *ucp); } 423 AUE_NULL STD { int freebsd32_swapcontext( \ struct freebsd32_ucontext *oucp, \ const struct freebsd32_ucontext *ucp); } 424 AUE_SWAPOFF UNIMPL swapoff 425 AUE_ACL_GET_LINK NOPROTO { int __acl_get_link(const char *path, \ acl_type_t type, struct acl *aclp); } 426 AUE_ACL_SET_LINK NOPROTO { int __acl_set_link(const char *path, \ acl_type_t type, struct acl *aclp); } 427 AUE_ACL_DELETE_LINK NOPROTO { int __acl_delete_link(const char *path, \ acl_type_t type); } 428 AUE_ACL_CHECK_LINK NOPROTO { int __acl_aclcheck_link(const char *path, \ acl_type_t type, struct acl *aclp); } 429 AUE_SIGWAIT NOPROTO { int sigwait(const sigset_t *set, \ int *sig); } 430 AUE_THR_CREATE UNIMPL thr_create; 431 AUE_THR_EXIT NOPROTO { void thr_exit(long *state); } 432 AUE_NULL NOPROTO { int thr_self(long *id); } 433 AUE_THR_KILL NOPROTO { int thr_kill(long id, int sig); } 434 AUE_NULL UNIMPL nosys 435 AUE_NULL UNIMPL nosys 436 AUE_JAIL_ATTACH NOPROTO { int jail_attach(int jid); } 437 AUE_EXTATTR_LIST_FD NOPROTO { ssize_t extattr_list_fd(int fd, \ int attrnamespace, void *data, \ size_t nbytes); } 438 AUE_EXTATTR_LIST_FILE NOPROTO { ssize_t extattr_list_file( \ const char *path, int attrnamespace, \ void *data, size_t nbytes); } 439 AUE_EXTATTR_LIST_LINK NOPROTO { ssize_t extattr_list_link( \ const char *path, int attrnamespace, \ void *data, size_t nbytes); } 440 AUE_NULL UNIMPL kse_switchin 441 AUE_SEMWAIT NOSTD { int freebsd32_ksem_timedwait(semid_t id, \ const struct timespec32 *abstime); } 442 AUE_NULL STD { int freebsd32_thr_suspend( \ const struct timespec32 *timeout); } 443 AUE_NULL NOPROTO { int thr_wake(long id); } 444 AUE_MODUNLOAD NOPROTO { int kldunloadf(int fileid, int flags); } 445 AUE_AUDIT NOPROTO { int audit(const void *record, \ u_int length); } 446 AUE_AUDITON NOPROTO { int auditon(int cmd, void *data, \ u_int length); } 447 AUE_GETAUID NOPROTO { int getauid(uid_t *auid); } 448 AUE_SETAUID NOPROTO { int setauid(uid_t *auid); } 449 AUE_GETAUDIT NOPROTO { int getaudit(struct auditinfo *auditinfo); } 450 AUE_SETAUDIT NOPROTO { int setaudit(struct auditinfo *auditinfo); } 451 AUE_GETAUDIT_ADDR NOPROTO { int getaudit_addr( \ struct auditinfo_addr *auditinfo_addr, \ u_int length); } 452 AUE_SETAUDIT_ADDR NOPROTO { int setaudit_addr( \ struct auditinfo_addr *auditinfo_addr, \ u_int length); } 453 AUE_AUDITCTL NOPROTO { int auditctl(char *path); } 454 AUE_NULL STD { int freebsd32_umtx_op(void *obj, int op,\ u_long val, void *uaddr, \ void *uaddr2); } 455 AUE_THR_NEW STD { int freebsd32_thr_new( \ struct thr_param32 *param, \ int param_size); } 456 AUE_NULL STD { int freebsd32_sigqueue(pid_t pid, \ int signum, int value); } 457 AUE_MQ_OPEN NOSTD { int freebsd32_kmq_open( \ const char *path, int flags, mode_t mode, \ const struct mq_attr32 *attr); } 458 AUE_MQ_SETATTR NOSTD { int freebsd32_kmq_setattr(int mqd, \ const struct mq_attr32 *attr, \ struct mq_attr32 *oattr); } 459 AUE_MQ_TIMEDRECEIVE NOSTD { int freebsd32_kmq_timedreceive(int mqd, \ char *msg_ptr, size_t msg_len, \ unsigned *msg_prio, \ const struct timespec32 *abs_timeout); } 460 AUE_MQ_TIMEDSEND NOSTD { int freebsd32_kmq_timedsend(int mqd, \ const char *msg_ptr, size_t msg_len,\ unsigned msg_prio, \ const struct timespec32 *abs_timeout);} 461 AUE_MQ_NOTIFY NOSTD { int freebsd32_kmq_notify(int mqd, \ const struct sigevent32 *sigev); } 462 AUE_MQ_UNLINK NOPROTO|NOSTD { int kmq_unlink(const char *path); } 463 AUE_NULL NOPROTO { int abort2(const char *why, int nargs, void **args); } 464 AUE_NULL NOPROTO { int thr_set_name(long id, const char *name); } 465 AUE_AIO_FSYNC STD { int freebsd32_aio_fsync(int op, \ struct aiocb32 *aiocbp); } 466 AUE_RTPRIO NOPROTO { int rtprio_thread(int function, \ lwpid_t lwpid, struct rtprio *rtp); } 467 AUE_NULL UNIMPL nosys 468 AUE_NULL UNIMPL nosys 469 AUE_NULL UNIMPL __getpath_fromfd 470 AUE_NULL UNIMPL __getpath_fromaddr 471 AUE_SCTP_PEELOFF NOPROTO|NOSTD { int sctp_peeloff(int sd, uint32_t name); } 472 AUE_SCTP_GENERIC_SENDMSG NOPROTO|NOSTD { int sctp_generic_sendmsg(int sd, caddr_t msg, int mlen, \ caddr_t to, __socklen_t tolen, \ struct sctp_sndrcvinfo *sinfo, int flags); } 473 AUE_SCTP_GENERIC_SENDMSG_IOV NOPROTO|NOSTD { int sctp_generic_sendmsg_iov(int sd, struct iovec *iov, int iovlen, \ caddr_t to, __socklen_t tolen, \ struct sctp_sndrcvinfo *sinfo, int flags); } 474 AUE_SCTP_GENERIC_RECVMSG NOPROTO|NOSTD { int sctp_generic_recvmsg(int sd, struct iovec *iov, int iovlen, \ struct sockaddr * from, __socklen_t *fromlenaddr, \ struct sctp_sndrcvinfo *sinfo, int *msg_flags); } #ifdef PAD64_REQUIRED 475 AUE_PREAD STD { ssize_t freebsd32_pread(int fd, \ void *buf,size_t nbyte, \ int pad, \ uint32_t offset1, uint32_t offset2); } 476 AUE_PWRITE STD { ssize_t freebsd32_pwrite(int fd, \ const void *buf, size_t nbyte, \ int pad, \ uint32_t offset1, uint32_t offset2); } 477 AUE_MMAP STD { caddr_t freebsd32_mmap(caddr_t addr, \ size_t len, int prot, int flags, int fd, \ int pad, \ uint32_t pos1, uint32_t pos2); } 478 AUE_LSEEK STD { off_t freebsd32_lseek(int fd, \ int pad, \ uint32_t offset1, uint32_t offset2, \ int whence); } 479 AUE_TRUNCATE STD { int freebsd32_truncate(char *path, \ int pad, \ uint32_t length1, uint32_t length2); } 480 AUE_FTRUNCATE STD { int freebsd32_ftruncate(int fd, \ int pad, \ uint32_t length1, uint32_t length2); } #else 475 AUE_PREAD STD { ssize_t freebsd32_pread(int fd, \ void *buf,size_t nbyte, \ uint32_t offset1, uint32_t offset2); } 476 AUE_PWRITE STD { ssize_t freebsd32_pwrite(int fd, \ const void *buf, size_t nbyte, \ uint32_t offset1, uint32_t offset2); } 477 AUE_MMAP STD { caddr_t freebsd32_mmap(caddr_t addr, \ size_t len, int prot, int flags, int fd, \ uint32_t pos1, uint32_t pos2); } 478 AUE_LSEEK STD { off_t freebsd32_lseek(int fd, \ uint32_t offset1, uint32_t offset2, \ int whence); } 479 AUE_TRUNCATE STD { int freebsd32_truncate(char *path, \ uint32_t length1, uint32_t length2); } 480 AUE_FTRUNCATE STD { int freebsd32_ftruncate(int fd, \ uint32_t length1, uint32_t length2); } #endif 481 AUE_THR_KILL2 NOPROTO { int thr_kill2(pid_t pid, long id, int sig); } 482 AUE_SHMOPEN NOPROTO { int shm_open(const char *path, int flags, \ mode_t mode); } 483 AUE_SHMUNLINK NOPROTO { int shm_unlink(const char *path); } 484 AUE_NULL NOPROTO { int cpuset(cpusetid_t *setid); } #ifdef PAD64_REQUIRED 485 AUE_NULL STD { int freebsd32_cpuset_setid(cpuwhich_t which, \ int pad, \ uint32_t id1, uint32_t id2, \ cpusetid_t setid); } #else 485 AUE_NULL STD { int freebsd32_cpuset_setid(cpuwhich_t which, \ uint32_t id1, uint32_t id2, \ cpusetid_t setid); } #endif 486 AUE_NULL STD { int freebsd32_cpuset_getid(cpulevel_t level, \ cpuwhich_t which, \ uint32_t id1, uint32_t id2, \ cpusetid_t *setid); } 487 AUE_NULL STD { int freebsd32_cpuset_getaffinity( \ cpulevel_t level, cpuwhich_t which, \ uint32_t id1, uint32_t id2, \ size_t cpusetsize, \ cpuset_t *mask); } 488 AUE_NULL STD { int freebsd32_cpuset_setaffinity( \ cpulevel_t level, cpuwhich_t which, \ uint32_t id1, uint32_t id2, \ size_t cpusetsize, \ const cpuset_t *mask); } 489 AUE_FACCESSAT NOPROTO { int faccessat(int fd, char *path, int amode, \ int flag); } 490 AUE_FCHMODAT NOPROTO { int fchmodat(int fd, const char *path, \ mode_t mode, int flag); } 491 AUE_FCHOWNAT NOPROTO { int fchownat(int fd, char *path, uid_t uid, \ gid_t gid, int flag); } 492 AUE_FEXECVE STD { int freebsd32_fexecve(int fd, \ uint32_t *argv, uint32_t *envv); } -493 AUE_FSTATAT STD { int freebsd32_fstatat(int fd, char *path, \ - struct stat *buf, int flag); } +493 AUE_FSTATAT COMPAT11 { int freebsd32_fstatat(int fd, \ + char *path, struct freebsd11_stat32 *buf, \ + int flag); } 494 AUE_FUTIMESAT STD { int freebsd32_futimesat(int fd, char *path, \ struct timeval *times); } 495 AUE_LINKAT NOPROTO { int linkat(int fd1, char *path1, int fd2, \ char *path2, int flag); } 496 AUE_MKDIRAT NOPROTO { int mkdirat(int fd, char *path, \ mode_t mode); } 497 AUE_MKFIFOAT NOPROTO { int mkfifoat(int fd, char *path, \ mode_t mode); } -498 AUE_MKNODAT NOPROTO { int mknodat(int fd, char *path, \ - mode_t mode, dev_t dev); } +498 AUE_MKNODAT COMPAT11 { int freebsd32_mknodat(int fd, char *path, \ + mode_t mode, uint32_t dev); } 499 AUE_OPENAT_RWTC NOPROTO { int openat(int fd, char *path, int flag, \ mode_t mode); } 500 AUE_READLINKAT NOPROTO { int readlinkat(int fd, char *path, char *buf, \ size_t bufsize); } 501 AUE_RENAMEAT NOPROTO { int renameat(int oldfd, char *old, int newfd, \ const char *new); } 502 AUE_SYMLINKAT NOPROTO { int symlinkat(char *path1, int fd, \ char *path2); } 503 AUE_UNLINKAT NOPROTO { int unlinkat(int fd, char *path, \ int flag); } 504 AUE_POSIX_OPENPT NOPROTO { int posix_openpt(int flags); } ; 505 is initialised by the kgssapi code, if present. 505 AUE_NULL UNIMPL gssd_syscall 506 AUE_JAIL_GET STD { int freebsd32_jail_get(struct iovec32 *iovp, \ unsigned int iovcnt, int flags); } 507 AUE_JAIL_SET STD { int freebsd32_jail_set(struct iovec32 *iovp, \ unsigned int iovcnt, int flags); } 508 AUE_JAIL_REMOVE NOPROTO { int jail_remove(int jid); } 509 AUE_CLOSEFROM NOPROTO { int closefrom(int lowfd); } 510 AUE_SEMCTL NOSTD { int freebsd32_semctl(int semid, int semnum, \ int cmd, union semun32 *arg); } 511 AUE_MSGCTL NOSTD { int freebsd32_msgctl(int msqid, int cmd, \ struct msqid_ds32 *buf); } 512 AUE_SHMCTL NOSTD { int freebsd32_shmctl(int shmid, int cmd, \ struct shmid_ds32 *buf); } 513 AUE_LPATHCONF NOPROTO { int lpathconf(char *path, int name); } 514 AUE_NULL OBSOL cap_new 515 AUE_CAP_RIGHTS_GET NOPROTO { int __cap_rights_get(int version, \ int fd, cap_rights_t *rightsp); } 516 AUE_CAP_ENTER NOPROTO { int cap_enter(void); } 517 AUE_CAP_GETMODE NOPROTO { int cap_getmode(u_int *modep); } 518 AUE_PDFORK NOPROTO { int pdfork(int *fdp, int flags); } 519 AUE_PDKILL NOPROTO { int pdkill(int fd, int signum); } 520 AUE_PDGETPID NOPROTO { int pdgetpid(int fd, pid_t *pidp); } 521 AUE_PDWAIT UNIMPL pdwait4 522 AUE_SELECT STD { int freebsd32_pselect(int nd, fd_set *in, \ fd_set *ou, fd_set *ex, \ const struct timespec32 *ts, \ const sigset_t *sm); } 523 AUE_GETLOGINCLASS NOPROTO { int getloginclass(char *namebuf, \ size_t namelen); } 524 AUE_SETLOGINCLASS NOPROTO { int setloginclass(const char *namebuf); } 525 AUE_NULL NOPROTO { int rctl_get_racct(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 526 AUE_NULL NOPROTO { int rctl_get_rules(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 527 AUE_NULL NOPROTO { int rctl_get_limits(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 528 AUE_NULL NOPROTO { int rctl_add_rule(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 529 AUE_NULL NOPROTO { int rctl_remove_rule(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } #ifdef PAD64_REQUIRED 530 AUE_POSIX_FALLOCATE STD { int freebsd32_posix_fallocate(int fd, \ int pad, \ uint32_t offset1, uint32_t offset2,\ uint32_t len1, uint32_t len2); } 531 AUE_POSIX_FADVISE STD { int freebsd32_posix_fadvise(int fd, \ int pad, \ uint32_t offset1, uint32_t offset2,\ uint32_t len1, uint32_t len2, \ int advice); } 532 AUE_WAIT6 STD { int freebsd32_wait6(int idtype, int pad, \ uint32_t id1, uint32_t id2, \ int *status, int options, \ struct wrusage32 *wrusage, \ siginfo_t *info); } #else 530 AUE_POSIX_FALLOCATE STD { int freebsd32_posix_fallocate(int fd,\ uint32_t offset1, uint32_t offset2,\ uint32_t len1, uint32_t len2); } 531 AUE_POSIX_FADVISE STD { int freebsd32_posix_fadvise(int fd, \ uint32_t offset1, uint32_t offset2,\ uint32_t len1, uint32_t len2, \ int advice); } 532 AUE_WAIT6 STD { int freebsd32_wait6(int idtype, \ uint32_t id1, uint32_t id2, \ int *status, int options, \ struct wrusage32 *wrusage, \ siginfo_t *info); } #endif 533 AUE_CAP_RIGHTS_LIMIT NOPROTO { \ int cap_rights_limit(int fd, \ cap_rights_t *rightsp); } 534 AUE_CAP_IOCTLS_LIMIT STD { \ int freebsd32_cap_ioctls_limit(int fd, \ const uint32_t *cmds, size_t ncmds); } 535 AUE_CAP_IOCTLS_GET STD { \ ssize_t freebsd32_cap_ioctls_get(int fd, \ uint32_t *cmds, size_t maxcmds); } 536 AUE_CAP_FCNTLS_LIMIT NOPROTO { int cap_fcntls_limit(int fd, \ uint32_t fcntlrights); } 537 AUE_CAP_FCNTLS_GET NOPROTO { int cap_fcntls_get(int fd, \ uint32_t *fcntlrightsp); } 538 AUE_BINDAT NOPROTO { int bindat(int fd, int s, caddr_t name, \ int namelen); } 539 AUE_CONNECTAT NOPROTO { int connectat(int fd, int s, caddr_t name, \ int namelen); } 540 AUE_CHFLAGSAT NOPROTO { int chflagsat(int fd, const char *path, \ u_long flags, int atflag); } 541 AUE_ACCEPT NOPROTO { int accept4(int s, \ struct sockaddr * __restrict name, \ __socklen_t * __restrict anamelen, \ int flags); } 542 AUE_PIPE NOPROTO { int pipe2(int *fildes, int flags); } 543 AUE_AIO_MLOCK STD { int freebsd32_aio_mlock( \ struct aiocb32 *aiocbp); } #ifdef PAD64_REQUIRED 544 AUE_PROCCTL STD { int freebsd32_procctl(int idtype, int pad, \ uint32_t id1, uint32_t id2, int com, \ void *data); } #else 544 AUE_PROCCTL STD { int freebsd32_procctl(int idtype, \ uint32_t id1, uint32_t id2, int com, \ void *data); } #endif 545 AUE_POLL STD { int freebsd32_ppoll(struct pollfd *fds, \ u_int nfds, const struct timespec32 *ts, \ const sigset_t *set); } 546 AUE_FUTIMES STD { int freebsd32_futimens(int fd, \ struct timespec *times); } 547 AUE_FUTIMESAT STD { int freebsd32_utimensat(int fd, \ char *path, \ struct timespec *times, int flag); } 548 AUE_NULL NOPROTO { int numa_getaffinity(cpuwhich_t which, \ id_t id, \ struct vm_domain_policy *policy); } 549 AUE_NULL NOPROTO { int numa_setaffinity(cpuwhich_t which, \ id_t id, \ const struct vm_domain_policy *policy); } 550 AUE_FSYNC NOPROTO { int fdatasync(int fd); } +551 AUE_FSTAT STD { int freebsd32_fstat(int fd, \ + struct stat32 *ub); } +552 AUE_FSTATAT STD { int freebsd32_fstatat(int fd, \ + char *path, struct stat32 *buf, \ + int flag); } +553 AUE_FHSTAT STD { int freebsd32_fhstat( \ + const struct fhandle *u_fhp, \ + struct stat32 *sb); } +554 AUE_GETDIRENTRIES STD { ssize_t freebsd32_getdirentries( \ + int fd, char *buf, size_t count, \ + int32_t *basep); } +555 AUE_STATFS NOPROTO { int statfs(char *path, \ + struct statfs32 *buf); } +556 AUE_FSTATFS NOPROTO { int fstatfs(int fd, struct statfs32 *buf); } +557 AUE_GETFSSTAT NOPROTO { int getfsstat(struct statfs32 *buf, \ + long bufsize, int mode); } +558 AUE_FHSTATFS NOPROTO { int fhstatfs(const struct fhandle *u_fhp, \ + struct statfs32 *buf); } +559 AUE_MKNODAT NOPROTO { int mknodat(int fd, char *path, mode_t mode, \ + dev_t dev); } Index: head/sys/compat/linux/linux_file.c =================================================================== --- head/sys/compat/linux/linux_file.c (revision 318735) +++ head/sys/compat/linux/linux_file.c (revision 318736) @@ -1,1673 +1,1677 @@ /*- * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include static int linux_common_open(struct thread *, int, char *, int, int); static int linux_getdents_error(struct thread *, int, int); int linux_creat(struct thread *td, struct linux_creat_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(creat)) printf(ARGS(creat, "%s, %d"), path, args->mode); #endif error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); LFREEPATH(path); return (error); } static int linux_common_open(struct thread *td, int dirfd, char *path, int l_flags, int mode) { cap_rights_t rights; struct proc *p = td->td_proc; struct file *fp; int fd; int bsd_flags, error; bsd_flags = 0; switch (l_flags & LINUX_O_ACCMODE) { case LINUX_O_WRONLY: bsd_flags |= O_WRONLY; break; case LINUX_O_RDWR: bsd_flags |= O_RDWR; break; default: bsd_flags |= O_RDONLY; } if (l_flags & LINUX_O_NDELAY) bsd_flags |= O_NONBLOCK; if (l_flags & LINUX_O_APPEND) bsd_flags |= O_APPEND; if (l_flags & LINUX_O_SYNC) bsd_flags |= O_FSYNC; if (l_flags & LINUX_O_NONBLOCK) bsd_flags |= O_NONBLOCK; if (l_flags & LINUX_FASYNC) bsd_flags |= O_ASYNC; if (l_flags & LINUX_O_CREAT) bsd_flags |= O_CREAT; if (l_flags & LINUX_O_TRUNC) bsd_flags |= O_TRUNC; if (l_flags & LINUX_O_EXCL) bsd_flags |= O_EXCL; if (l_flags & LINUX_O_NOCTTY) bsd_flags |= O_NOCTTY; if (l_flags & LINUX_O_DIRECT) bsd_flags |= O_DIRECT; if (l_flags & LINUX_O_NOFOLLOW) bsd_flags |= O_NOFOLLOW; if (l_flags & LINUX_O_DIRECTORY) bsd_flags |= O_DIRECTORY; /* XXX LINUX_O_NOATIME: unable to be easily implemented. */ error = kern_openat(td, dirfd, path, UIO_SYSSPACE, bsd_flags, mode); if (error != 0) goto done; if (bsd_flags & O_NOCTTY) goto done; /* * XXX In between kern_open() and fget(), another process * having the same filedesc could use that fd without * checking below. */ fd = td->td_retval[0]; if (fget(td, fd, cap_rights_init(&rights, CAP_IOCTL), &fp) == 0) { if (fp->f_type != DTYPE_VNODE) { fdrop(fp, td); goto done; } sx_slock(&proctree_lock); PROC_LOCK(p); if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) { PROC_UNLOCK(p); sx_sunlock(&proctree_lock); /* XXXPJD: Verify if TIOCSCTTY is allowed. */ (void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0, td->td_ucred, td); } else { PROC_UNLOCK(p); sx_sunlock(&proctree_lock); } fdrop(fp, td); } done: #ifdef DEBUG if (ldebug(open)) printf(LMSG("open returns error %d"), error); #endif LFREEPATH(path); return (error); } int linux_openat(struct thread *td, struct linux_openat_args *args) { char *path; int dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; if (args->flags & LINUX_O_CREAT) LCONVPATH_AT(td, args->filename, &path, 1, dfd); else LCONVPATH_AT(td, args->filename, &path, 0, dfd); #ifdef DEBUG if (ldebug(openat)) printf(ARGS(openat, "%i, %s, 0x%x, 0x%x"), args->dfd, path, args->flags, args->mode); #endif return (linux_common_open(td, dfd, path, args->flags, args->mode)); } int linux_open(struct thread *td, struct linux_open_args *args) { char *path; if (args->flags & LINUX_O_CREAT) LCONVPATHCREAT(td, args->path, &path); else LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(open)) printf(ARGS(open, "%s, 0x%x, 0x%x"), path, args->flags, args->mode); #endif return (linux_common_open(td, AT_FDCWD, path, args->flags, args->mode)); } int linux_lseek(struct thread *td, struct linux_lseek_args *args) { #ifdef DEBUG if (ldebug(lseek)) printf(ARGS(lseek, "%d, %ld, %d"), args->fdes, (long)args->off, args->whence); #endif return (kern_lseek(td, args->fdes, args->off, args->whence)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_llseek(struct thread *td, struct linux_llseek_args *args) { int error; off_t off; #ifdef DEBUG if (ldebug(llseek)) printf(ARGS(llseek, "%d, %d:%d, %d"), args->fd, args->ohigh, args->olow, args->whence); #endif off = (args->olow) | (((off_t) args->ohigh) << 32); error = kern_lseek(td, args->fd, off, args->whence); if (error != 0) return (error); error = copyout(td->td_retval, args->res, sizeof(off_t)); if (error != 0) return (error); td->td_retval[0] = 0; return (0); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ /* * Note that linux_getdents(2) and linux_getdents64(2) have the same * arguments. They only differ in the definition of struct dirent they * operate on. * Note that linux_readdir(2) is a special case of linux_getdents(2) * where count is always equals 1, meaning that the buffer is one * dirent-structure in size and that the code can't handle more anyway. * Note that linux_readdir(2) can't be implemented by means of linux_getdents(2) * as in case when the *dent buffer size is equal to 1 linux_getdents(2) will * trash user stack. */ static int linux_getdents_error(struct thread *td, int fd, int err) { cap_rights_t rights; struct vnode *vp; struct file *fp; int error; /* Linux return ENOTDIR in case when fd is not a directory. */ error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); if (error != 0) return (error); vp = fp->f_vnode; if (vp->v_type != VDIR) { fdrop(fp, td); return (ENOTDIR); } fdrop(fp, td); return (err); } struct l_dirent { l_ulong d_ino; l_off_t d_off; l_ushort d_reclen; char d_name[LINUX_NAME_MAX + 1]; }; struct l_dirent64 { uint64_t d_ino; int64_t d_off; l_ushort d_reclen; u_char d_type; char d_name[LINUX_NAME_MAX + 1]; }; /* * Linux uses the last byte in the dirent buffer to store d_type, * at least glibc-2.7 requires it. That is why l_dirent is padded with 2 bytes. */ #define LINUX_RECLEN(namlen) \ roundup(offsetof(struct l_dirent, d_name) + (namlen) + 2, sizeof(l_ulong)) #define LINUX_RECLEN64(namlen) \ roundup(offsetof(struct l_dirent64, d_name) + (namlen) + 1, \ sizeof(uint64_t)) #define LINUX_DIRBLKSIZ 512 /* * Linux l_dirent is bigger than FreeBSD dirent, thus the buffer size * passed to kern_getdirentries() must be smaller than the one passed * to linux_getdents() by certain factor. */ #define LINUX_RECLEN_RATIO(X) X * offsetof(struct dirent, d_name) / \ offsetof(struct l_dirent, d_name); #define LINUX_RECLEN64_RATIO(X) X * offsetof(struct dirent, d_name) / \ offsetof(struct l_dirent64, d_name); int linux_getdents(struct thread *td, struct linux_getdents_args *args) { struct dirent *bdp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* Linux-format */ int resid, linuxreclen; /* Linux-format */ caddr_t lbuf; /* Linux-format */ - long base; + off_t base; struct l_dirent *linux_dirent; int buflen, error; size_t retval; #ifdef DEBUG if (ldebug(getdents)) printf(ARGS(getdents, "%d, *, %d"), args->fd, args->count); #endif buflen = LINUX_RECLEN_RATIO(args->count); buflen = min(buflen, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); error = kern_getdirentries(td, args->fd, buf, buflen, &base, NULL, UIO_SYSSPACE); if (error != 0) { error = linux_getdents_error(td, args->fd, error); goto out1; } lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO); len = td->td_retval[0]; inp = buf; outp = (caddr_t)args->dent; resid = args->count; retval = 0; while (len > 0) { bdp = (struct dirent *) inp; reclen = bdp->d_reclen; linuxreclen = LINUX_RECLEN(bdp->d_namlen); /* * No more space in the user supplied dirent buffer. * Return EINVAL. */ if (resid < linuxreclen) { error = EINVAL; goto out; } linux_dirent = (struct l_dirent*)lbuf; linux_dirent->d_ino = bdp->d_fileno; linux_dirent->d_off = base + reclen; linux_dirent->d_reclen = linuxreclen; /* * Copy d_type to last byte of l_dirent buffer */ lbuf[linuxreclen - 1] = bdp->d_type; strlcpy(linux_dirent->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent, d_name)-1); error = copyout(linux_dirent, outp, linuxreclen); if (error != 0) goto out; inp += reclen; base += reclen; len -= reclen; retval += linuxreclen; outp += linuxreclen; resid -= linuxreclen; } td->td_retval[0] = retval; out: free(lbuf, M_LINUX); out1: free(buf, M_LINUX); return (error); } int linux_getdents64(struct thread *td, struct linux_getdents64_args *args) { struct dirent *bdp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* Linux-format */ int resid, linuxreclen; /* Linux-format */ caddr_t lbuf; /* Linux-format */ - long base; + off_t base; struct l_dirent64 *linux_dirent64; int buflen, error; size_t retval; #ifdef DEBUG if (ldebug(getdents64)) uprintf(ARGS(getdents64, "%d, *, %d"), args->fd, args->count); #endif buflen = LINUX_RECLEN64_RATIO(args->count); buflen = min(buflen, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); error = kern_getdirentries(td, args->fd, buf, buflen, &base, NULL, UIO_SYSSPACE); if (error != 0) { error = linux_getdents_error(td, args->fd, error); goto out1; } lbuf = malloc(LINUX_RECLEN64(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO); len = td->td_retval[0]; inp = buf; outp = (caddr_t)args->dirent; resid = args->count; retval = 0; while (len > 0) { bdp = (struct dirent *) inp; reclen = bdp->d_reclen; linuxreclen = LINUX_RECLEN64(bdp->d_namlen); /* * No more space in the user supplied dirent buffer. * Return EINVAL. */ if (resid < linuxreclen) { error = EINVAL; goto out; } linux_dirent64 = (struct l_dirent64*)lbuf; linux_dirent64->d_ino = bdp->d_fileno; linux_dirent64->d_off = base + reclen; linux_dirent64->d_reclen = linuxreclen; linux_dirent64->d_type = bdp->d_type; strlcpy(linux_dirent64->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent64, d_name)); error = copyout(linux_dirent64, outp, linuxreclen); if (error != 0) goto out; inp += reclen; base += reclen; len -= reclen; retval += linuxreclen; outp += linuxreclen; resid -= linuxreclen; } td->td_retval[0] = retval; out: free(lbuf, M_TEMP); out1: free(buf, M_TEMP); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_readdir(struct thread *td, struct linux_readdir_args *args) { struct dirent *bdp; caddr_t buf; /* BSD-format */ int linuxreclen; /* Linux-format */ caddr_t lbuf; /* Linux-format */ - long base; + off_t base; struct l_dirent *linux_dirent; int buflen, error; #ifdef DEBUG if (ldebug(readdir)) printf(ARGS(readdir, "%d, *"), args->fd); #endif buflen = LINUX_RECLEN(LINUX_NAME_MAX); buflen = LINUX_RECLEN_RATIO(buflen); buf = malloc(buflen, M_TEMP, M_WAITOK); error = kern_getdirentries(td, args->fd, buf, buflen, &base, NULL, UIO_SYSSPACE); if (error != 0) { error = linux_getdents_error(td, args->fd, error); goto out; } if (td->td_retval[0] == 0) goto out; lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO); bdp = (struct dirent *) buf; linuxreclen = LINUX_RECLEN(bdp->d_namlen); linux_dirent = (struct l_dirent*)lbuf; linux_dirent->d_ino = bdp->d_fileno; linux_dirent->d_off = linuxreclen; linux_dirent->d_reclen = bdp->d_namlen; strlcpy(linux_dirent->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent, d_name)); error = copyout(linux_dirent, args->dent, linuxreclen); if (error == 0) td->td_retval[0] = linuxreclen; free(lbuf, M_LINUX); out: free(buf, M_LINUX); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ /* * These exist mainly for hooks for doing /compat/linux translation. */ int linux_access(struct thread *td, struct linux_access_args *args) { char *path; int error; /* linux convention */ if (args->amode & ~(F_OK | X_OK | W_OK | R_OK)) return (EINVAL); LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(access)) printf(ARGS(access, "%s, %d"), path, args->amode); #endif error = kern_accessat(td, AT_FDCWD, path, UIO_SYSSPACE, 0, args->amode); LFREEPATH(path); return (error); } int linux_faccessat(struct thread *td, struct linux_faccessat_args *args) { char *path; int error, dfd; /* linux convention */ if (args->amode & ~(F_OK | X_OK | W_OK | R_OK)) return (EINVAL); dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &path, dfd); #ifdef DEBUG if (ldebug(access)) printf(ARGS(access, "%s, %d"), path, args->amode); #endif error = kern_accessat(td, dfd, path, UIO_SYSSPACE, 0, args->amode); LFREEPATH(path); return (error); } int linux_unlink(struct thread *td, struct linux_unlink_args *args) { char *path; int error; struct stat st; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(unlink)) printf(ARGS(unlink, "%s"), path); #endif error = kern_unlinkat(td, AT_FDCWD, path, UIO_SYSSPACE, 0); if (error == EPERM) { /* Introduce POSIX noncompliant behaviour of Linux */ if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st, NULL) == 0) { if (S_ISDIR(st.st_mode)) error = EISDIR; } } LFREEPATH(path); return (error); } int linux_unlinkat(struct thread *td, struct linux_unlinkat_args *args) { char *path; int error, dfd; struct stat st; if (args->flag & ~LINUX_AT_REMOVEDIR) return (EINVAL); dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); #ifdef DEBUG if (ldebug(unlinkat)) printf(ARGS(unlinkat, "%s"), path); #endif if (args->flag & LINUX_AT_REMOVEDIR) error = kern_rmdirat(td, dfd, path, UIO_SYSSPACE); else error = kern_unlinkat(td, dfd, path, UIO_SYSSPACE, 0); if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) { /* Introduce POSIX noncompliant behaviour of Linux */ if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path, UIO_SYSSPACE, &st, NULL) == 0 && S_ISDIR(st.st_mode)) error = EISDIR; } LFREEPATH(path); return (error); } int linux_chdir(struct thread *td, struct linux_chdir_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(chdir)) printf(ARGS(chdir, "%s"), path); #endif error = kern_chdir(td, path, UIO_SYSSPACE); LFREEPATH(path); return (error); } int linux_chmod(struct thread *td, struct linux_chmod_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(chmod)) printf(ARGS(chmod, "%s, %d"), path, args->mode); #endif error = kern_fchmodat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode, 0); LFREEPATH(path); return (error); } int linux_fchmodat(struct thread *td, struct linux_fchmodat_args *args) { char *path; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &path, dfd); #ifdef DEBUG if (ldebug(fchmodat)) printf(ARGS(fchmodat, "%s, %d"), path, args->mode); #endif error = kern_fchmodat(td, dfd, path, UIO_SYSSPACE, args->mode, 0); LFREEPATH(path); return (error); } int linux_mkdir(struct thread *td, struct linux_mkdir_args *args) { char *path; int error; LCONVPATHCREAT(td, args->path, &path); #ifdef DEBUG if (ldebug(mkdir)) printf(ARGS(mkdir, "%s, %d"), path, args->mode); #endif error = kern_mkdirat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode); LFREEPATH(path); return (error); } int linux_mkdirat(struct thread *td, struct linux_mkdirat_args *args) { char *path; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHCREAT_AT(td, args->pathname, &path, dfd); #ifdef DEBUG if (ldebug(mkdirat)) printf(ARGS(mkdirat, "%s, %d"), path, args->mode); #endif error = kern_mkdirat(td, dfd, path, UIO_SYSSPACE, args->mode); LFREEPATH(path); return (error); } int linux_rmdir(struct thread *td, struct linux_rmdir_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(rmdir)) printf(ARGS(rmdir, "%s"), path); #endif error = kern_rmdirat(td, AT_FDCWD, path, UIO_SYSSPACE); LFREEPATH(path); return (error); } int linux_rename(struct thread *td, struct linux_rename_args *args) { char *from, *to; int error; LCONVPATHEXIST(td, args->from, &from); /* Expand LCONVPATHCREATE so that `from' can be freed on errors */ error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(from); return (error); } #ifdef DEBUG if (ldebug(rename)) printf(ARGS(rename, "%s, %s"), from, to); #endif error = kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, UIO_SYSSPACE); LFREEPATH(from); LFREEPATH(to); return (error); } int linux_renameat(struct thread *td, struct linux_renameat_args *args) { char *from, *to; int error, olddfd, newdfd; olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd; newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd; LCONVPATHEXIST_AT(td, args->oldname, &from, olddfd); /* Expand LCONVPATHCREATE so that `from' can be freed on errors */ error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd); if (to == NULL) { LFREEPATH(from); return (error); } #ifdef DEBUG if (ldebug(renameat)) printf(ARGS(renameat, "%s, %s"), from, to); #endif error = kern_renameat(td, olddfd, from, newdfd, to, UIO_SYSSPACE); LFREEPATH(from); LFREEPATH(to); return (error); } int linux_symlink(struct thread *td, struct linux_symlink_args *args) { char *path, *to; int error; LCONVPATHEXIST(td, args->path, &path); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(path); return (error); } #ifdef DEBUG if (ldebug(symlink)) printf(ARGS(symlink, "%s, %s"), path, to); #endif error = kern_symlinkat(td, path, AT_FDCWD, to, UIO_SYSSPACE); LFREEPATH(path); LFREEPATH(to); return (error); } int linux_symlinkat(struct thread *td, struct linux_symlinkat_args *args) { char *path, *to; int error, dfd; dfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd; LCONVPATHEXIST(td, args->oldname, &path); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, dfd); if (to == NULL) { LFREEPATH(path); return (error); } #ifdef DEBUG if (ldebug(symlinkat)) printf(ARGS(symlinkat, "%s, %s"), path, to); #endif error = kern_symlinkat(td, path, dfd, to, UIO_SYSSPACE); LFREEPATH(path); LFREEPATH(to); return (error); } int linux_readlink(struct thread *td, struct linux_readlink_args *args) { char *name; int error; LCONVPATHEXIST(td, args->name, &name); #ifdef DEBUG if (ldebug(readlink)) printf(ARGS(readlink, "%s, %p, %d"), name, (void *)args->buf, args->count); #endif error = kern_readlinkat(td, AT_FDCWD, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE, args->count); LFREEPATH(name); return (error); } int linux_readlinkat(struct thread *td, struct linux_readlinkat_args *args) { char *name; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->path, &name, dfd); #ifdef DEBUG if (ldebug(readlinkat)) printf(ARGS(readlinkat, "%s, %p, %d"), name, (void *)args->buf, args->bufsiz); #endif error = kern_readlinkat(td, dfd, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE, args->bufsiz); LFREEPATH(name); return (error); } int linux_truncate(struct thread *td, struct linux_truncate_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(truncate)) printf(ARGS(truncate, "%s, %ld"), path, (long)args->length); #endif error = kern_truncate(td, path, UIO_SYSSPACE, args->length); LFREEPATH(path); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_truncate64(struct thread *td, struct linux_truncate64_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(truncate64)) printf(ARGS(truncate64, "%s, %jd"), path, args->length); #endif error = kern_truncate(td, path, UIO_SYSSPACE, args->length); LFREEPATH(path); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args) { return (kern_ftruncate(td, args->fd, args->length)); } int linux_link(struct thread *td, struct linux_link_args *args) { char *path, *to; int error; LCONVPATHEXIST(td, args->path, &path); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(path); return (error); } #ifdef DEBUG if (ldebug(link)) printf(ARGS(link, "%s, %s"), path, to); #endif error = kern_linkat(td, AT_FDCWD, AT_FDCWD, path, to, UIO_SYSSPACE, FOLLOW); LFREEPATH(path); LFREEPATH(to); return (error); } int linux_linkat(struct thread *td, struct linux_linkat_args *args) { char *path, *to; int error, olddfd, newdfd, follow; if (args->flag & ~LINUX_AT_SYMLINK_FOLLOW) return (EINVAL); olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd; newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd; LCONVPATHEXIST_AT(td, args->oldname, &path, olddfd); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd); if (to == NULL) { LFREEPATH(path); return (error); } #ifdef DEBUG if (ldebug(linkat)) printf(ARGS(linkat, "%i, %s, %i, %s, %i"), args->olddfd, path, args->newdfd, to, args->flag); #endif follow = (args->flag & LINUX_AT_SYMLINK_FOLLOW) == 0 ? NOFOLLOW : FOLLOW; error = kern_linkat(td, olddfd, newdfd, path, to, UIO_SYSSPACE, follow); LFREEPATH(path); LFREEPATH(to); return (error); } int linux_fdatasync(td, uap) struct thread *td; struct linux_fdatasync_args *uap; { return (kern_fsync(td, uap->fd, false)); } int linux_pread(struct thread *td, struct linux_pread_args *uap) { cap_rights_t rights; struct vnode *vp; int error; error = kern_pread(td, uap->fd, uap->buf, uap->nbyte, uap->offset); if (error == 0) { /* This seems to violate POSIX but linux does it */ error = fgetvp(td, uap->fd, cap_rights_init(&rights, CAP_PREAD), &vp); if (error != 0) return (error); if (vp->v_type == VDIR) { vrele(vp); return (EISDIR); } vrele(vp); } return (error); } int linux_pwrite(struct thread *td, struct linux_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, uap->offset)); } int linux_preadv(struct thread *td, struct linux_preadv_args *uap) { struct uio *auio; int error; off_t offset; /* * According http://man7.org/linux/man-pages/man2/preadv.2.html#NOTES * pos_l and pos_h, respectively, contain the * low order and high order 32 bits of offset. */ offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) << (sizeof(offset) * 4)) | uap->pos_l; if (offset < 0) return (EINVAL); #ifdef COMPAT_LINUX32 error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio); #else error = copyinuio(uap->vec, uap->vlen, &auio); #endif if (error != 0) return (error); error = kern_preadv(td, uap->fd, auio, offset); free(auio, M_IOV); return (error); } int linux_pwritev(struct thread *td, struct linux_pwritev_args *uap) { struct uio *auio; int error; off_t offset; /* * According http://man7.org/linux/man-pages/man2/pwritev.2.html#NOTES * pos_l and pos_h, respectively, contain the * low order and high order 32 bits of offset. */ offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) << (sizeof(offset) * 4)) | uap->pos_l; if (offset < 0) return (EINVAL); #ifdef COMPAT_LINUX32 error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio); #else error = copyinuio(uap->vec, uap->vlen, &auio); #endif if (error != 0) return (error); error = kern_pwritev(td, uap->fd, auio, offset); free(auio, M_IOV); return (error); } int linux_mount(struct thread *td, struct linux_mount_args *args) { char fstypename[MFSNAMELEN]; - char mntonname[MNAMELEN], mntfromname[MNAMELEN]; - int error; - int fsflags; + char *mntonname, *mntfromname; + int error, fsflags; + mntonname = malloc(MNAMELEN, M_TEMP, M_WAITOK); + mntfromname = malloc(MNAMELEN, M_TEMP, M_WAITOK); error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1, NULL); - if (error) - return (error); + if (error != 0) + goto out; error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL); - if (error) - return (error); + if (error != 0) + goto out; error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL); - if (error) - return (error); + if (error != 0) + goto out; #ifdef DEBUG if (ldebug(mount)) printf(ARGS(mount, "%s, %s, %s"), fstypename, mntfromname, mntonname); #endif if (strcmp(fstypename, "ext2") == 0) { strcpy(fstypename, "ext2fs"); } else if (strcmp(fstypename, "proc") == 0) { strcpy(fstypename, "linprocfs"); } else if (strcmp(fstypename, "vfat") == 0) { strcpy(fstypename, "msdosfs"); } fsflags = 0; if ((args->rwflag & 0xffff0000) == 0xc0ed0000) { /* * Linux SYNC flag is not included; the closest equivalent * FreeBSD has is !ASYNC, which is our default. */ if (args->rwflag & LINUX_MS_RDONLY) fsflags |= MNT_RDONLY; if (args->rwflag & LINUX_MS_NOSUID) fsflags |= MNT_NOSUID; if (args->rwflag & LINUX_MS_NOEXEC) fsflags |= MNT_NOEXEC; if (args->rwflag & LINUX_MS_REMOUNT) fsflags |= MNT_UPDATE; } error = kernel_vmount(fsflags, "fstype", fstypename, "fspath", mntonname, "from", mntfromname, NULL); +out: + free(mntonname, M_TEMP); + free(mntfromname, M_TEMP); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_oldumount(struct thread *td, struct linux_oldumount_args *args) { struct linux_umount_args args2; args2.path = args->path; args2.flags = 0; return (linux_umount(td, &args2)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_umount(struct thread *td, struct linux_umount_args *args) { struct unmount_args bsd; bsd.path = args->path; bsd.flags = args->flags; /* XXX correct? */ return (sys_unmount(td, &bsd)); } /* * fcntl family of syscalls */ struct l_flock { l_short l_type; l_short l_whence; l_off_t l_start; l_off_t l_len; l_pid_t l_pid; } #if defined(__amd64__) && defined(COMPAT_LINUX32) __packed #endif ; static void linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock) { switch (linux_flock->l_type) { case LINUX_F_RDLCK: bsd_flock->l_type = F_RDLCK; break; case LINUX_F_WRLCK: bsd_flock->l_type = F_WRLCK; break; case LINUX_F_UNLCK: bsd_flock->l_type = F_UNLCK; break; default: bsd_flock->l_type = -1; break; } bsd_flock->l_whence = linux_flock->l_whence; bsd_flock->l_start = (off_t)linux_flock->l_start; bsd_flock->l_len = (off_t)linux_flock->l_len; bsd_flock->l_pid = (pid_t)linux_flock->l_pid; bsd_flock->l_sysid = 0; } static void bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock) { switch (bsd_flock->l_type) { case F_RDLCK: linux_flock->l_type = LINUX_F_RDLCK; break; case F_WRLCK: linux_flock->l_type = LINUX_F_WRLCK; break; case F_UNLCK: linux_flock->l_type = LINUX_F_UNLCK; break; } linux_flock->l_whence = bsd_flock->l_whence; linux_flock->l_start = (l_off_t)bsd_flock->l_start; linux_flock->l_len = (l_off_t)bsd_flock->l_len; linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid; } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) struct l_flock64 { l_short l_type; l_short l_whence; l_loff_t l_start; l_loff_t l_len; l_pid_t l_pid; } #if defined(__amd64__) && defined(COMPAT_LINUX32) __packed #endif ; static void linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock) { switch (linux_flock->l_type) { case LINUX_F_RDLCK: bsd_flock->l_type = F_RDLCK; break; case LINUX_F_WRLCK: bsd_flock->l_type = F_WRLCK; break; case LINUX_F_UNLCK: bsd_flock->l_type = F_UNLCK; break; default: bsd_flock->l_type = -1; break; } bsd_flock->l_whence = linux_flock->l_whence; bsd_flock->l_start = (off_t)linux_flock->l_start; bsd_flock->l_len = (off_t)linux_flock->l_len; bsd_flock->l_pid = (pid_t)linux_flock->l_pid; bsd_flock->l_sysid = 0; } static void bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock) { switch (bsd_flock->l_type) { case F_RDLCK: linux_flock->l_type = LINUX_F_RDLCK; break; case F_WRLCK: linux_flock->l_type = LINUX_F_WRLCK; break; case F_UNLCK: linux_flock->l_type = LINUX_F_UNLCK; break; } linux_flock->l_whence = bsd_flock->l_whence; linux_flock->l_start = (l_loff_t)bsd_flock->l_start; linux_flock->l_len = (l_loff_t)bsd_flock->l_len; linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid; } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ static int fcntl_common(struct thread *td, struct linux_fcntl_args *args) { struct l_flock linux_flock; struct flock bsd_flock; cap_rights_t rights; struct file *fp; long arg; int error, result; switch (args->cmd) { case LINUX_F_DUPFD: return (kern_fcntl(td, args->fd, F_DUPFD, args->arg)); case LINUX_F_GETFD: return (kern_fcntl(td, args->fd, F_GETFD, 0)); case LINUX_F_SETFD: return (kern_fcntl(td, args->fd, F_SETFD, args->arg)); case LINUX_F_GETFL: error = kern_fcntl(td, args->fd, F_GETFL, 0); result = td->td_retval[0]; td->td_retval[0] = 0; if (result & O_RDONLY) td->td_retval[0] |= LINUX_O_RDONLY; if (result & O_WRONLY) td->td_retval[0] |= LINUX_O_WRONLY; if (result & O_RDWR) td->td_retval[0] |= LINUX_O_RDWR; if (result & O_NDELAY) td->td_retval[0] |= LINUX_O_NONBLOCK; if (result & O_APPEND) td->td_retval[0] |= LINUX_O_APPEND; if (result & O_FSYNC) td->td_retval[0] |= LINUX_O_SYNC; if (result & O_ASYNC) td->td_retval[0] |= LINUX_FASYNC; #ifdef LINUX_O_NOFOLLOW if (result & O_NOFOLLOW) td->td_retval[0] |= LINUX_O_NOFOLLOW; #endif #ifdef LINUX_O_DIRECT if (result & O_DIRECT) td->td_retval[0] |= LINUX_O_DIRECT; #endif return (error); case LINUX_F_SETFL: arg = 0; if (args->arg & LINUX_O_NDELAY) arg |= O_NONBLOCK; if (args->arg & LINUX_O_APPEND) arg |= O_APPEND; if (args->arg & LINUX_O_SYNC) arg |= O_FSYNC; if (args->arg & LINUX_FASYNC) arg |= O_ASYNC; #ifdef LINUX_O_NOFOLLOW if (args->arg & LINUX_O_NOFOLLOW) arg |= O_NOFOLLOW; #endif #ifdef LINUX_O_DIRECT if (args->arg & LINUX_O_DIRECT) arg |= O_DIRECT; #endif return (kern_fcntl(td, args->fd, F_SETFL, arg)); case LINUX_F_GETLK: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock(&linux_flock, &bsd_flock); error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock); if (error) return (error); bsd_to_linux_flock(&bsd_flock, &linux_flock); return (copyout(&linux_flock, (void *)args->arg, sizeof(linux_flock))); case LINUX_F_SETLK: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLK, (intptr_t)&bsd_flock)); case LINUX_F_SETLKW: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLKW, (intptr_t)&bsd_flock)); case LINUX_F_GETOWN: return (kern_fcntl(td, args->fd, F_GETOWN, 0)); case LINUX_F_SETOWN: /* * XXX some Linux applications depend on F_SETOWN having no * significant effect for pipes (SIGIO is not delivered for * pipes under Linux-2.2.35 at least). */ error = fget(td, args->fd, cap_rights_init(&rights, CAP_FCNTL), &fp); if (error) return (error); if (fp->f_type == DTYPE_PIPE) { fdrop(fp, td); return (EINVAL); } fdrop(fp, td); return (kern_fcntl(td, args->fd, F_SETOWN, args->arg)); case LINUX_F_DUPFD_CLOEXEC: return (kern_fcntl(td, args->fd, F_DUPFD_CLOEXEC, args->arg)); } return (EINVAL); } int linux_fcntl(struct thread *td, struct linux_fcntl_args *args) { #ifdef DEBUG if (ldebug(fcntl)) printf(ARGS(fcntl, "%d, %08x, *"), args->fd, args->cmd); #endif return (fcntl_common(td, args)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args) { struct l_flock64 linux_flock; struct flock bsd_flock; struct linux_fcntl_args fcntl_args; int error; #ifdef DEBUG if (ldebug(fcntl64)) printf(ARGS(fcntl64, "%d, %08x, *"), args->fd, args->cmd); #endif switch (args->cmd) { case LINUX_F_GETLK64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock); if (error) return (error); bsd_to_linux_flock64(&bsd_flock, &linux_flock); return (copyout(&linux_flock, (void *)args->arg, sizeof(linux_flock))); case LINUX_F_SETLK64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLK, (intptr_t)&bsd_flock)); case LINUX_F_SETLKW64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLKW, (intptr_t)&bsd_flock)); } fcntl_args.fd = args->fd; fcntl_args.cmd = args->cmd; fcntl_args.arg = args->arg; return (fcntl_common(td, &fcntl_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_chown(struct thread *td, struct linux_chown_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(chown)) printf(ARGS(chown, "%s, %d, %d"), path, args->uid, args->gid); #endif error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid, args->gid, 0); LFREEPATH(path); return (error); } int linux_fchownat(struct thread *td, struct linux_fchownat_args *args) { char *path; int error, dfd, flag; if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW) return (EINVAL); dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &path, dfd); #ifdef DEBUG if (ldebug(fchownat)) printf(ARGS(fchownat, "%s, %d, %d"), path, args->uid, args->gid); #endif flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) == 0 ? 0 : AT_SYMLINK_NOFOLLOW; error = kern_fchownat(td, dfd, path, UIO_SYSSPACE, args->uid, args->gid, flag); LFREEPATH(path); return (error); } int linux_lchown(struct thread *td, struct linux_lchown_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(lchown)) printf(ARGS(lchown, "%s, %d, %d"), path, args->uid, args->gid); #endif error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid, args->gid, AT_SYMLINK_NOFOLLOW); LFREEPATH(path); return (error); } static int convert_fadvice(int advice) { switch (advice) { case LINUX_POSIX_FADV_NORMAL: return (POSIX_FADV_NORMAL); case LINUX_POSIX_FADV_RANDOM: return (POSIX_FADV_RANDOM); case LINUX_POSIX_FADV_SEQUENTIAL: return (POSIX_FADV_SEQUENTIAL); case LINUX_POSIX_FADV_WILLNEED: return (POSIX_FADV_WILLNEED); case LINUX_POSIX_FADV_DONTNEED: return (POSIX_FADV_DONTNEED); case LINUX_POSIX_FADV_NOREUSE: return (POSIX_FADV_NOREUSE); default: return (-1); } } int linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args) { int advice; advice = convert_fadvice(args->advice); if (advice == -1) return (EINVAL); return (kern_posix_fadvise(td, args->fd, args->offset, args->len, advice)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args) { int advice; advice = convert_fadvice(args->advice); if (advice == -1) return (EINVAL); return (kern_posix_fadvise(td, args->fd, args->offset, args->len, advice)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_pipe(struct thread *td, struct linux_pipe_args *args) { int fildes[2]; int error; #ifdef DEBUG if (ldebug(pipe)) printf(ARGS(pipe, "*")); #endif error = kern_pipe(td, fildes, 0, NULL, NULL); if (error != 0) return (error); error = copyout(fildes, args->pipefds, sizeof(fildes)); if (error != 0) { (void)kern_close(td, fildes[0]); (void)kern_close(td, fildes[1]); } return (error); } int linux_pipe2(struct thread *td, struct linux_pipe2_args *args) { int fildes[2]; int error, flags; #ifdef DEBUG if (ldebug(pipe2)) printf(ARGS(pipe2, "*, %d"), args->flags); #endif if ((args->flags & ~(LINUX_O_NONBLOCK | LINUX_O_CLOEXEC)) != 0) return (EINVAL); flags = 0; if ((args->flags & LINUX_O_NONBLOCK) != 0) flags |= O_NONBLOCK; if ((args->flags & LINUX_O_CLOEXEC) != 0) flags |= O_CLOEXEC; error = kern_pipe(td, fildes, flags, NULL, NULL); if (error != 0) return (error); error = copyout(fildes, args->pipefds, sizeof(fildes)); if (error != 0) { (void)kern_close(td, fildes[0]); (void)kern_close(td, fildes[1]); } return (error); } int linux_dup3(struct thread *td, struct linux_dup3_args *args) { int cmd; intptr_t newfd; if (args->oldfd == args->newfd) return (EINVAL); if ((args->flags & ~LINUX_O_CLOEXEC) != 0) return (EINVAL); if (args->flags & LINUX_O_CLOEXEC) cmd = F_DUP2FD_CLOEXEC; else cmd = F_DUP2FD; newfd = args->newfd; return (kern_fcntl(td, args->oldfd, cmd, newfd)); } int linux_fallocate(struct thread *td, struct linux_fallocate_args *args) { /* * We emulate only posix_fallocate system call for which * mode should be 0. */ if (args->mode != 0) return (ENOSYS); return (kern_posix_fallocate(td, args->fd, args->offset, args->len)); } Index: head/sys/dev/snp/snp.c =================================================================== --- head/sys/dev/snp/snp.c (revision 318735) +++ head/sys/dev/snp/snp.c (revision 318736) @@ -1,361 +1,369 @@ /*- * Copyright (c) 2008 Ed Schouten * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct cdev *snp_dev; static MALLOC_DEFINE(M_SNP, "snp", "tty snoop device"); /* XXX: should be mtx, but TTY can be locked by Giant. */ #if 0 static struct mtx snp_register_lock; MTX_SYSINIT(snp_register_lock, &snp_register_lock, "tty snoop registration", MTX_DEF); #define SNP_LOCK() mtx_lock(&snp_register_lock) #define SNP_UNLOCK() mtx_unlock(&snp_register_lock) #else static struct sx snp_register_lock; SX_SYSINIT(snp_register_lock, &snp_register_lock, "tty snoop registration"); #define SNP_LOCK() sx_xlock(&snp_register_lock) #define SNP_UNLOCK() sx_xunlock(&snp_register_lock) #endif +#define SNPGTYY_32DEV _IOR('T', 89, uint32_t) + /* * There is no need to have a big input buffer. In most typical setups, * we won't inject much data into the TTY, because users can't type * really fast. */ #define SNP_INPUT_BUFSIZE 16 /* * The output buffer has to be really big. Right now we don't support * any form of flow control, which means we lost any data we can't * accept. We set the output buffer size to about twice the size of a * pseudo-terminal/virtual console's output buffer. */ #define SNP_OUTPUT_BUFSIZE 16384 static d_open_t snp_open; static d_read_t snp_read; static d_write_t snp_write; static d_ioctl_t snp_ioctl; static d_poll_t snp_poll; static struct cdevsw snp_cdevsw = { .d_version = D_VERSION, .d_open = snp_open, .d_read = snp_read, .d_write = snp_write, .d_ioctl = snp_ioctl, .d_poll = snp_poll, .d_name = "snp", }; static th_getc_capture_t snp_getc_capture; static struct ttyhook snp_hook = { .th_getc_capture = snp_getc_capture, }; /* * Per-instance structure. * * List of locks * (r) locked by snp_register_lock on assignment * (t) locked by tty_lock */ struct snp_softc { struct tty *snp_tty; /* (r) TTY we're snooping. */ struct ttyoutq snp_outq; /* (t) Output queue. */ struct cv snp_outwait; /* (t) Output wait queue. */ struct selinfo snp_outpoll; /* (t) Output polling. */ }; static void snp_dtor(void *data) { struct snp_softc *ss = data; struct tty *tp; tp = ss->snp_tty; if (tp != NULL) { tty_lock(tp); ttyoutq_free(&ss->snp_outq); ttyhook_unregister(tp); } cv_destroy(&ss->snp_outwait); free(ss, M_SNP); } /* * Snoop device node routines. */ static int snp_open(struct cdev *dev, int flag, int mode, struct thread *td) { struct snp_softc *ss; /* Allocate per-snoop data. */ ss = malloc(sizeof(struct snp_softc), M_SNP, M_WAITOK|M_ZERO); cv_init(&ss->snp_outwait, "snp out"); devfs_set_cdevpriv(ss, snp_dtor); return (0); } static int snp_read(struct cdev *dev, struct uio *uio, int flag) { int error, oresid = uio->uio_resid; struct snp_softc *ss; struct tty *tp; if (uio->uio_resid == 0) return (0); error = devfs_get_cdevpriv((void **)&ss); if (error != 0) return (error); tp = ss->snp_tty; if (tp == NULL || tty_gone(tp)) return (EIO); tty_lock(tp); for (;;) { error = ttyoutq_read_uio(&ss->snp_outq, tp, uio); if (error != 0 || uio->uio_resid != oresid) break; /* Wait for more data. */ if (flag & O_NONBLOCK) { error = EWOULDBLOCK; break; } error = cv_wait_sig(&ss->snp_outwait, tp->t_mtx); if (error != 0) break; if (tty_gone(tp)) { error = EIO; break; } } tty_unlock(tp); return (error); } static int snp_write(struct cdev *dev, struct uio *uio, int flag) { struct snp_softc *ss; struct tty *tp; int error, len; char in[SNP_INPUT_BUFSIZE]; error = devfs_get_cdevpriv((void **)&ss); if (error != 0) return (error); tp = ss->snp_tty; if (tp == NULL || tty_gone(tp)) return (EIO); while (uio->uio_resid > 0) { /* Read new data. */ len = imin(uio->uio_resid, sizeof in); error = uiomove(in, len, uio); if (error != 0) return (error); tty_lock(tp); /* Driver could have abandoned the TTY in the mean time. */ if (tty_gone(tp)) { tty_unlock(tp); return (ENXIO); } /* * Deliver data to the TTY. Ignore errors for now, * because we shouldn't bail out when we're running * close to the watermarks. */ ttydisc_rint_simple(tp, in, len); ttydisc_rint_done(tp); tty_unlock(tp); } return (0); } static int snp_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags, struct thread *td) { struct snp_softc *ss; struct tty *tp; int error; error = devfs_get_cdevpriv((void **)&ss); if (error != 0) return (error); switch (cmd) { case SNPSTTY: /* Bind TTY to snoop instance. */ SNP_LOCK(); if (ss->snp_tty != NULL) { SNP_UNLOCK(); return (EBUSY); } /* * XXXRW / XXXJA: no capability check here. */ error = ttyhook_register(&ss->snp_tty, td->td_proc, *(int *)data, &snp_hook, ss); SNP_UNLOCK(); if (error != 0) return (error); /* Now that went okay, allocate a buffer for the queue. */ tp = ss->snp_tty; tty_lock(tp); ttyoutq_setsize(&ss->snp_outq, tp, SNP_OUTPUT_BUFSIZE); tty_unlock(tp); return (0); case SNPGTTY: /* Obtain device number of associated TTY. */ if (ss->snp_tty == NULL) *(dev_t *)data = NODEV; else *(dev_t *)data = tty_udev(ss->snp_tty); + return (0); + case SNPGTYY_32DEV: + if (ss->snp_tty == NULL) + *(uint32_t *)data = -1; + else + *(uint32_t *)data = tty_udev(ss->snp_tty); /* trunc */ return (0); case FIONREAD: tp = ss->snp_tty; if (tp != NULL) { tty_lock(tp); *(int *)data = ttyoutq_bytesused(&ss->snp_outq); tty_unlock(tp); } else { *(int *)data = 0; } return (0); default: return (ENOTTY); } } static int snp_poll(struct cdev *dev, int events, struct thread *td) { struct snp_softc *ss; struct tty *tp; int revents; if (devfs_get_cdevpriv((void **)&ss) != 0) return (events & (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM)); revents = 0; if (events & (POLLIN | POLLRDNORM)) { tp = ss->snp_tty; if (tp != NULL) { tty_lock(tp); if (ttyoutq_bytesused(&ss->snp_outq) > 0) revents |= events & (POLLIN | POLLRDNORM); tty_unlock(tp); } } if (revents == 0) selrecord(td, &ss->snp_outpoll); return (revents); } /* * TTY hook events. */ static int snp_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: snp_dev = make_dev(&snp_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "snp"); return (0); case MOD_UNLOAD: /* XXX: Make existing users leave. */ destroy_dev(snp_dev); return (0); default: return (EOPNOTSUPP); } } static void snp_getc_capture(struct tty *tp, const void *buf, size_t len) { struct snp_softc *ss = ttyhook_softc(tp); ttyoutq_write(&ss->snp_outq, buf, len); cv_broadcast(&ss->snp_outwait); selwakeup(&ss->snp_outpoll); } static moduledata_t snp_mod = { "snp", snp_modevent, NULL }; DECLARE_MODULE(snp, snp_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); Index: head/sys/fs/devfs/devfs_devs.c =================================================================== --- head/sys/fs/devfs/devfs_devs.c (revision 318735) +++ head/sys/fs/devfs/devfs_devs.c (revision 318736) @@ -1,729 +1,741 @@ /*- * Copyright (c) 2000,2004 * Poul-Henning Kamp. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vfsops.c 1.36 * * $FreeBSD$ */ +#include "opt_compat.h" + #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * The one true (but secret) list of active devices in the system. * Locked by dev_lock()/devmtx */ struct cdev_priv_list cdevp_list = TAILQ_HEAD_INITIALIZER(cdevp_list); struct unrhdr *devfs_inos; static MALLOC_DEFINE(M_DEVFS2, "DEVFS2", "DEVFS data 2"); static MALLOC_DEFINE(M_DEVFS3, "DEVFS3", "DEVFS data 3"); static MALLOC_DEFINE(M_CDEVP, "DEVFS1", "DEVFS cdev_priv storage"); SYSCTL_NODE(_vfs, OID_AUTO, devfs, CTLFLAG_RW, 0, "DEVFS filesystem"); static unsigned devfs_generation; SYSCTL_UINT(_vfs_devfs, OID_AUTO, generation, CTLFLAG_RD, &devfs_generation, 0, "DEVFS generation number"); unsigned devfs_rule_depth = 1; SYSCTL_UINT(_vfs_devfs, OID_AUTO, rule_depth, CTLFLAG_RW, &devfs_rule_depth, 0, "Max depth of ruleset include"); /* * Helper sysctl for devname(3). We're given a dev_t and return the * name, if any, registered by the device driver. */ static int sysctl_devname(SYSCTL_HANDLER_ARGS) { int error; dev_t ud; +#ifdef COMPAT_FREEBSD11 + uint32_t ud_compat; +#endif struct cdev_priv *cdp; struct cdev *dev; - error = SYSCTL_IN(req, &ud, sizeof (ud)); +#ifdef COMPAT_FREEBSD11 + if (req->newlen == sizeof(ud_compat)) { + error = SYSCTL_IN(req, &ud_compat, sizeof(ud_compat)); + if (error == 0) + ud = ud_compat == (uint32_t)NODEV ? NODEV : ud_compat; + } else +#endif + error = SYSCTL_IN(req, &ud, sizeof (ud)); if (error) return (error); if (ud == NODEV) return (EINVAL); dev = NULL; dev_lock(); TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) if (cdp->cdp_inode == ud) { dev = &cdp->cdp_c; dev_refl(dev); break; } dev_unlock(); if (dev == NULL) return (ENOENT); error = SYSCTL_OUT(req, dev->si_name, strlen(dev->si_name) + 1); dev_rel(dev); return (error); } SYSCTL_PROC(_kern, OID_AUTO, devname, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY|CTLFLAG_MPSAFE, NULL, 0, sysctl_devname, "", "devname(3) handler"); SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, sizeof(struct cdev), "sizeof(struct cdev)"); SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev_priv, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, sizeof(struct cdev_priv), "sizeof(struct cdev_priv)"); struct cdev * devfs_alloc(int flags) { struct cdev_priv *cdp; struct cdev *cdev; struct timespec ts; cdp = malloc(sizeof *cdp, M_CDEVP, M_ZERO | ((flags & MAKEDEV_NOWAIT) ? M_NOWAIT : M_WAITOK)); if (cdp == NULL) return (NULL); cdp->cdp_dirents = &cdp->cdp_dirent0; cdev = &cdp->cdp_c; LIST_INIT(&cdev->si_children); vfs_timestamp(&ts); cdev->si_atime = cdev->si_mtime = cdev->si_ctime = ts; return (cdev); } int devfs_dev_exists(const char *name) { struct cdev_priv *cdp; mtx_assert(&devmtx, MA_OWNED); TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) { if ((cdp->cdp_flags & CDP_ACTIVE) == 0) continue; if (devfs_pathpath(cdp->cdp_c.si_name, name) != 0) return (1); if (devfs_pathpath(name, cdp->cdp_c.si_name) != 0) return (1); } if (devfs_dir_find(name) != 0) return (1); return (0); } void devfs_free(struct cdev *cdev) { struct cdev_priv *cdp; cdp = cdev2priv(cdev); if (cdev->si_cred != NULL) crfree(cdev->si_cred); devfs_free_cdp_inode(cdp->cdp_inode); if (cdp->cdp_maxdirent > 0) free(cdp->cdp_dirents, M_DEVFS2); free(cdp, M_CDEVP); } struct devfs_dirent * devfs_find(struct devfs_dirent *dd, const char *name, int namelen, int type) { struct devfs_dirent *de; TAILQ_FOREACH(de, &dd->de_dlist, de_list) { if (namelen != de->de_dirent->d_namlen) continue; if (type != 0 && type != de->de_dirent->d_type) continue; /* * The race with finding non-active name is not * completely closed by the check, but it is similar * to the devfs_allocv() in making it unlikely enough. */ if (de->de_dirent->d_type == DT_CHR && (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0) continue; if (bcmp(name, de->de_dirent->d_name, namelen) != 0) continue; break; } KASSERT(de == NULL || (de->de_flags & DE_DOOMED) == 0, ("devfs_find: returning a doomed entry")); return (de); } struct devfs_dirent * devfs_newdirent(char *name, int namelen) { int i; struct devfs_dirent *de; struct dirent d; d.d_namlen = namelen; i = sizeof(*de) + GENERIC_DIRSIZ(&d); de = malloc(i, M_DEVFS3, M_WAITOK | M_ZERO); de->de_dirent = (struct dirent *)(de + 1); de->de_dirent->d_namlen = namelen; de->de_dirent->d_reclen = GENERIC_DIRSIZ(&d); bcopy(name, de->de_dirent->d_name, namelen); de->de_dirent->d_name[namelen] = '\0'; vfs_timestamp(&de->de_ctime); de->de_mtime = de->de_atime = de->de_ctime; de->de_links = 1; de->de_holdcnt = 1; #ifdef MAC mac_devfs_init(de); #endif return (de); } struct devfs_dirent * devfs_parent_dirent(struct devfs_dirent *de) { if (de->de_dirent->d_type != DT_DIR) return (de->de_dir); if (de->de_flags & (DE_DOT | DE_DOTDOT)) return (NULL); de = TAILQ_FIRST(&de->de_dlist); /* "." */ if (de == NULL) return (NULL); de = TAILQ_NEXT(de, de_list); /* ".." */ if (de == NULL) return (NULL); return (de->de_dir); } struct devfs_dirent * devfs_vmkdir(struct devfs_mount *dmp, char *name, int namelen, struct devfs_dirent *dotdot, u_int inode) { struct devfs_dirent *dd; struct devfs_dirent *de; /* Create the new directory */ dd = devfs_newdirent(name, namelen); TAILQ_INIT(&dd->de_dlist); dd->de_dirent->d_type = DT_DIR; dd->de_mode = 0555; dd->de_links = 2; dd->de_dir = dd; if (inode != 0) dd->de_inode = inode; else dd->de_inode = alloc_unr(devfs_inos); /* * "." and ".." are always the two first entries in the * de_dlist list. * * Create the "." entry in the new directory. */ de = devfs_newdirent(".", 1); de->de_dirent->d_type = DT_DIR; de->de_flags |= DE_DOT; TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); de->de_dir = dd; /* Create the ".." entry in the new directory. */ de = devfs_newdirent("..", 2); de->de_dirent->d_type = DT_DIR; de->de_flags |= DE_DOTDOT; TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); if (dotdot == NULL) { de->de_dir = dd; } else { de->de_dir = dotdot; sx_assert(&dmp->dm_lock, SX_XLOCKED); TAILQ_INSERT_TAIL(&dotdot->de_dlist, dd, de_list); dotdot->de_links++; devfs_rules_apply(dmp, dd); } #ifdef MAC mac_devfs_create_directory(dmp->dm_mount, name, namelen, dd); #endif return (dd); } void devfs_dirent_free(struct devfs_dirent *de) { struct vnode *vp; vp = de->de_vnode; mtx_lock(&devfs_de_interlock); if (vp != NULL && vp->v_data == de) vp->v_data = NULL; mtx_unlock(&devfs_de_interlock); free(de, M_DEVFS3); } /* * Removes a directory if it is empty. Also empty parent directories are * removed recursively. */ static void devfs_rmdir_empty(struct devfs_mount *dm, struct devfs_dirent *de) { struct devfs_dirent *dd, *de_dot, *de_dotdot; sx_assert(&dm->dm_lock, SX_XLOCKED); for (;;) { KASSERT(de->de_dirent->d_type == DT_DIR, ("devfs_rmdir_empty: de is not a directory")); if ((de->de_flags & DE_DOOMED) != 0 || de == dm->dm_rootdir) return; de_dot = TAILQ_FIRST(&de->de_dlist); KASSERT(de_dot != NULL, ("devfs_rmdir_empty: . missing")); de_dotdot = TAILQ_NEXT(de_dot, de_list); KASSERT(de_dotdot != NULL, ("devfs_rmdir_empty: .. missing")); /* Return if the directory is not empty. */ if (TAILQ_NEXT(de_dotdot, de_list) != NULL) return; dd = devfs_parent_dirent(de); KASSERT(dd != NULL, ("devfs_rmdir_empty: NULL dd")); TAILQ_REMOVE(&de->de_dlist, de_dot, de_list); TAILQ_REMOVE(&de->de_dlist, de_dotdot, de_list); TAILQ_REMOVE(&dd->de_dlist, de, de_list); DEVFS_DE_HOLD(dd); devfs_delete(dm, de, DEVFS_DEL_NORECURSE); devfs_delete(dm, de_dot, DEVFS_DEL_NORECURSE); devfs_delete(dm, de_dotdot, DEVFS_DEL_NORECURSE); if (DEVFS_DE_DROP(dd)) { devfs_dirent_free(dd); return; } de = dd; } } /* * The caller needs to hold the dm for the duration of the call since * dm->dm_lock may be temporary dropped. */ void devfs_delete(struct devfs_mount *dm, struct devfs_dirent *de, int flags) { struct devfs_dirent *dd; struct vnode *vp; KASSERT((de->de_flags & DE_DOOMED) == 0, ("devfs_delete doomed dirent")); de->de_flags |= DE_DOOMED; if ((flags & DEVFS_DEL_NORECURSE) == 0) { dd = devfs_parent_dirent(de); if (dd != NULL) DEVFS_DE_HOLD(dd); if (de->de_flags & DE_USER) { KASSERT(dd != NULL, ("devfs_delete: NULL dd")); devfs_dir_unref_de(dm, dd); } } else dd = NULL; mtx_lock(&devfs_de_interlock); vp = de->de_vnode; if (vp != NULL) { VI_LOCK(vp); mtx_unlock(&devfs_de_interlock); vholdl(vp); sx_unlock(&dm->dm_lock); if ((flags & DEVFS_DEL_VNLOCKED) == 0) vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY); else VI_UNLOCK(vp); vgone(vp); if ((flags & DEVFS_DEL_VNLOCKED) == 0) VOP_UNLOCK(vp, 0); vdrop(vp); sx_xlock(&dm->dm_lock); } else mtx_unlock(&devfs_de_interlock); if (de->de_symlink) { free(de->de_symlink, M_DEVFS); de->de_symlink = NULL; } #ifdef MAC mac_devfs_destroy(de); #endif if (de->de_inode > DEVFS_ROOTINO) { devfs_free_cdp_inode(de->de_inode); de->de_inode = 0; } if (DEVFS_DE_DROP(de)) devfs_dirent_free(de); if (dd != NULL) { if (DEVFS_DE_DROP(dd)) devfs_dirent_free(dd); else devfs_rmdir_empty(dm, dd); } } /* * Called on unmount. * Recursively removes the entire tree. * The caller needs to hold the dm for the duration of the call. */ static void devfs_purge(struct devfs_mount *dm, struct devfs_dirent *dd) { struct devfs_dirent *de; sx_assert(&dm->dm_lock, SX_XLOCKED); DEVFS_DE_HOLD(dd); for (;;) { /* * Use TAILQ_LAST() to remove "." and ".." last. * We might need ".." to resolve a path in * devfs_dir_unref_de(). */ de = TAILQ_LAST(&dd->de_dlist, devfs_dlist_head); if (de == NULL) break; TAILQ_REMOVE(&dd->de_dlist, de, de_list); if (de->de_flags & DE_USER) devfs_dir_unref_de(dm, dd); if (de->de_flags & (DE_DOT | DE_DOTDOT)) devfs_delete(dm, de, DEVFS_DEL_NORECURSE); else if (de->de_dirent->d_type == DT_DIR) devfs_purge(dm, de); else devfs_delete(dm, de, DEVFS_DEL_NORECURSE); } if (DEVFS_DE_DROP(dd)) devfs_dirent_free(dd); else if ((dd->de_flags & DE_DOOMED) == 0) devfs_delete(dm, dd, DEVFS_DEL_NORECURSE); } /* * Each cdev_priv has an array of pointers to devfs_dirent which is indexed * by the mount points dm_idx. * This function extends the array when necessary, taking into account that * the default array is 1 element and not malloc'ed. */ static void devfs_metoo(struct cdev_priv *cdp, struct devfs_mount *dm) { struct devfs_dirent **dep; int siz; siz = (dm->dm_idx + 1) * sizeof *dep; dep = malloc(siz, M_DEVFS2, M_WAITOK | M_ZERO); dev_lock(); if (dm->dm_idx <= cdp->cdp_maxdirent) { /* We got raced */ dev_unlock(); free(dep, M_DEVFS2); return; } memcpy(dep, cdp->cdp_dirents, (cdp->cdp_maxdirent + 1) * sizeof *dep); if (cdp->cdp_maxdirent > 0) free(cdp->cdp_dirents, M_DEVFS2); cdp->cdp_dirents = dep; /* * XXX: if malloc told us how much we actually got this could * XXX: be optimized. */ cdp->cdp_maxdirent = dm->dm_idx; dev_unlock(); } /* * The caller needs to hold the dm for the duration of the call. */ static int devfs_populate_loop(struct devfs_mount *dm, int cleanup) { struct cdev_priv *cdp; struct devfs_dirent *de; struct devfs_dirent *dd, *dt; struct cdev *pdev; int de_flags, depth, j; char *q, *s; sx_assert(&dm->dm_lock, SX_XLOCKED); dev_lock(); TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) { KASSERT(cdp->cdp_dirents != NULL, ("NULL cdp_dirents")); /* * If we are unmounting, or the device has been destroyed, * clean up our dirent. */ if ((cleanup || !(cdp->cdp_flags & CDP_ACTIVE)) && dm->dm_idx <= cdp->cdp_maxdirent && cdp->cdp_dirents[dm->dm_idx] != NULL) { de = cdp->cdp_dirents[dm->dm_idx]; cdp->cdp_dirents[dm->dm_idx] = NULL; KASSERT(cdp == de->de_cdp, ("%s %d %s %p %p", __func__, __LINE__, cdp->cdp_c.si_name, cdp, de->de_cdp)); KASSERT(de->de_dir != NULL, ("Null de->de_dir")); dev_unlock(); TAILQ_REMOVE(&de->de_dir->de_dlist, de, de_list); de->de_cdp = NULL; de->de_inode = 0; devfs_delete(dm, de, 0); dev_lock(); cdp->cdp_inuse--; dev_unlock(); return (1); } /* * GC any lingering devices */ if (!(cdp->cdp_flags & CDP_ACTIVE)) { if (cdp->cdp_inuse > 0) continue; TAILQ_REMOVE(&cdevp_list, cdp, cdp_list); dev_unlock(); dev_rel(&cdp->cdp_c); return (1); } /* * Don't create any new dirents if we are unmounting */ if (cleanup) continue; KASSERT((cdp->cdp_flags & CDP_ACTIVE), ("Bogons, I tell ya'!")); if (dm->dm_idx <= cdp->cdp_maxdirent && cdp->cdp_dirents[dm->dm_idx] != NULL) { de = cdp->cdp_dirents[dm->dm_idx]; KASSERT(cdp == de->de_cdp, ("inconsistent cdp")); continue; } cdp->cdp_inuse++; dev_unlock(); if (dm->dm_idx > cdp->cdp_maxdirent) devfs_metoo(cdp, dm); dd = dm->dm_rootdir; s = cdp->cdp_c.si_name; for (;;) { for (q = s; *q != '/' && *q != '\0'; q++) continue; if (*q != '/') break; de = devfs_find(dd, s, q - s, 0); if (de == NULL) de = devfs_vmkdir(dm, s, q - s, dd, 0); else if (de->de_dirent->d_type == DT_LNK) { de = devfs_find(dd, s, q - s, DT_DIR); if (de == NULL) de = devfs_vmkdir(dm, s, q - s, dd, 0); de->de_flags |= DE_COVERED; } s = q + 1; dd = de; KASSERT(dd->de_dirent->d_type == DT_DIR && (dd->de_flags & (DE_DOT | DE_DOTDOT)) == 0, ("%s: invalid directory (si_name=%s)", __func__, cdp->cdp_c.si_name)); } de_flags = 0; de = devfs_find(dd, s, q - s, DT_LNK); if (de != NULL) de_flags |= DE_COVERED; de = devfs_newdirent(s, q - s); if (cdp->cdp_c.si_flags & SI_ALIAS) { de->de_uid = 0; de->de_gid = 0; de->de_mode = 0755; de->de_dirent->d_type = DT_LNK; pdev = cdp->cdp_c.si_parent; dt = dd; depth = 0; while (dt != dm->dm_rootdir && (dt = devfs_parent_dirent(dt)) != NULL) depth++; j = depth * 3 + strlen(pdev->si_name) + 1; de->de_symlink = malloc(j, M_DEVFS, M_WAITOK); de->de_symlink[0] = 0; while (depth-- > 0) strcat(de->de_symlink, "../"); strcat(de->de_symlink, pdev->si_name); } else { de->de_uid = cdp->cdp_c.si_uid; de->de_gid = cdp->cdp_c.si_gid; de->de_mode = cdp->cdp_c.si_mode; de->de_dirent->d_type = DT_CHR; } de->de_flags |= de_flags; de->de_inode = cdp->cdp_inode; de->de_cdp = cdp; #ifdef MAC mac_devfs_create_device(cdp->cdp_c.si_cred, dm->dm_mount, &cdp->cdp_c, de); #endif de->de_dir = dd; TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); devfs_rules_apply(dm, de); dev_lock(); /* XXX: could check that cdp is still active here */ KASSERT(cdp->cdp_dirents[dm->dm_idx] == NULL, ("%s %d\n", __func__, __LINE__)); cdp->cdp_dirents[dm->dm_idx] = de; KASSERT(de->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__)); dev_unlock(); return (1); } dev_unlock(); return (0); } /* * The caller needs to hold the dm for the duration of the call. */ void devfs_populate(struct devfs_mount *dm) { unsigned gen; sx_assert(&dm->dm_lock, SX_XLOCKED); gen = devfs_generation; if (dm->dm_generation == gen) return; while (devfs_populate_loop(dm, 0)) continue; dm->dm_generation = gen; } /* * The caller needs to hold the dm for the duration of the call. */ void devfs_cleanup(struct devfs_mount *dm) { sx_assert(&dm->dm_lock, SX_XLOCKED); while (devfs_populate_loop(dm, 1)) continue; devfs_purge(dm, dm->dm_rootdir); } /* * devfs_create() and devfs_destroy() are called from kern_conf.c and * in both cases the devlock() mutex is held, so no further locking * is necessary and no sleeping allowed. */ void devfs_create(struct cdev *dev) { struct cdev_priv *cdp; mtx_assert(&devmtx, MA_OWNED); cdp = cdev2priv(dev); cdp->cdp_flags |= CDP_ACTIVE; cdp->cdp_inode = alloc_unrl(devfs_inos); dev_refl(dev); TAILQ_INSERT_TAIL(&cdevp_list, cdp, cdp_list); devfs_generation++; } void devfs_destroy(struct cdev *dev) { struct cdev_priv *cdp; mtx_assert(&devmtx, MA_OWNED); cdp = cdev2priv(dev); cdp->cdp_flags &= ~CDP_ACTIVE; devfs_generation++; } ino_t devfs_alloc_cdp_inode(void) { return (alloc_unr(devfs_inos)); } void devfs_free_cdp_inode(ino_t ino) { if (ino > 0) free_unr(devfs_inos, ino); } static void devfs_devs_init(void *junk __unused) { devfs_inos = new_unrhdr(DEVFS_ROOTINO + 1, INT_MAX, &devmtx); } SYSINIT(devfs_devs, SI_SUB_DEVFS, SI_ORDER_FIRST, devfs_devs_init, NULL); Index: head/sys/fs/devfs/devfs_vnops.c =================================================================== --- head/sys/fs/devfs/devfs_vnops.c (revision 318735) +++ head/sys/fs/devfs/devfs_vnops.c (revision 318736) @@ -1,1929 +1,1930 @@ /*- * Copyright (c) 2000-2004 * Poul-Henning Kamp. All rights reserved. * Copyright (c) 1989, 1992-1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95 * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43 * * $FreeBSD$ */ /* * TODO: * mkdir: want it ? */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct vop_vector devfs_vnodeops; static struct vop_vector devfs_specops; static struct fileops devfs_ops_f; #include #include #include #include #include #include static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data"); struct mtx devfs_de_interlock; MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF); struct sx clone_drain_lock; SX_SYSINIT(clone_drain_lock, &clone_drain_lock, "clone events drain lock"); struct mtx cdevpriv_mtx; MTX_SYSINIT(cdevpriv_mtx, &cdevpriv_mtx, "cdevpriv lock", MTX_DEF); SYSCTL_DECL(_vfs_devfs); static int devfs_dotimes; SYSCTL_INT(_vfs_devfs, OID_AUTO, dotimes, CTLFLAG_RW, &devfs_dotimes, 0, "Update timestamps on DEVFS with default precision"); /* * Update devfs node timestamp. Note that updates are unlocked and * stat(2) could see partially updated times. */ static void devfs_timestamp(struct timespec *tsp) { time_t ts; if (devfs_dotimes) { vfs_timestamp(tsp); } else { ts = time_second; if (tsp->tv_sec != ts) { tsp->tv_sec = ts; tsp->tv_nsec = 0; } } } static int devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp, int *ref) { *dswp = devvn_refthread(fp->f_vnode, devp, ref); if (*devp != fp->f_data) { if (*dswp != NULL) dev_relthread(*devp, *ref); return (ENXIO); } KASSERT((*devp)->si_refcount > 0, ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp))); if (*dswp == NULL) return (ENXIO); curthread->td_fpop = fp; return (0); } int devfs_get_cdevpriv(void **datap) { struct file *fp; struct cdev_privdata *p; int error; fp = curthread->td_fpop; if (fp == NULL) return (EBADF); p = fp->f_cdevpriv; if (p != NULL) { error = 0; *datap = p->cdpd_data; } else error = ENOENT; return (error); } int devfs_set_cdevpriv(void *priv, d_priv_dtor_t *priv_dtr) { struct file *fp; struct cdev_priv *cdp; struct cdev_privdata *p; int error; fp = curthread->td_fpop; if (fp == NULL) return (ENOENT); cdp = cdev2priv((struct cdev *)fp->f_data); p = malloc(sizeof(struct cdev_privdata), M_CDEVPDATA, M_WAITOK); p->cdpd_data = priv; p->cdpd_dtr = priv_dtr; p->cdpd_fp = fp; mtx_lock(&cdevpriv_mtx); if (fp->f_cdevpriv == NULL) { LIST_INSERT_HEAD(&cdp->cdp_fdpriv, p, cdpd_list); fp->f_cdevpriv = p; mtx_unlock(&cdevpriv_mtx); error = 0; } else { mtx_unlock(&cdevpriv_mtx); free(p, M_CDEVPDATA); error = EBUSY; } return (error); } void devfs_destroy_cdevpriv(struct cdev_privdata *p) { mtx_assert(&cdevpriv_mtx, MA_OWNED); KASSERT(p->cdpd_fp->f_cdevpriv == p, ("devfs_destoy_cdevpriv %p != %p", p->cdpd_fp->f_cdevpriv, p)); p->cdpd_fp->f_cdevpriv = NULL; LIST_REMOVE(p, cdpd_list); mtx_unlock(&cdevpriv_mtx); (p->cdpd_dtr)(p->cdpd_data); free(p, M_CDEVPDATA); } static void devfs_fpdrop(struct file *fp) { struct cdev_privdata *p; mtx_lock(&cdevpriv_mtx); if ((p = fp->f_cdevpriv) == NULL) { mtx_unlock(&cdevpriv_mtx); return; } devfs_destroy_cdevpriv(p); } void devfs_clear_cdevpriv(void) { struct file *fp; fp = curthread->td_fpop; if (fp == NULL) return; devfs_fpdrop(fp); } /* * On success devfs_populate_vp() returns with dmp->dm_lock held. */ static int devfs_populate_vp(struct vnode *vp) { struct devfs_dirent *de; struct devfs_mount *dmp; int locked; ASSERT_VOP_LOCKED(vp, "devfs_populate_vp"); dmp = VFSTODEVFS(vp->v_mount); locked = VOP_ISLOCKED(vp); sx_xlock(&dmp->dm_lock); DEVFS_DMP_HOLD(dmp); /* Can't call devfs_populate() with the vnode lock held. */ VOP_UNLOCK(vp, 0); devfs_populate(dmp); sx_xunlock(&dmp->dm_lock); vn_lock(vp, locked | LK_RETRY); sx_xlock(&dmp->dm_lock); if (DEVFS_DMP_DROP(dmp)) { sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); return (ERESTART); } if ((vp->v_iflag & VI_DOOMED) != 0) { sx_xunlock(&dmp->dm_lock); return (ERESTART); } de = vp->v_data; KASSERT(de != NULL, ("devfs_populate_vp: vp->v_data == NULL but vnode not doomed")); if ((de->de_flags & DE_DOOMED) != 0) { sx_xunlock(&dmp->dm_lock); return (ERESTART); } return (0); } static int devfs_vptocnp(struct vop_vptocnp_args *ap) { struct vnode *vp = ap->a_vp; struct vnode **dvp = ap->a_vpp; struct devfs_mount *dmp; char *buf = ap->a_buf; int *buflen = ap->a_buflen; struct devfs_dirent *dd, *de; int i, error; dmp = VFSTODEVFS(vp->v_mount); error = devfs_populate_vp(vp); if (error != 0) return (error); i = *buflen; dd = vp->v_data; if (vp->v_type == VCHR) { i -= strlen(dd->de_cdp->cdp_c.si_name); if (i < 0) { error = ENOMEM; goto finished; } bcopy(dd->de_cdp->cdp_c.si_name, buf + i, strlen(dd->de_cdp->cdp_c.si_name)); de = dd->de_dir; } else if (vp->v_type == VDIR) { if (dd == dmp->dm_rootdir) { *dvp = vp; vref(*dvp); goto finished; } i -= dd->de_dirent->d_namlen; if (i < 0) { error = ENOMEM; goto finished; } bcopy(dd->de_dirent->d_name, buf + i, dd->de_dirent->d_namlen); de = dd; } else { error = ENOENT; goto finished; } *buflen = i; de = devfs_parent_dirent(de); if (de == NULL) { error = ENOENT; goto finished; } mtx_lock(&devfs_de_interlock); *dvp = de->de_vnode; if (*dvp != NULL) { VI_LOCK(*dvp); mtx_unlock(&devfs_de_interlock); vholdl(*dvp); VI_UNLOCK(*dvp); vref(*dvp); vdrop(*dvp); } else { mtx_unlock(&devfs_de_interlock); error = ENOENT; } finished: sx_xunlock(&dmp->dm_lock); return (error); } /* * Construct the fully qualified path name relative to the mountpoint. * If a NULL cnp is provided, no '/' is appended to the resulting path. */ char * devfs_fqpn(char *buf, struct devfs_mount *dmp, struct devfs_dirent *dd, struct componentname *cnp) { int i; struct devfs_dirent *de; sx_assert(&dmp->dm_lock, SA_LOCKED); i = SPECNAMELEN; buf[i] = '\0'; if (cnp != NULL) i -= cnp->cn_namelen; if (i < 0) return (NULL); if (cnp != NULL) bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen); de = dd; while (de != dmp->dm_rootdir) { if (cnp != NULL || i < SPECNAMELEN) { i--; if (i < 0) return (NULL); buf[i] = '/'; } i -= de->de_dirent->d_namlen; if (i < 0) return (NULL); bcopy(de->de_dirent->d_name, buf + i, de->de_dirent->d_namlen); de = devfs_parent_dirent(de); if (de == NULL) return (NULL); } return (buf + i); } static int devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp, struct devfs_dirent *de) { int not_found; not_found = 0; if (de->de_flags & DE_DOOMED) not_found = 1; if (DEVFS_DE_DROP(de)) { KASSERT(not_found == 1, ("DEVFS de dropped but not doomed")); devfs_dirent_free(de); } if (DEVFS_DMP_DROP(dmp)) { KASSERT(not_found == 1, ("DEVFS mount struct freed before dirent")); not_found = 2; sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); } if (not_found == 1 || (drop_dm_lock && not_found != 2)) sx_unlock(&dmp->dm_lock); return (not_found); } static void devfs_insmntque_dtr(struct vnode *vp, void *arg) { struct devfs_dirent *de; de = (struct devfs_dirent *)arg; mtx_lock(&devfs_de_interlock); vp->v_data = NULL; de->de_vnode = NULL; mtx_unlock(&devfs_de_interlock); vgone(vp); vput(vp); } /* * devfs_allocv shall be entered with dmp->dm_lock held, and it drops * it on return. */ int devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode, struct vnode **vpp) { int error; struct vnode *vp; struct cdev *dev; struct devfs_mount *dmp; struct cdevsw *dsw; dmp = VFSTODEVFS(mp); if (de->de_flags & DE_DOOMED) { sx_xunlock(&dmp->dm_lock); return (ENOENT); } loop: DEVFS_DE_HOLD(de); DEVFS_DMP_HOLD(dmp); mtx_lock(&devfs_de_interlock); vp = de->de_vnode; if (vp != NULL) { VI_LOCK(vp); mtx_unlock(&devfs_de_interlock); sx_xunlock(&dmp->dm_lock); vget(vp, lockmode | LK_INTERLOCK | LK_RETRY, curthread); sx_xlock(&dmp->dm_lock); if (devfs_allocv_drop_refs(0, dmp, de)) { vput(vp); return (ENOENT); } else if ((vp->v_iflag & VI_DOOMED) != 0) { mtx_lock(&devfs_de_interlock); if (de->de_vnode == vp) { de->de_vnode = NULL; vp->v_data = NULL; } mtx_unlock(&devfs_de_interlock); vput(vp); goto loop; } sx_xunlock(&dmp->dm_lock); *vpp = vp; return (0); } mtx_unlock(&devfs_de_interlock); if (de->de_dirent->d_type == DT_CHR) { if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) { devfs_allocv_drop_refs(1, dmp, de); return (ENOENT); } dev = &de->de_cdp->cdp_c; } else { dev = NULL; } error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp); if (error != 0) { devfs_allocv_drop_refs(1, dmp, de); printf("devfs_allocv: failed to allocate new vnode\n"); return (error); } if (de->de_dirent->d_type == DT_CHR) { vp->v_type = VCHR; VI_LOCK(vp); dev_lock(); dev_refl(dev); /* XXX: v_rdev should be protect by vnode lock */ vp->v_rdev = dev; KASSERT(vp->v_usecount == 1, ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount)); dev->si_usecount += vp->v_usecount; /* Special casing of ttys for deadfs. Probably redundant. */ dsw = dev->si_devsw; if (dsw != NULL && (dsw->d_flags & D_TTY) != 0) vp->v_vflag |= VV_ISTTY; dev_unlock(); VI_UNLOCK(vp); if ((dev->si_flags & SI_ETERNAL) != 0) vp->v_vflag |= VV_ETERNALDEV; vp->v_op = &devfs_specops; } else if (de->de_dirent->d_type == DT_DIR) { vp->v_type = VDIR; } else if (de->de_dirent->d_type == DT_LNK) { vp->v_type = VLNK; } else { vp->v_type = VBAD; } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWITNESS); VN_LOCK_ASHARE(vp); mtx_lock(&devfs_de_interlock); vp->v_data = de; de->de_vnode = vp; mtx_unlock(&devfs_de_interlock); error = insmntque1(vp, mp, devfs_insmntque_dtr, de); if (error != 0) { (void) devfs_allocv_drop_refs(1, dmp, de); return (error); } if (devfs_allocv_drop_refs(0, dmp, de)) { vput(vp); return (ENOENT); } #ifdef MAC mac_devfs_vnode_associate(mp, de, vp); #endif sx_xunlock(&dmp->dm_lock); *vpp = vp; return (0); } static int devfs_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; struct devfs_dirent *de; struct proc *p; int error; de = vp->v_data; if (vp->v_type == VDIR) de = de->de_dir; error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid, ap->a_accmode, ap->a_cred, NULL); if (error == 0) return (0); if (error != EACCES) return (error); p = ap->a_td->td_proc; /* We do, however, allow access to the controlling terminal */ PROC_LOCK(p); if (!(p->p_flag & P_CONTROLT)) { PROC_UNLOCK(p); return (error); } if (p->p_session->s_ttydp == de->de_cdp) error = 0; PROC_UNLOCK(p); return (error); } _Static_assert(((FMASK | FCNTLFLAGS) & (FLASTCLOSE | FREVOKE)) == 0, "devfs-only flag reuse failed"); static int devfs_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp, *oldvp; struct thread *td = ap->a_td; struct proc *p; struct cdev *dev = vp->v_rdev; struct cdevsw *dsw; int dflags, error, ref, vp_locked; /* * XXX: Don't call d_close() if we were called because of * XXX: insmntque1() failure. */ if (vp->v_data == NULL) return (0); /* * Hack: a tty device that is a controlling terminal * has a reference from the session structure. * We cannot easily tell that a character device is * a controlling terminal, unless it is the closing * process' controlling terminal. In that case, * if the reference count is 2 (this last descriptor * plus the session), release the reference from the session. */ if (td != NULL) { p = td->td_proc; PROC_LOCK(p); if (vp == p->p_session->s_ttyvp) { PROC_UNLOCK(p); oldvp = NULL; sx_xlock(&proctree_lock); if (vp == p->p_session->s_ttyvp) { SESS_LOCK(p->p_session); VI_LOCK(vp); if (count_dev(dev) == 2 && (vp->v_iflag & VI_DOOMED) == 0) { p->p_session->s_ttyvp = NULL; p->p_session->s_ttydp = NULL; oldvp = vp; } VI_UNLOCK(vp); SESS_UNLOCK(p->p_session); } sx_xunlock(&proctree_lock); if (oldvp != NULL) vrele(oldvp); } else PROC_UNLOCK(p); } /* * We do not want to really close the device if it * is still in use unless we are trying to close it * forcibly. Since every use (buffer, vnode, swap, cmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ dsw = dev_refthread(dev, &ref); if (dsw == NULL) return (ENXIO); dflags = 0; VI_LOCK(vp); if (vp->v_iflag & VI_DOOMED) { /* Forced close. */ dflags |= FREVOKE | FNONBLOCK; } else if (dsw->d_flags & D_TRACKCLOSE) { /* Keep device updated on status. */ } else if (count_dev(dev) > 1) { VI_UNLOCK(vp); dev_relthread(dev, ref); return (0); } if (count_dev(dev) == 1) dflags |= FLASTCLOSE; vholdl(vp); VI_UNLOCK(vp); vp_locked = VOP_ISLOCKED(vp); VOP_UNLOCK(vp, 0); KASSERT(dev->si_refcount > 0, ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev))); error = dsw->d_close(dev, ap->a_fflag | dflags, S_IFCHR, td); dev_relthread(dev, ref); vn_lock(vp, vp_locked | LK_RETRY); vdrop(vp); return (error); } static int devfs_close_f(struct file *fp, struct thread *td) { int error; struct file *fpop; /* * NB: td may be NULL if this descriptor is closed due to * garbage collection from a closed UNIX domain socket. */ fpop = curthread->td_fpop; curthread->td_fpop = fp; error = vnops.fo_close(fp, td); curthread->td_fpop = fpop; /* * The f_cdevpriv cannot be assigned non-NULL value while we * are destroying the file. */ if (fp->f_cdevpriv != NULL) devfs_fpdrop(fp); return (error); } static int devfs_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; struct devfs_dirent *de; struct devfs_mount *dmp; struct cdev *dev; struct timeval boottime; int error; error = devfs_populate_vp(vp); if (error != 0) return (error); dmp = VFSTODEVFS(vp->v_mount); sx_xunlock(&dmp->dm_lock); de = vp->v_data; KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp)); if (vp->v_type == VDIR) { de = de->de_dir; KASSERT(de != NULL, ("Null dir dirent in devfs_getattr vp=%p", vp)); } vap->va_uid = de->de_uid; vap->va_gid = de->de_gid; vap->va_mode = de->de_mode; if (vp->v_type == VLNK) vap->va_size = strlen(de->de_symlink); else if (vp->v_type == VDIR) vap->va_size = vap->va_bytes = DEV_BSIZE; else vap->va_size = 0; if (vp->v_type != VDIR) vap->va_bytes = 0; vap->va_blocksize = DEV_BSIZE; vap->va_type = vp->v_type; getboottime(&boottime); #define fix(aa) \ do { \ if ((aa).tv_sec <= 3600) { \ (aa).tv_sec = boottime.tv_sec; \ (aa).tv_nsec = boottime.tv_usec * 1000; \ } \ } while (0) if (vp->v_type != VCHR) { fix(de->de_atime); vap->va_atime = de->de_atime; fix(de->de_mtime); vap->va_mtime = de->de_mtime; fix(de->de_ctime); vap->va_ctime = de->de_ctime; } else { dev = vp->v_rdev; fix(dev->si_atime); vap->va_atime = dev->si_atime; fix(dev->si_mtime); vap->va_mtime = dev->si_mtime; fix(dev->si_ctime); vap->va_ctime = dev->si_ctime; vap->va_rdev = cdev2priv(dev)->cdp_inode; } vap->va_gen = 0; vap->va_flags = 0; vap->va_filerev = 0; vap->va_nlink = de->de_links; vap->va_fileid = de->de_inode; return (error); } /* ARGSUSED */ static int devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td) { struct file *fpop; int error; fpop = td->td_fpop; td->td_fpop = fp; error = vnops.fo_ioctl(fp, com, data, cred, td); td->td_fpop = fpop; return (error); } static int devfs_ioctl(struct vop_ioctl_args *ap) { struct fiodgname_arg *fgn; struct vnode *vpold, *vp; struct cdevsw *dsw; struct thread *td; struct cdev *dev; int error, ref, i; const char *p; u_long com; vp = ap->a_vp; com = ap->a_command; td = ap->a_td; dsw = devvn_refthread(vp, &dev, &ref); if (dsw == NULL) return (ENXIO); KASSERT(dev->si_refcount > 0, ("devfs: un-referenced struct cdev *(%s)", devtoname(dev))); if (com == FIODTYPE) { *(int *)ap->a_data = dsw->d_flags & D_TYPEMASK; error = 0; goto out; } else if (com == FIODGNAME) { fgn = ap->a_data; p = devtoname(dev); i = strlen(p) + 1; if (i > fgn->len) error = EINVAL; else error = copyout(p, fgn->buf, i); goto out; } error = dsw->d_ioctl(dev, com, ap->a_data, ap->a_fflag, td); out: dev_relthread(dev, ref); if (error == ENOIOCTL) error = ENOTTY; if (error == 0 && com == TIOCSCTTY) { /* Do nothing if reassigning same control tty */ sx_slock(&proctree_lock); if (td->td_proc->p_session->s_ttyvp == vp) { sx_sunlock(&proctree_lock); return (0); } vpold = td->td_proc->p_session->s_ttyvp; VREF(vp); SESS_LOCK(td->td_proc->p_session); td->td_proc->p_session->s_ttyvp = vp; td->td_proc->p_session->s_ttydp = cdev2priv(dev); SESS_UNLOCK(td->td_proc->p_session); sx_sunlock(&proctree_lock); /* Get rid of reference to old control tty */ if (vpold) vrele(vpold); } return (error); } /* ARGSUSED */ static int devfs_kqfilter_f(struct file *fp, struct knote *kn) { struct cdev *dev; struct cdevsw *dsw; int error, ref; struct file *fpop; struct thread *td; td = curthread; fpop = td->td_fpop; error = devfs_fp_check(fp, &dev, &dsw, &ref); if (error) return (error); error = dsw->d_kqfilter(dev, kn); td->td_fpop = fpop; dev_relthread(dev, ref); return (error); } static inline int devfs_prison_check(struct devfs_dirent *de, struct thread *td) { struct cdev_priv *cdp; struct ucred *dcr; struct proc *p; int error; cdp = de->de_cdp; if (cdp == NULL) return (0); dcr = cdp->cdp_c.si_cred; if (dcr == NULL) return (0); error = prison_check(td->td_ucred, dcr); if (error == 0) return (0); /* We do, however, allow access to the controlling terminal */ p = td->td_proc; PROC_LOCK(p); if (!(p->p_flag & P_CONTROLT)) { PROC_UNLOCK(p); return (error); } if (p->p_session->s_ttydp == cdp) error = 0; PROC_UNLOCK(p); return (error); } static int devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock) { struct componentname *cnp; struct vnode *dvp, **vpp; struct thread *td; struct devfs_dirent *de, *dd; struct devfs_dirent **dde; struct devfs_mount *dmp; struct cdev *cdev; int error, flags, nameiop, dvplocked; char specname[SPECNAMELEN + 1], *pname; cnp = ap->a_cnp; vpp = ap->a_vpp; dvp = ap->a_dvp; pname = cnp->cn_nameptr; td = cnp->cn_thread; flags = cnp->cn_flags; nameiop = cnp->cn_nameiop; dmp = VFSTODEVFS(dvp->v_mount); dd = dvp->v_data; *vpp = NULLVP; if ((flags & ISLASTCN) && nameiop == RENAME) return (EOPNOTSUPP); if (dvp->v_type != VDIR) return (ENOTDIR); if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT)) return (EIO); error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td); if (error) return (error); if (cnp->cn_namelen == 1 && *pname == '.') { if ((flags & ISLASTCN) && nameiop != LOOKUP) return (EINVAL); *vpp = dvp; VREF(dvp); return (0); } if (flags & ISDOTDOT) { if ((flags & ISLASTCN) && nameiop != LOOKUP) return (EINVAL); de = devfs_parent_dirent(dd); if (de == NULL) return (ENOENT); dvplocked = VOP_ISLOCKED(dvp); VOP_UNLOCK(dvp, 0); error = devfs_allocv(de, dvp->v_mount, cnp->cn_lkflags & LK_TYPE_MASK, vpp); *dm_unlock = 0; vn_lock(dvp, dvplocked | LK_RETRY); return (error); } dd = dvp->v_data; de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen, 0); while (de == NULL) { /* While(...) so we can use break */ if (nameiop == DELETE) return (ENOENT); /* * OK, we didn't have an entry for the name we were asked for * so we try to see if anybody can create it on demand. */ pname = devfs_fqpn(specname, dmp, dd, cnp); if (pname == NULL) break; cdev = NULL; DEVFS_DMP_HOLD(dmp); sx_xunlock(&dmp->dm_lock); sx_slock(&clone_drain_lock); EVENTHANDLER_INVOKE(dev_clone, td->td_ucred, pname, strlen(pname), &cdev); sx_sunlock(&clone_drain_lock); if (cdev == NULL) sx_xlock(&dmp->dm_lock); else if (devfs_populate_vp(dvp) != 0) { *dm_unlock = 0; sx_xlock(&dmp->dm_lock); if (DEVFS_DMP_DROP(dmp)) { sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); } else sx_xunlock(&dmp->dm_lock); dev_rel(cdev); return (ENOENT); } if (DEVFS_DMP_DROP(dmp)) { *dm_unlock = 0; sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); if (cdev != NULL) dev_rel(cdev); return (ENOENT); } if (cdev == NULL) break; dev_lock(); dde = &cdev2priv(cdev)->cdp_dirents[dmp->dm_idx]; if (dde != NULL && *dde != NULL) de = *dde; dev_unlock(); dev_rel(cdev); break; } if (de == NULL || de->de_flags & DE_WHITEOUT) { if ((nameiop == CREATE || nameiop == RENAME) && (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) { cnp->cn_flags |= SAVENAME; return (EJUSTRETURN); } return (ENOENT); } if (devfs_prison_check(de, td)) return (ENOENT); if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) { error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td); if (error) return (error); if (*vpp == dvp) { VREF(dvp); *vpp = dvp; return (0); } } error = devfs_allocv(de, dvp->v_mount, cnp->cn_lkflags & LK_TYPE_MASK, vpp); *dm_unlock = 0; return (error); } static int devfs_lookup(struct vop_lookup_args *ap) { int j; struct devfs_mount *dmp; int dm_unlock; if (devfs_populate_vp(ap->a_dvp) != 0) return (ENOTDIR); dmp = VFSTODEVFS(ap->a_dvp->v_mount); dm_unlock = 1; j = devfs_lookupx(ap, &dm_unlock); if (dm_unlock == 1) sx_xunlock(&dmp->dm_lock); return (j); } static int devfs_mknod(struct vop_mknod_args *ap) { struct componentname *cnp; struct vnode *dvp, **vpp; struct devfs_dirent *dd, *de; struct devfs_mount *dmp; int error; /* * The only type of node we should be creating here is a * character device, for anything else return EOPNOTSUPP. */ if (ap->a_vap->va_type != VCHR) return (EOPNOTSUPP); dvp = ap->a_dvp; dmp = VFSTODEVFS(dvp->v_mount); cnp = ap->a_cnp; vpp = ap->a_vpp; dd = dvp->v_data; error = ENOENT; sx_xlock(&dmp->dm_lock); TAILQ_FOREACH(de, &dd->de_dlist, de_list) { if (cnp->cn_namelen != de->de_dirent->d_namlen) continue; if (de->de_dirent->d_type == DT_CHR && (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0) continue; if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name, de->de_dirent->d_namlen) != 0) continue; if (de->de_flags & DE_WHITEOUT) break; goto notfound; } if (de == NULL) goto notfound; de->de_flags &= ~DE_WHITEOUT; error = devfs_allocv(de, dvp->v_mount, LK_EXCLUSIVE, vpp); return (error); notfound: sx_xunlock(&dmp->dm_lock); return (error); } /* ARGSUSED */ static int devfs_open(struct vop_open_args *ap) { struct thread *td = ap->a_td; struct vnode *vp = ap->a_vp; struct cdev *dev = vp->v_rdev; struct file *fp = ap->a_fp; int error, ref, vlocked; struct cdevsw *dsw; struct file *fpop; struct mtx *mtxp; if (vp->v_type == VBLK) return (ENXIO); if (dev == NULL) return (ENXIO); /* Make this field valid before any I/O in d_open. */ if (dev->si_iosize_max == 0) dev->si_iosize_max = DFLTPHYS; dsw = dev_refthread(dev, &ref); if (dsw == NULL) return (ENXIO); if (fp == NULL && dsw->d_fdopen != NULL) { dev_relthread(dev, ref); return (ENXIO); } vlocked = VOP_ISLOCKED(vp); VOP_UNLOCK(vp, 0); fpop = td->td_fpop; td->td_fpop = fp; if (fp != NULL) { fp->f_data = dev; fp->f_vnode = vp; } if (dsw->d_fdopen != NULL) error = dsw->d_fdopen(dev, ap->a_mode, td, fp); else error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td); /* Clean up any cdevpriv upon error. */ if (error != 0) devfs_clear_cdevpriv(); td->td_fpop = fpop; vn_lock(vp, vlocked | LK_RETRY); dev_relthread(dev, ref); if (error != 0) { if (error == ERESTART) error = EINTR; return (error); } #if 0 /* /dev/console */ KASSERT(fp != NULL, ("Could not vnode bypass device on NULL fp")); #else if (fp == NULL) return (error); #endif if (fp->f_ops == &badfileops) finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f); mtxp = mtx_pool_find(mtxpool_sleep, fp); /* * Hint to the dofilewrite() to not force the buffer draining * on the writer to the file. Most likely, the write would * not need normal buffers. */ mtx_lock(mtxp); fp->f_vnread_flags |= FDEVFS_VNODE; mtx_unlock(mtxp); return (error); } static int devfs_pathconf(struct vop_pathconf_args *ap) { switch (ap->a_name) { case _PC_MAC_PRESENT: #ifdef MAC /* * If MAC is enabled, devfs automatically supports * trivial non-persistant label storage. */ *ap->a_retval = 1; #else *ap->a_retval = 0; #endif return (0); default: return (vop_stdpathconf(ap)); } /* NOTREACHED */ } /* ARGSUSED */ static int devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td) { struct cdev *dev; struct cdevsw *dsw; int error, ref; struct file *fpop; fpop = td->td_fpop; error = devfs_fp_check(fp, &dev, &dsw, &ref); if (error != 0) { error = vnops.fo_poll(fp, events, cred, td); return (error); } error = dsw->d_poll(dev, events, td); td->td_fpop = fpop; dev_relthread(dev, ref); return(error); } /* * Print out the contents of a special device vnode. */ static int devfs_print(struct vop_print_args *ap) { printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev)); return (0); } static int devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) { struct cdev *dev; int ioflag, error, ref; ssize_t resid; struct cdevsw *dsw; struct file *fpop; if (uio->uio_resid > DEVFS_IOSIZE_MAX) return (EINVAL); fpop = td->td_fpop; error = devfs_fp_check(fp, &dev, &dsw, &ref); if (error != 0) { error = vnops.fo_read(fp, uio, cred, flags, td); return (error); } resid = uio->uio_resid; ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT); if (ioflag & O_DIRECT) ioflag |= IO_DIRECT; foffset_lock_uio(fp, uio, flags | FOF_NOLOCK); error = dsw->d_read(dev, uio, ioflag); if (uio->uio_resid != resid || (error == 0 && resid != 0)) devfs_timestamp(&dev->si_atime); td->td_fpop = fpop; dev_relthread(dev, ref); foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF); return (error); } static int devfs_readdir(struct vop_readdir_args *ap) { int error; struct uio *uio; struct dirent *dp; struct devfs_dirent *dd; struct devfs_dirent *de; struct devfs_mount *dmp; off_t off; int *tmp_ncookies = NULL; if (ap->a_vp->v_type != VDIR) return (ENOTDIR); uio = ap->a_uio; if (uio->uio_offset < 0) return (EINVAL); /* * XXX: This is a temporary hack to get around this filesystem not * supporting cookies. We store the location of the ncookies pointer * in a temporary variable before calling vfs_subr.c:vfs_read_dirent() * and set the number of cookies to 0. We then set the pointer to * NULL so that vfs_read_dirent doesn't try to call realloc() on * ap->a_cookies. Later in this function, we restore the ap->a_ncookies * pointer to its original location before returning to the caller. */ if (ap->a_ncookies != NULL) { tmp_ncookies = ap->a_ncookies; *ap->a_ncookies = 0; ap->a_ncookies = NULL; } dmp = VFSTODEVFS(ap->a_vp->v_mount); if (devfs_populate_vp(ap->a_vp) != 0) { if (tmp_ncookies != NULL) ap->a_ncookies = tmp_ncookies; return (EIO); } error = 0; de = ap->a_vp->v_data; off = 0; TAILQ_FOREACH(dd, &de->de_dlist, de_list) { KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__)); if (dd->de_flags & (DE_COVERED | DE_WHITEOUT)) continue; if (devfs_prison_check(dd, uio->uio_td)) continue; if (dd->de_dirent->d_type == DT_DIR) de = dd->de_dir; else de = dd; dp = dd->de_dirent; + MPASS(dp->d_reclen == GENERIC_DIRSIZ(dp)); if (dp->d_reclen > uio->uio_resid) break; dp->d_fileno = de->de_inode; if (off >= uio->uio_offset) { error = vfs_read_dirent(ap, dp, off); if (error) break; } off += dp->d_reclen; } sx_xunlock(&dmp->dm_lock); uio->uio_offset = off; /* * Restore ap->a_ncookies if it wasn't originally NULL in the first * place. */ if (tmp_ncookies != NULL) ap->a_ncookies = tmp_ncookies; return (error); } static int devfs_readlink(struct vop_readlink_args *ap) { struct devfs_dirent *de; de = ap->a_vp->v_data; return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio)); } static int devfs_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp; struct devfs_dirent *de; vp = ap->a_vp; mtx_lock(&devfs_de_interlock); de = vp->v_data; if (de != NULL) { de->de_vnode = NULL; vp->v_data = NULL; } mtx_unlock(&devfs_de_interlock); vnode_destroy_vobject(vp); return (0); } static int devfs_reclaim_vchr(struct vop_reclaim_args *ap) { struct vnode *vp; struct cdev *dev; vp = ap->a_vp; MPASS(vp->v_type == VCHR); devfs_reclaim(ap); VI_LOCK(vp); dev_lock(); dev = vp->v_rdev; vp->v_rdev = NULL; if (dev != NULL) dev->si_usecount -= vp->v_usecount; dev_unlock(); VI_UNLOCK(vp); if (dev != NULL) dev_rel(dev); return (0); } static int devfs_remove(struct vop_remove_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode *vp = ap->a_vp; struct devfs_dirent *dd; struct devfs_dirent *de, *de_covered; struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount); ASSERT_VOP_ELOCKED(dvp, "devfs_remove"); ASSERT_VOP_ELOCKED(vp, "devfs_remove"); sx_xlock(&dmp->dm_lock); dd = ap->a_dvp->v_data; de = vp->v_data; if (de->de_cdp == NULL) { TAILQ_REMOVE(&dd->de_dlist, de, de_list); if (de->de_dirent->d_type == DT_LNK) { de_covered = devfs_find(dd, de->de_dirent->d_name, de->de_dirent->d_namlen, 0); if (de_covered != NULL) de_covered->de_flags &= ~DE_COVERED; } /* We need to unlock dvp because devfs_delete() may lock it. */ VOP_UNLOCK(vp, 0); if (dvp != vp) VOP_UNLOCK(dvp, 0); devfs_delete(dmp, de, 0); sx_xunlock(&dmp->dm_lock); if (dvp != vp) vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } else { de->de_flags |= DE_WHITEOUT; sx_xunlock(&dmp->dm_lock); } return (0); } /* * Revoke is called on a tty when a terminal session ends. The vnode * is orphaned by setting v_op to deadfs so we need to let go of it * as well so that we create a new one next time around. * */ static int devfs_revoke(struct vop_revoke_args *ap) { struct vnode *vp = ap->a_vp, *vp2; struct cdev *dev; struct cdev_priv *cdp; struct devfs_dirent *de; u_int i; KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL")); dev = vp->v_rdev; cdp = cdev2priv(dev); dev_lock(); cdp->cdp_inuse++; dev_unlock(); vhold(vp); vgone(vp); vdrop(vp); VOP_UNLOCK(vp,0); loop: for (;;) { mtx_lock(&devfs_de_interlock); dev_lock(); vp2 = NULL; for (i = 0; i <= cdp->cdp_maxdirent; i++) { de = cdp->cdp_dirents[i]; if (de == NULL) continue; vp2 = de->de_vnode; if (vp2 != NULL) { dev_unlock(); VI_LOCK(vp2); mtx_unlock(&devfs_de_interlock); if (vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK, curthread)) goto loop; vhold(vp2); vgone(vp2); vdrop(vp2); vput(vp2); break; } } if (vp2 != NULL) { continue; } dev_unlock(); mtx_unlock(&devfs_de_interlock); break; } dev_lock(); cdp->cdp_inuse--; if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) { TAILQ_REMOVE(&cdevp_list, cdp, cdp_list); dev_unlock(); dev_rel(&cdp->cdp_c); } else dev_unlock(); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); return (0); } static int devfs_rioctl(struct vop_ioctl_args *ap) { struct vnode *vp; struct devfs_mount *dmp; int error; vp = ap->a_vp; vn_lock(vp, LK_SHARED | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { VOP_UNLOCK(vp, 0); return (EBADF); } dmp = VFSTODEVFS(vp->v_mount); sx_xlock(&dmp->dm_lock); VOP_UNLOCK(vp, 0); DEVFS_DMP_HOLD(dmp); devfs_populate(dmp); if (DEVFS_DMP_DROP(dmp)) { sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); return (ENOENT); } error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td); sx_xunlock(&dmp->dm_lock); return (error); } static int devfs_rread(struct vop_read_args *ap) { if (ap->a_vp->v_type != VDIR) return (EINVAL); return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL)); } static int devfs_setattr(struct vop_setattr_args *ap) { struct devfs_dirent *de; struct vattr *vap; struct vnode *vp; struct thread *td; int c, error; uid_t uid; gid_t gid; vap = ap->a_vap; vp = ap->a_vp; td = curthread; if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_flags != VNOVAL && vap->va_flags != 0) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } error = devfs_populate_vp(vp); if (error != 0) return (error); de = vp->v_data; if (vp->v_type == VDIR) de = de->de_dir; c = 0; if (vap->va_uid == (uid_t)VNOVAL) uid = de->de_uid; else uid = vap->va_uid; if (vap->va_gid == (gid_t)VNOVAL) gid = de->de_gid; else gid = vap->va_gid; if (uid != de->de_uid || gid != de->de_gid) { if ((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid || (gid != de->de_gid && !groupmember(gid, ap->a_cred))) { error = priv_check(td, PRIV_VFS_CHOWN); if (error != 0) goto ret; } de->de_uid = uid; de->de_gid = gid; c = 1; } if (vap->va_mode != (mode_t)VNOVAL) { if (ap->a_cred->cr_uid != de->de_uid) { error = priv_check(td, PRIV_VFS_ADMIN); if (error != 0) goto ret; } de->de_mode = vap->va_mode; c = 1; } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { error = vn_utimes_perm(vp, vap, ap->a_cred, td); if (error != 0) goto ret; if (vap->va_atime.tv_sec != VNOVAL) { if (vp->v_type == VCHR) vp->v_rdev->si_atime = vap->va_atime; else de->de_atime = vap->va_atime; } if (vap->va_mtime.tv_sec != VNOVAL) { if (vp->v_type == VCHR) vp->v_rdev->si_mtime = vap->va_mtime; else de->de_mtime = vap->va_mtime; } c = 1; } if (c) { if (vp->v_type == VCHR) vfs_timestamp(&vp->v_rdev->si_ctime); else vfs_timestamp(&de->de_mtime); } ret: sx_xunlock(&VFSTODEVFS(vp->v_mount)->dm_lock); return (error); } #ifdef MAC static int devfs_setlabel(struct vop_setlabel_args *ap) { struct vnode *vp; struct devfs_dirent *de; vp = ap->a_vp; de = vp->v_data; mac_vnode_relabel(ap->a_cred, vp, ap->a_label); mac_devfs_update(vp->v_mount, de, vp); return (0); } #endif static int devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td) { return (vnops.fo_stat(fp, sb, cred, td)); } static int devfs_symlink(struct vop_symlink_args *ap) { int i, error; struct devfs_dirent *dd; struct devfs_dirent *de, *de_covered, *de_dotdot; struct devfs_mount *dmp; error = priv_check(curthread, PRIV_DEVFS_SYMLINK); if (error) return(error); dmp = VFSTODEVFS(ap->a_dvp->v_mount); if (devfs_populate_vp(ap->a_dvp) != 0) return (ENOENT); dd = ap->a_dvp->v_data; de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen); de->de_flags = DE_USER; de->de_uid = 0; de->de_gid = 0; de->de_mode = 0755; de->de_inode = alloc_unr(devfs_inos); de->de_dir = dd; de->de_dirent->d_type = DT_LNK; i = strlen(ap->a_target) + 1; de->de_symlink = malloc(i, M_DEVFS, M_WAITOK); bcopy(ap->a_target, de->de_symlink, i); #ifdef MAC mac_devfs_create_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de); #endif de_covered = devfs_find(dd, de->de_dirent->d_name, de->de_dirent->d_namlen, 0); if (de_covered != NULL) { if ((de_covered->de_flags & DE_USER) != 0) { devfs_delete(dmp, de, DEVFS_DEL_NORECURSE); sx_xunlock(&dmp->dm_lock); return (EEXIST); } KASSERT((de_covered->de_flags & DE_COVERED) == 0, ("devfs_symlink: entry %p already covered", de_covered)); de_covered->de_flags |= DE_COVERED; } de_dotdot = TAILQ_FIRST(&dd->de_dlist); /* "." */ de_dotdot = TAILQ_NEXT(de_dotdot, de_list); /* ".." */ TAILQ_INSERT_AFTER(&dd->de_dlist, de_dotdot, de, de_list); devfs_dir_ref_de(dmp, dd); devfs_rules_apply(dmp, de); return (devfs_allocv(de, ap->a_dvp->v_mount, LK_EXCLUSIVE, ap->a_vpp)); } static int devfs_truncate_f(struct file *fp, off_t length, struct ucred *cred, struct thread *td) { return (vnops.fo_truncate(fp, length, cred, td)); } static int devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) { struct cdev *dev; int error, ioflag, ref; ssize_t resid; struct cdevsw *dsw; struct file *fpop; if (uio->uio_resid > DEVFS_IOSIZE_MAX) return (EINVAL); fpop = td->td_fpop; error = devfs_fp_check(fp, &dev, &dsw, &ref); if (error != 0) { error = vnops.fo_write(fp, uio, cred, flags, td); return (error); } KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC); if (ioflag & O_DIRECT) ioflag |= IO_DIRECT; foffset_lock_uio(fp, uio, flags | FOF_NOLOCK); resid = uio->uio_resid; error = dsw->d_write(dev, uio, ioflag); if (uio->uio_resid != resid || (error == 0 && resid != 0)) { devfs_timestamp(&dev->si_ctime); dev->si_mtime = dev->si_ctime; } td->td_fpop = fpop; dev_relthread(dev, ref); foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF); return (error); } static int devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, struct thread *td) { struct cdev *dev; struct cdevsw *dsw; struct mount *mp; struct vnode *vp; struct file *fpop; vm_object_t object; vm_prot_t maxprot; int error, ref; vp = fp->f_vnode; /* * Ensure that file and memory protections are * compatible. */ mp = vp->v_mount; if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) { maxprot = VM_PROT_NONE; if ((prot & VM_PROT_EXECUTE) != 0) return (EACCES); } else maxprot = VM_PROT_EXECUTE; if ((fp->f_flag & FREAD) != 0) maxprot |= VM_PROT_READ; else if ((prot & VM_PROT_READ) != 0) return (EACCES); /* * If we are sharing potential changes via MAP_SHARED and we * are trying to get write permission although we opened it * without asking for it, bail out. * * Note that most character devices always share mappings. * The one exception is that D_MMAP_ANON devices * (i.e. /dev/zero) permit private writable mappings. * * Rely on vm_mmap_cdev() to fail invalid MAP_PRIVATE requests * as well as updating maxprot to permit writing for * D_MMAP_ANON devices rather than doing that here. */ if ((flags & MAP_SHARED) != 0) { if ((fp->f_flag & FWRITE) != 0) maxprot |= VM_PROT_WRITE; else if ((prot & VM_PROT_WRITE) != 0) return (EACCES); } maxprot &= cap_maxprot; fpop = td->td_fpop; error = devfs_fp_check(fp, &dev, &dsw, &ref); if (error != 0) return (error); error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff, &object); td->td_fpop = fpop; dev_relthread(dev, ref); if (error != 0) return (error); error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, foff, FALSE, td); if (error != 0) vm_object_deallocate(object); return (error); } dev_t dev2udev(struct cdev *x) { if (x == NULL) return (NODEV); return (cdev2priv(x)->cdp_inode); } static struct fileops devfs_ops_f = { .fo_read = devfs_read_f, .fo_write = devfs_write_f, .fo_truncate = devfs_truncate_f, .fo_ioctl = devfs_ioctl_f, .fo_poll = devfs_poll_f, .fo_kqfilter = devfs_kqfilter_f, .fo_stat = devfs_stat_f, .fo_close = devfs_close_f, .fo_chmod = vn_chmod, .fo_chown = vn_chown, .fo_sendfile = vn_sendfile, .fo_seek = vn_seek, .fo_fill_kinfo = vn_fill_kinfo, .fo_mmap = devfs_mmap_f, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; /* Vops for non-CHR vnodes in /dev. */ static struct vop_vector devfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = devfs_access, .vop_getattr = devfs_getattr, .vop_ioctl = devfs_rioctl, .vop_lookup = devfs_lookup, .vop_mknod = devfs_mknod, .vop_pathconf = devfs_pathconf, .vop_read = devfs_rread, .vop_readdir = devfs_readdir, .vop_readlink = devfs_readlink, .vop_reclaim = devfs_reclaim, .vop_remove = devfs_remove, .vop_revoke = devfs_revoke, .vop_setattr = devfs_setattr, #ifdef MAC .vop_setlabel = devfs_setlabel, #endif .vop_symlink = devfs_symlink, .vop_vptocnp = devfs_vptocnp, }; /* Vops for VCHR vnodes in /dev. */ static struct vop_vector devfs_specops = { .vop_default = &default_vnodeops, .vop_access = devfs_access, .vop_bmap = VOP_PANIC, .vop_close = devfs_close, .vop_create = VOP_PANIC, .vop_fsync = vop_stdfsync, .vop_getattr = devfs_getattr, .vop_ioctl = devfs_ioctl, .vop_link = VOP_PANIC, .vop_mkdir = VOP_PANIC, .vop_mknod = VOP_PANIC, .vop_open = devfs_open, .vop_pathconf = devfs_pathconf, .vop_poll = dead_poll, .vop_print = devfs_print, .vop_read = dead_read, .vop_readdir = VOP_PANIC, .vop_readlink = VOP_PANIC, .vop_reallocblks = VOP_PANIC, .vop_reclaim = devfs_reclaim_vchr, .vop_remove = devfs_remove, .vop_rename = VOP_PANIC, .vop_revoke = devfs_revoke, .vop_rmdir = VOP_PANIC, .vop_setattr = devfs_setattr, #ifdef MAC .vop_setlabel = devfs_setlabel, #endif .vop_strategy = VOP_PANIC, .vop_symlink = VOP_PANIC, .vop_vptocnp = devfs_vptocnp, .vop_write = dead_write, }; /* * Our calling convention to the device drivers used to be that we passed * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_ * flags instead since that's what open(), close() and ioctl() takes and * we don't really want vnode.h in device drivers. * We solved the source compatibility by redefining some vnode flags to * be the same as the fcntl ones and by sending down the bitwise OR of * the respective fcntl/vnode flags. These CTASSERTS make sure nobody * pulls the rug out under this. */ CTASSERT(O_NONBLOCK == IO_NDELAY); CTASSERT(O_FSYNC == IO_SYNC); Index: head/sys/fs/fdescfs/fdesc_vnops.c =================================================================== --- head/sys/fs/fdescfs/fdesc_vnops.c (revision 318735) +++ head/sys/fs/fdescfs/fdesc_vnops.c (revision 318736) @@ -1,569 +1,569 @@ /*- * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fdesc_vnops.c 8.9 (Berkeley) 1/21/94 * * $FreeBSD$ */ /* * /dev/fd Filesystem */ #include #include #include #include #include #include #include /* boottime */ #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #define NFDCACHE 4 #define FD_NHASH(ix) \ (&fdhashtbl[(ix) & fdhash]) static LIST_HEAD(fdhashhead, fdescnode) *fdhashtbl; static u_long fdhash; struct mtx fdesc_hashmtx; static vop_getattr_t fdesc_getattr; static vop_lookup_t fdesc_lookup; static vop_open_t fdesc_open; static vop_readdir_t fdesc_readdir; static vop_reclaim_t fdesc_reclaim; static vop_setattr_t fdesc_setattr; static struct vop_vector fdesc_vnodeops = { .vop_default = &default_vnodeops, .vop_access = VOP_NULL, .vop_getattr = fdesc_getattr, .vop_lookup = fdesc_lookup, .vop_open = fdesc_open, .vop_pathconf = vop_stdpathconf, .vop_readdir = fdesc_readdir, .vop_reclaim = fdesc_reclaim, .vop_setattr = fdesc_setattr, }; static void fdesc_insmntque_dtr(struct vnode *, void *); static void fdesc_remove_entry(struct fdescnode *); /* * Initialise cache headers */ int fdesc_init(struct vfsconf *vfsp) { mtx_init(&fdesc_hashmtx, "fdescfs_hash", NULL, MTX_DEF); fdhashtbl = hashinit(NFDCACHE, M_CACHE, &fdhash); return (0); } /* * Uninit ready for unload. */ int fdesc_uninit(struct vfsconf *vfsp) { hashdestroy(fdhashtbl, M_CACHE, fdhash); mtx_destroy(&fdesc_hashmtx); return (0); } /* * If allocating vnode fails, call this. */ static void fdesc_insmntque_dtr(struct vnode *vp, void *arg) { vgone(vp); vput(vp); } /* * Remove an entry from the hash if it exists. */ static void fdesc_remove_entry(struct fdescnode *fd) { struct fdhashhead *fc; struct fdescnode *fd2; fc = FD_NHASH(fd->fd_ix); mtx_lock(&fdesc_hashmtx); LIST_FOREACH(fd2, fc, fd_hash) { if (fd == fd2) { LIST_REMOVE(fd, fd_hash); break; } } mtx_unlock(&fdesc_hashmtx); } int fdesc_allocvp(fdntype ftype, unsigned fd_fd, int ix, struct mount *mp, struct vnode **vpp) { struct fdescmount *fmp; struct fdhashhead *fc; struct fdescnode *fd, *fd2; struct vnode *vp, *vp2; struct thread *td; int error = 0; td = curthread; fc = FD_NHASH(ix); loop: mtx_lock(&fdesc_hashmtx); /* * If a forced unmount is progressing, we need to drop it. The flags are * protected by the hashmtx. */ fmp = (struct fdescmount *)mp->mnt_data; if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) { mtx_unlock(&fdesc_hashmtx); return (-1); } LIST_FOREACH(fd, fc, fd_hash) { if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) { /* Get reference to vnode in case it's being free'd */ vp = fd->fd_vnode; VI_LOCK(vp); mtx_unlock(&fdesc_hashmtx); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) goto loop; *vpp = vp; return (0); } } mtx_unlock(&fdesc_hashmtx); fd = malloc(sizeof(struct fdescnode), M_TEMP, M_WAITOK); error = getnewvnode("fdescfs", mp, &fdesc_vnodeops, &vp); if (error) { free(fd, M_TEMP); return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vp->v_data = fd; fd->fd_vnode = vp; fd->fd_type = ftype; fd->fd_fd = fd_fd; fd->fd_ix = ix; error = insmntque1(vp, mp, fdesc_insmntque_dtr, NULL); if (error != 0) { *vpp = NULLVP; return (error); } /* Make sure that someone didn't beat us when inserting the vnode. */ mtx_lock(&fdesc_hashmtx); /* * If a forced unmount is progressing, we need to drop it. The flags are * protected by the hashmtx. */ fmp = (struct fdescmount *)mp->mnt_data; if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) { mtx_unlock(&fdesc_hashmtx); vgone(vp); vput(vp); *vpp = NULLVP; return (-1); } LIST_FOREACH(fd2, fc, fd_hash) { if (fd2->fd_ix == ix && fd2->fd_vnode->v_mount == mp) { /* Get reference to vnode in case it's being free'd */ vp2 = fd2->fd_vnode; VI_LOCK(vp2); mtx_unlock(&fdesc_hashmtx); error = vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK, td); /* Someone beat us, dec use count and wait for reclaim */ vgone(vp); vput(vp); /* If we didn't get it, return no vnode. */ if (error) vp2 = NULLVP; *vpp = vp2; return (error); } } /* If we came here, we can insert it safely. */ LIST_INSERT_HEAD(fc, fd, fd_hash); mtx_unlock(&fdesc_hashmtx); *vpp = vp; return (0); } struct fdesc_get_ino_args { fdntype ftype; unsigned fd_fd; int ix; struct file *fp; struct thread *td; }; static int fdesc_get_ino_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **rvp) { struct fdesc_get_ino_args *a; int error; a = arg; error = fdesc_allocvp(a->ftype, a->fd_fd, a->ix, mp, rvp); fdrop(a->fp, a->td); return (error); } /* * vp is the current namei directory * ndp is the name to locate in that directory... */ static int fdesc_lookup(struct vop_lookup_args *ap) { struct vnode **vpp = ap->a_vpp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; char *pname = cnp->cn_nameptr; struct thread *td = cnp->cn_thread; struct file *fp; struct fdesc_get_ino_args arg; cap_rights_t rights; int nlen = cnp->cn_namelen; u_int fd, fd1; int error; struct vnode *fvp; if ((cnp->cn_flags & ISLASTCN) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EROFS; goto bad; } if (cnp->cn_namelen == 1 && *pname == '.') { *vpp = dvp; VREF(dvp); return (0); } if (VTOFDESC(dvp)->fd_type != Froot) { error = ENOTDIR; goto bad; } fd = 0; /* the only time a leading 0 is acceptable is if it's "0" */ if (*pname == '0' && nlen != 1) { error = ENOENT; goto bad; } while (nlen--) { if (*pname < '0' || *pname > '9') { error = ENOENT; goto bad; } fd1 = 10 * fd + *pname++ - '0'; if (fd1 < fd) { error = ENOENT; goto bad; } fd = fd1; } /* * No rights to check since 'fp' isn't actually used. */ if ((error = fget(td, fd, cap_rights_init(&rights), &fp)) != 0) goto bad; /* Check if we're looking up ourselves. */ if (VTOFDESC(dvp)->fd_ix == FD_DESC + fd) { /* * In case we're holding the last reference to the file, the dvp * will be re-acquired. */ vhold(dvp); VOP_UNLOCK(dvp, 0); fdrop(fp, td); /* Re-aquire the lock afterwards. */ vn_lock(dvp, LK_RETRY | LK_EXCLUSIVE); vdrop(dvp); fvp = dvp; if ((dvp->v_iflag & VI_DOOMED) != 0) error = ENOENT; } else { /* * Unlock our root node (dvp) when doing this, since we might * deadlock since the vnode might be locked by another thread * and the root vnode lock will be obtained afterwards (in case * we're looking up the fd of the root vnode), which will be the * opposite lock order. Vhold the root vnode first so we don't * lose it. */ arg.ftype = Fdesc; arg.fd_fd = fd; arg.ix = FD_DESC + fd; arg.fp = fp; arg.td = td; error = vn_vget_ino_gen(dvp, fdesc_get_ino_alloc, &arg, LK_EXCLUSIVE, &fvp); } if (error) goto bad; *vpp = fvp; return (0); bad: *vpp = NULL; return (error); } static int fdesc_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; if (VTOFDESC(vp)->fd_type == Froot) return (0); /* * XXX Kludge: set td->td_proc->p_dupfd to contain the value of the file * descriptor being sought for duplication. The error return ensures * that the vnode for this device will be released by vn_open. Open * will detect this special error and take the actions in dupfdopen. * Other callers of vn_open or VOP_OPEN will simply report the * error. */ ap->a_td->td_dupfd = VTOFDESC(vp)->fd_fd; /* XXX */ return (ENODEV); } static int fdesc_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; struct timeval boottime; getboottime(&boottime); vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; vap->va_fileid = VTOFDESC(vp)->fd_ix; vap->va_uid = 0; vap->va_gid = 0; vap->va_blocksize = DEV_BSIZE; vap->va_atime.tv_sec = boottime.tv_sec; vap->va_atime.tv_nsec = 0; vap->va_mtime = vap->va_atime; vap->va_ctime = vap->va_mtime; vap->va_gen = 0; vap->va_flags = 0; vap->va_bytes = 0; vap->va_filerev = 0; switch (VTOFDESC(vp)->fd_type) { case Froot: vap->va_type = VDIR; vap->va_nlink = 2; vap->va_size = DEV_BSIZE; vap->va_rdev = NODEV; break; case Fdesc: vap->va_type = VCHR; vap->va_nlink = 1; vap->va_size = 0; vap->va_rdev = makedev(0, vap->va_fileid); break; default: panic("fdesc_getattr"); break; } vp->v_type = vap->va_type; return (0); } static int fdesc_setattr(struct vop_setattr_args *ap) { struct vattr *vap = ap->a_vap; struct vnode *vp; struct mount *mp; struct file *fp; struct thread *td = curthread; cap_rights_t rights; unsigned fd; int error; /* * Can't mess with the root vnode */ if (VTOFDESC(ap->a_vp)->fd_type == Froot) return (EACCES); fd = VTOFDESC(ap->a_vp)->fd_fd; /* * Allow setattr where there is an underlying vnode. */ error = getvnode(td, fd, cap_rights_init(&rights, CAP_EXTATTR_SET), &fp); if (error) { /* * getvnode() returns EINVAL if the file descriptor is not * backed by a vnode. Silently drop all changes except * chflags(2) in this case. */ if (error == EINVAL) { if (vap->va_flags != VNOVAL) error = EOPNOTSUPP; else error = 0; } return (error); } vp = fp->f_vnode; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) == 0) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_SETATTR(vp, ap->a_vap, ap->a_cred); VOP_UNLOCK(vp, 0); vn_finished_write(mp); } fdrop(fp, td); return (error); } -#define UIO_MX 16 +#define UIO_MX _GENERIC_DIRLEN(10) /* number of symbols in INT_MAX printout */ static int fdesc_readdir(struct vop_readdir_args *ap) { struct uio *uio = ap->a_uio; struct filedesc *fdp; struct dirent d; struct dirent *dp = &d; int error, i, off, fcnt; if (VTOFDESC(ap->a_vp)->fd_type != Froot) panic("fdesc_readdir: not dir"); if (ap->a_ncookies != NULL) *ap->a_ncookies = 0; off = (int)uio->uio_offset; if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 || uio->uio_resid < UIO_MX) return (EINVAL); i = (u_int)off / UIO_MX; fdp = uio->uio_td->td_proc->p_fd; error = 0; fcnt = i - 2; /* The first two nodes are `.' and `..' */ FILEDESC_SLOCK(fdp); while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) { bzero((caddr_t)dp, UIO_MX); switch (i) { case 0: /* `.' */ case 1: /* `..' */ dp->d_fileno = i + FD_ROOT; dp->d_namlen = i + 1; dp->d_reclen = UIO_MX; bcopy("..", dp->d_name, dp->d_namlen); dp->d_name[i + 1] = '\0'; dp->d_type = DT_DIR; break; default: if (fdp->fd_ofiles[fcnt].fde_file == NULL) break; dp->d_namlen = sprintf(dp->d_name, "%d", fcnt); dp->d_reclen = UIO_MX; dp->d_type = DT_CHR; dp->d_fileno = i + FD_DESC; break; } if (dp->d_namlen != 0) { /* * And ship to userland */ FILEDESC_SUNLOCK(fdp); error = uiomove(dp, UIO_MX, uio); if (error) goto done; FILEDESC_SLOCK(fdp); } i++; fcnt++; } FILEDESC_SUNLOCK(fdp); done: uio->uio_offset = i * UIO_MX; return (error); } static int fdesc_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp; struct fdescnode *fd; vp = ap->a_vp; fd = VTOFDESC(vp); fdesc_remove_entry(fd); free(vp->v_data, M_TEMP); vp->v_data = NULL; return (0); } Index: head/sys/fs/nandfs/nandfs_fs.h =================================================================== --- head/sys/fs/nandfs/nandfs_fs.h (revision 318735) +++ head/sys/fs/nandfs/nandfs_fs.h (revision 318736) @@ -1,565 +1,565 @@ /*- * Copyright (c) 2010-2012 Semihalf * Copyright (c) 2008, 2009 Reinoud Zandijk * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Original definitions written by Koji Sato * and Ryusuke Konishi * From: NetBSD: nandfs_fs.h,v 1.1 2009/07/18 16:31:42 reinoud * * $FreeBSD$ */ #ifndef _NANDFS_FS_H #define _NANDFS_FS_H #include #define MNINDIR(fsdev) ((fsdev)->nd_blocksize / sizeof(nandfs_daddr_t)) /* * Inode structure. There are a few dedicated inode numbers that are * defined here first. */ #define NANDFS_WHT_INO 1 /* Whiteout ino */ #define NANDFS_ROOT_INO 2 /* Root file inode */ #define NANDFS_DAT_INO 3 /* DAT file */ #define NANDFS_CPFILE_INO 4 /* checkpoint file */ #define NANDFS_SUFILE_INO 5 /* segment usage file */ #define NANDFS_IFILE_INO 6 /* ifile */ #define NANDFS_GC_INO 7 /* Cleanerd node */ #define NANDFS_ATIME_INO 8 /* Atime file (reserved) */ #define NANDFS_XATTR_INO 9 /* Xattribute file (reserved) */ #define NANDFS_SKETCH_INO 10 /* Sketch file (obsolete) */ #define NANDFS_USER_INO 11 /* First user's file inode number */ #define NANDFS_SYS_NODE(ino) \ (((ino) >= NANDFS_DAT_INO) && ((ino) <= NANDFS_GC_INO)) #define NANDFS_NDADDR 12 /* Direct addresses in inode. */ #define NANDFS_NIADDR 3 /* Indirect addresses in inode. */ typedef int64_t nandfs_daddr_t; typedef int64_t nandfs_lbn_t; struct nandfs_inode { uint64_t i_blocks; /* 0: size in device blocks */ uint64_t i_size; /* 8: size in bytes */ uint64_t i_ctime; /* 16: creation time in seconds */ uint64_t i_mtime; /* 24: modification time in seconds part*/ uint32_t i_ctime_nsec; /* 32: creation time nanoseconds part */ uint32_t i_mtime_nsec; /* 36: modification time in nanoseconds */ uint32_t i_uid; /* 40: user id */ uint32_t i_gid; /* 44: group id */ uint16_t i_mode; /* 48: file mode */ uint16_t i_links_count; /* 50: number of references to the inode*/ uint32_t i_flags; /* 52: NANDFS_*_FL flags */ nandfs_daddr_t i_special; /* 56: special */ nandfs_daddr_t i_db[NANDFS_NDADDR]; /* 64: Direct disk blocks. */ nandfs_daddr_t i_ib[NANDFS_NIADDR]; /* 160: Indirect disk blocks. */ uint64_t i_xattr; /* 184: reserved for extended attributes*/ uint32_t i_generation; /* 192: file generation for NFS */ uint32_t i_pad[15]; /* 196: make it 64 bits aligned */ }; #ifdef _KERNEL CTASSERT(sizeof(struct nandfs_inode) == 256); #endif /* * Each checkpoint/snapshot has a super root. * * The super root holds the inodes of the three system files: `dat', `cp' and * 'su' files. All other FS state is defined by those. * * It is CRC checksum'ed and time stamped. */ struct nandfs_super_root { uint32_t sr_sum; /* check-sum */ uint16_t sr_bytes; /* byte count of this structure */ uint16_t sr_flags; /* reserved for flags */ uint64_t sr_nongc_ctime; /* timestamp, not for cleaner(?) */ struct nandfs_inode sr_dat; /* DAT, virt->phys translation inode */ struct nandfs_inode sr_cpfile; /* CP, checkpoints inode */ struct nandfs_inode sr_sufile; /* SU, segment usage inode */ }; #define NANDFS_SR_MDT_OFFSET(inode_size, i) \ ((uint32_t)&((struct nandfs_super_root *)0)->sr_dat + \ (inode_size) * (i)) #define NANDFS_SR_DAT_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 0) #define NANDFS_SR_CPFILE_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 1) #define NANDFS_SR_SUFILE_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 2) #define NANDFS_SR_BYTES (sizeof(struct nandfs_super_root)) /* * The superblock describes the basic structure and mount history. It also * records some sizes of structures found on the disc for sanity checks. * * The superblock is stored at two places: NANDFS_SB_OFFSET_BYTES and * NANDFS_SB2_OFFSET_BYTES. */ /* File system states stored on media in superblock's sbp->s_state */ #define NANDFS_VALID_FS 0x0001 /* cleanly unmounted and all is ok */ #define NANDFS_ERROR_FS 0x0002 /* there were errors detected, fsck */ #define NANDFS_RESIZE_FS 0x0004 /* resize required, XXX unknown flag*/ #define NANDFS_MOUNT_STATE_BITS "\20\1VALID_FS\2ERROR_FS\3RESIZE_FS" /* * Brief description of control structures: * * NANDFS_NFSAREAS first blocks contain fsdata and some amount of super blocks. * Simple round-robin policy is used in order to choose which block will * contain new super block. * * Simple case with 2 blocks: * 1: fsdata sblock1 [sblock3 [sblock5 ..]] * 2: fsdata sblock2 [sblock4 [sblock6 ..]] */ struct nandfs_fsdata { uint16_t f_magic; uint16_t f_bytes; uint32_t f_sum; /* checksum of fsdata */ uint32_t f_rev_level; /* major disk format revision */ uint64_t f_ctime; /* creation time (execution time of newfs) */ /* Block size represented as: blocksize = 1 << (f_log_block_size + 10) */ uint32_t f_log_block_size; uint16_t f_inode_size; /* size of an inode */ uint16_t f_dat_entry_size; /* size of a dat entry */ uint16_t f_checkpoint_size; /* size of a checkpoint */ uint16_t f_segment_usage_size; /* size of a segment usage */ uint16_t f_sbbytes; /* byte count of CRC calculation for super blocks. s_reserved is excluded! */ uint16_t f_errors; /* behaviour on detecting errors */ uint32_t f_erasesize; uint64_t f_nsegments; /* number of segm. in filesystem */ nandfs_daddr_t f_first_data_block; /* 1st seg disk block number */ uint32_t f_blocks_per_segment; /* number of blocks per segment */ uint32_t f_r_segments_percentage; /* reserved segments percentage */ struct uuid f_uuid; /* 128-bit uuid for volume */ char f_volume_name[16]; /* volume name */ uint32_t f_pad[104]; } __packed; #ifdef _KERNEL CTASSERT(sizeof(struct nandfs_fsdata) == 512); #endif struct nandfs_super_block { uint16_t s_magic; /* magic value for identification */ uint32_t s_sum; /* check sum of super block */ uint64_t s_last_cno; /* last checkpoint number */ uint64_t s_last_pseg; /* addr part. segm. written last */ uint64_t s_last_seq; /* seq.number of seg written last */ uint64_t s_free_blocks_count; /* free blocks count */ uint64_t s_mtime; /* mount time */ uint64_t s_wtime; /* write time */ uint16_t s_state; /* file system state */ char s_last_mounted[64]; /* directory where last mounted */ uint32_t s_c_interval; /* commit interval of segment */ uint32_t s_c_block_max; /* threshold of data amount for the segment construction */ uint32_t s_reserved[32]; /* padding to end of the block */ } __packed; #ifdef _KERNEL CTASSERT(sizeof(struct nandfs_super_block) == 256); #endif #define NANDFS_FSDATA_MAGIC 0xf8da #define NANDFS_SUPER_MAGIC 0x8008 #define NANDFS_NFSAREAS 4 #define NANDFS_DATA_OFFSET_BYTES(esize) (NANDFS_NFSAREAS * (esize)) #define NANDFS_SBLOCK_OFFSET_BYTES (sizeof(struct nandfs_fsdata)) #define NANDFS_DEF_BLOCKSIZE 4096 #define NANDFS_MIN_BLOCKSIZE 512 #define NANDFS_DEF_ERASESIZE (2 << 16) #define NANDFS_MIN_SEGSIZE NANDFS_DEF_ERASESIZE #define NANDFS_CURRENT_REV 9 /* current major revision */ #define NANDFS_FSDATA_CRC_BYTES offsetof(struct nandfs_fsdata, f_pad) /* Bytes count of super_block for CRC-calculation */ #define NANDFS_SB_BYTES offsetof(struct nandfs_super_block, s_reserved) /* Maximal count of links to a file */ #define NANDFS_LINK_MAX 32000 /* * Structure of a directory entry. * * Note that they can't span blocks; the rec_len fills out. */ #define NANDFS_NAME_LEN 255 struct nandfs_dir_entry { uint64_t inode; /* inode number */ uint16_t rec_len; /* directory entry length */ uint8_t name_len; /* name length */ uint8_t file_type; char name[NANDFS_NAME_LEN]; /* file name */ char pad; }; /* * NANDFS_DIR_PAD defines the directory entries boundaries * * NOTE: It must be a multiple of 8 */ #define NANDFS_DIR_PAD 8 #define NANDFS_DIR_ROUND (NANDFS_DIR_PAD - 1) #define NANDFS_DIR_NAME_OFFSET (offsetof(struct nandfs_dir_entry, name)) #define NANDFS_DIR_REC_LEN(name_len) \ (((name_len) + NANDFS_DIR_NAME_OFFSET + NANDFS_DIR_ROUND) \ & ~NANDFS_DIR_ROUND) #define NANDFS_DIR_NAME_LEN(name_len) \ (NANDFS_DIR_REC_LEN(name_len) - NANDFS_DIR_NAME_OFFSET) /* * NiLFS/NANDFS devides the disc into fixed length segments. Each segment is * filled with one or more partial segments of variable lengths. * * Each partial segment has a segment summary header followed by updates of * files and optionally a super root. */ /* * Virtual to physical block translation information. For data blocks it maps * logical block number bi_blkoff to virtual block nr bi_vblocknr. For non * datablocks it is the virtual block number assigned to an indirect block * and has no bi_blkoff. The physical block number is the next * available data block in the partial segment after all the binfo's. */ struct nandfs_binfo_v { uint64_t bi_ino; /* file's inode */ uint64_t bi_vblocknr; /* assigned virtual block number */ uint64_t bi_blkoff; /* for file's logical block number */ }; /* * DAT allocation. For data blocks just the logical block number that maps on * the next available data block in the partial segment after the binfo's. */ struct nandfs_binfo_dat { uint64_t bi_ino; uint64_t bi_blkoff; /* DAT file's logical block number */ uint8_t bi_level; /* whether this is meta block */ uint8_t bi_pad[7]; }; #ifdef _KERNEL CTASSERT(sizeof(struct nandfs_binfo_v) == sizeof(struct nandfs_binfo_dat)); #endif /* Convenience union for both types of binfo's */ union nandfs_binfo { struct nandfs_binfo_v bi_v; struct nandfs_binfo_dat bi_dat; }; /* Indirect buffers path */ struct nandfs_indir { nandfs_daddr_t in_lbn; int in_off; }; /* The (partial) segment summary */ struct nandfs_segment_summary { uint32_t ss_datasum; /* CRC of complete data block */ uint32_t ss_sumsum; /* CRC of segment summary only */ uint32_t ss_magic; /* magic to identify segment summary */ uint16_t ss_bytes; /* size of segment summary structure */ uint16_t ss_flags; /* NANDFS_SS_* flags */ uint64_t ss_seq; /* sequence number of this segm. sum */ uint64_t ss_create; /* creation timestamp in seconds */ uint64_t ss_next; /* blocknumber of next segment */ uint32_t ss_nblocks; /* number of blocks used by summary */ uint32_t ss_nbinfos; /* number of binfo structures */ uint32_t ss_sumbytes; /* total size of segment summary */ uint32_t ss_pad; /* stream of binfo structures */ }; #define NANDFS_SEGSUM_MAGIC 0x8e680011 /* segment summary magic number */ /* Segment summary flags */ #define NANDFS_SS_LOGBGN 0x0001 /* begins a logical segment */ #define NANDFS_SS_LOGEND 0x0002 /* ends a logical segment */ #define NANDFS_SS_SR 0x0004 /* has super root */ #define NANDFS_SS_SYNDT 0x0008 /* includes data only updates */ #define NANDFS_SS_GC 0x0010 /* segment written for cleaner operation */ #define NANDFS_SS_FLAG_BITS "\20\1LOGBGN\2LOGEND\3SR\4SYNDT\5GC" /* Segment summary constrains */ #define NANDFS_SEG_MIN_BLOCKS 16 /* minimum number of blocks in a full segment */ #define NANDFS_PSEG_MIN_BLOCKS 2 /* minimum number of blocks in a partial segment */ #define NANDFS_MIN_NRSVSEGS 8 /* minimum number of reserved segments */ /* * Structure of DAT/inode file. * * A DAT file is divided into groups. The maximum number of groups is the * number of block group descriptors that fit into one block; this descriptor * only gives the number of free entries in the associated group. * * Each group has a block sized bitmap indicating if an entry is taken or * empty. Each bit stands for a DAT entry. * * The inode file has exactly the same format only the entries are inode * entries. */ struct nandfs_block_group_desc { uint32_t bg_nfrees; /* num. free entries in block group */ }; /* DAT entry in a super root's DAT file */ struct nandfs_dat_entry { uint64_t de_blocknr; /* block number */ uint64_t de_start; /* valid from checkpoint */ uint64_t de_end; /* valid till checkpoint */ uint64_t de_rsv; /* reserved for future use */ }; /* * Structure of CP file. * * A snapshot is just a checkpoint only it's protected against removal by the * cleaner. The snapshots are kept on a double linked list of checkpoints. */ struct nandfs_snapshot_list { uint64_t ssl_next; /* checkpoint nr. forward */ uint64_t ssl_prev; /* checkpoint nr. back */ }; /* Checkpoint entry structure */ struct nandfs_checkpoint { uint32_t cp_flags; /* NANDFS_CHECKPOINT_* flags */ uint32_t cp_checkpoints_count; /* ZERO, not used anymore? */ struct nandfs_snapshot_list cp_snapshot_list; /* list of snapshots */ uint64_t cp_cno; /* checkpoint number */ uint64_t cp_create; /* creation timestamp */ uint64_t cp_nblk_inc; /* number of blocks incremented */ uint64_t cp_blocks_count; /* reserved (might be deleted) */ struct nandfs_inode cp_ifile_inode; /* inode file inode */ }; /* Checkpoint flags */ #define NANDFS_CHECKPOINT_SNAPSHOT 1 #define NANDFS_CHECKPOINT_INVALID 2 #define NANDFS_CHECKPOINT_SKETCH 4 #define NANDFS_CHECKPOINT_MINOR 8 #define NANDFS_CHECKPOINT_BITS "\20\1SNAPSHOT\2INVALID\3SKETCH\4MINOR" /* Header of the checkpoint file */ struct nandfs_cpfile_header { uint64_t ch_ncheckpoints; /* number of checkpoints */ uint64_t ch_nsnapshots; /* number of snapshots */ struct nandfs_snapshot_list ch_snapshot_list; /* snapshot list */ }; #define NANDFS_CPFILE_FIRST_CHECKPOINT_OFFSET \ ((sizeof(struct nandfs_cpfile_header) + \ sizeof(struct nandfs_checkpoint) - 1) / \ sizeof(struct nandfs_checkpoint)) #define NANDFS_NOSEGMENT 0xffffffff /* * Structure of SU file. * * The segment usage file sums up how each of the segments are used. They are * indexed by their segment number. */ /* Segment usage entry */ struct nandfs_segment_usage { uint64_t su_lastmod; /* last modified timestamp */ uint32_t su_nblocks; /* number of blocks in segment */ uint32_t su_flags; /* NANDFS_SEGMENT_USAGE_* flags */ }; /* Segment usage flag */ #define NANDFS_SEGMENT_USAGE_ACTIVE 1 #define NANDFS_SEGMENT_USAGE_DIRTY 2 #define NANDFS_SEGMENT_USAGE_ERROR 4 #define NANDFS_SEGMENT_USAGE_GC 8 #define NANDFS_SEGMENT_USAGE_BITS "\20\1ACTIVE\2DIRTY\3ERROR" /* Header of the segment usage file */ struct nandfs_sufile_header { uint64_t sh_ncleansegs; /* number of segments marked clean */ uint64_t sh_ndirtysegs; /* number of segments marked dirty */ uint64_t sh_last_alloc; /* last allocated segment number */ }; #define NANDFS_SUFILE_FIRST_SEGMENT_USAGE_OFFSET \ ((sizeof(struct nandfs_sufile_header) + \ sizeof(struct nandfs_segment_usage) - 1) / \ sizeof(struct nandfs_segment_usage)) struct nandfs_seg_stat { uint64_t nss_nsegs; uint64_t nss_ncleansegs; uint64_t nss_ndirtysegs; uint64_t nss_ctime; uint64_t nss_nongc_ctime; uint64_t nss_prot_seq; }; enum { NANDFS_CHECKPOINT, NANDFS_SNAPSHOT }; #define NANDFS_CPINFO_MAX 512 struct nandfs_cpinfo { uint32_t nci_flags; uint32_t nci_pad; uint64_t nci_cno; uint64_t nci_create; uint64_t nci_nblk_inc; uint64_t nci_blocks_count; uint64_t nci_next; }; #define NANDFS_SEGMENTS_MAX 512 struct nandfs_suinfo { uint64_t nsi_num; uint64_t nsi_lastmod; uint32_t nsi_blocks; uint32_t nsi_flags; }; #define NANDFS_VINFO_MAX 512 struct nandfs_vinfo { uint64_t nvi_ino; uint64_t nvi_vblocknr; uint64_t nvi_start; uint64_t nvi_end; uint64_t nvi_blocknr; int nvi_alive; }; struct nandfs_cpmode { uint64_t ncpm_cno; uint32_t ncpm_mode; uint32_t ncpm_pad; }; struct nandfs_argv { uint64_t nv_base; uint32_t nv_nmembs; uint16_t nv_size; uint16_t nv_flags; uint64_t nv_index; }; struct nandfs_cpstat { uint64_t ncp_cno; uint64_t ncp_ncps; uint64_t ncp_nss; }; struct nandfs_period { uint64_t p_start; uint64_t p_end; }; struct nandfs_vdesc { uint64_t vd_ino; uint64_t vd_cno; uint64_t vd_vblocknr; struct nandfs_period vd_period; uint64_t vd_blocknr; uint64_t vd_offset; uint32_t vd_flags; uint32_t vd_pad; }; struct nandfs_bdesc { uint64_t bd_ino; uint64_t bd_oblocknr; uint64_t bd_blocknr; uint64_t bd_offset; uint32_t bd_level; uint32_t bd_alive; }; #ifndef _KERNEL #ifndef MNAMELEN -#define MNAMELEN 88 +#define MNAMELEN 1024 #endif #endif struct nandfs_fsinfo { struct nandfs_fsdata fs_fsdata; struct nandfs_super_block fs_super; char fs_dev[MNAMELEN]; }; #define NANDFS_MAX_MOUNTS 65535 #define NANDFS_IOCTL_GET_SUSTAT _IOR('N', 100, struct nandfs_seg_stat) #define NANDFS_IOCTL_CHANGE_CPMODE _IOWR('N', 101, struct nandfs_cpmode) #define NANDFS_IOCTL_GET_CPINFO _IOWR('N', 102, struct nandfs_argv) #define NANDFS_IOCTL_DELETE_CP _IOWR('N', 103, uint64_t[2]) #define NANDFS_IOCTL_GET_CPSTAT _IOR('N', 104, struct nandfs_cpstat) #define NANDFS_IOCTL_GET_SUINFO _IOWR('N', 105, struct nandfs_argv) #define NANDFS_IOCTL_GET_VINFO _IOWR('N', 106, struct nandfs_argv) #define NANDFS_IOCTL_GET_BDESCS _IOWR('N', 107, struct nandfs_argv) #define NANDFS_IOCTL_GET_FSINFO _IOR('N', 108, struct nandfs_fsinfo) #define NANDFS_IOCTL_MAKE_SNAP _IOWR('N', 109, uint64_t) #define NANDFS_IOCTL_DELETE_SNAP _IOWR('N', 110, uint64_t) #define NANDFS_IOCTL_SYNC _IOWR('N', 111, uint64_t) #endif /* _NANDFS_FS_H */ Index: head/sys/fs/nfs/nfsport.h =================================================================== --- head/sys/fs/nfs/nfsport.h (revision 318735) +++ head/sys/fs/nfs/nfsport.h (revision 318736) @@ -1,1048 +1,1042 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSPORT_H_ #define _NFS_NFSPORT_H_ /* * In general, I'm not fond of #includes in .h files, but this seems * to be the cleanest way to handle #include files for the ports. */ #ifdef _KERNEL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * For Darwin, these functions should be "static" when built in a kext. * (This is always defined as nil otherwise.) */ #define APPLESTATIC #include #include #include #include #include #include #include #include #include #include #include "opt_nfs.h" #include "opt_ufs.h" /* * These types must be defined before the nfs includes. */ #define NFSSOCKADDR_T struct sockaddr * #define NFSPROC_T struct thread #define NFSDEV_T dev_t #define NFSSVCARGS nfssvc_args #define NFSACL_T struct acl /* * These should be defined as the types used for the corresponding VOP's * argument type. */ #define NFS_ACCESS_ARGS struct vop_access_args #define NFS_OPEN_ARGS struct vop_open_args #define NFS_GETATTR_ARGS struct vop_getattr_args #define NFS_LOOKUP_ARGS struct vop_lookup_args #define NFS_READDIR_ARGS struct vop_readdir_args /* * Allocate mbufs. Must succeed and never set the mbuf ptr to NULL. */ #define NFSMGET(m) do { \ MGET((m), M_WAITOK, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGET((m), M_WAITOK, MT_DATA); \ } \ } while (0) #define NFSMGETHDR(m) do { \ MGETHDR((m), M_WAITOK, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGETHDR((m), M_WAITOK, MT_DATA); \ } \ } while (0) #define NFSMCLGET(m, w) do { \ MGET((m), M_WAITOK, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGET((m), M_WAITOK, MT_DATA); \ } \ MCLGET((m), (w)); \ } while (0) #define NFSMCLGETHDR(m, w) do { \ MGETHDR((m), M_WAITOK, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGETHDR((m), M_WAITOK, MT_DATA); \ } \ } while (0) #define NFSMTOD mtod /* * Client side constant for size of a lockowner name. */ #define NFSV4CL_LOCKNAMELEN 12 /* * Type for a mutex lock. */ #define NFSMUTEX_T struct mtx #endif /* _KERNEL */ /* * NFSv4 Operation numbers. */ #define NFSV4OP_ACCESS 3 #define NFSV4OP_CLOSE 4 #define NFSV4OP_COMMIT 5 #define NFSV4OP_CREATE 6 #define NFSV4OP_DELEGPURGE 7 #define NFSV4OP_DELEGRETURN 8 #define NFSV4OP_GETATTR 9 #define NFSV4OP_GETFH 10 #define NFSV4OP_LINK 11 #define NFSV4OP_LOCK 12 #define NFSV4OP_LOCKT 13 #define NFSV4OP_LOCKU 14 #define NFSV4OP_LOOKUP 15 #define NFSV4OP_LOOKUPP 16 #define NFSV4OP_NVERIFY 17 #define NFSV4OP_OPEN 18 #define NFSV4OP_OPENATTR 19 #define NFSV4OP_OPENCONFIRM 20 #define NFSV4OP_OPENDOWNGRADE 21 #define NFSV4OP_PUTFH 22 #define NFSV4OP_PUTPUBFH 23 #define NFSV4OP_PUTROOTFH 24 #define NFSV4OP_READ 25 #define NFSV4OP_READDIR 26 #define NFSV4OP_READLINK 27 #define NFSV4OP_REMOVE 28 #define NFSV4OP_RENAME 29 #define NFSV4OP_RENEW 30 #define NFSV4OP_RESTOREFH 31 #define NFSV4OP_SAVEFH 32 #define NFSV4OP_SECINFO 33 #define NFSV4OP_SETATTR 34 #define NFSV4OP_SETCLIENTID 35 #define NFSV4OP_SETCLIENTIDCFRM 36 #define NFSV4OP_VERIFY 37 #define NFSV4OP_WRITE 38 #define NFSV4OP_RELEASELCKOWN 39 /* * Must be one greater than the last Operation#. */ #define NFSV4OP_NOPS 40 /* * Additional Ops for NFSv4.1. */ #define NFSV4OP_BACKCHANNELCTL 40 #define NFSV4OP_BINDCONNTOSESS 41 #define NFSV4OP_EXCHANGEID 42 #define NFSV4OP_CREATESESSION 43 #define NFSV4OP_DESTROYSESSION 44 #define NFSV4OP_FREESTATEID 45 #define NFSV4OP_GETDIRDELEG 46 #define NFSV4OP_GETDEVINFO 47 #define NFSV4OP_GETDEVLIST 48 #define NFSV4OP_LAYOUTCOMMIT 49 #define NFSV4OP_LAYOUTGET 50 #define NFSV4OP_LAYOUTRETURN 51 #define NFSV4OP_SECINFONONAME 52 #define NFSV4OP_SEQUENCE 53 #define NFSV4OP_SETSSV 54 #define NFSV4OP_TESTSTATEID 55 #define NFSV4OP_WANTDELEG 56 #define NFSV4OP_DESTROYCLIENTID 57 #define NFSV4OP_RECLAIMCOMPL 58 /* * Must be one more than last op#. * NFSv4.2 isn't implemented yet, but define the op# limit for it. */ #define NFSV41_NOPS 59 #define NFSV42_NOPS 72 /* Quirky case if the illegal op code */ #define NFSV4OP_OPILLEGAL 10044 /* * Fake NFSV4OP_xxx used for nfsstat. Start at NFSV42_NOPS. */ #define NFSV4OP_SYMLINK (NFSV42_NOPS) #define NFSV4OP_MKDIR (NFSV42_NOPS + 1) #define NFSV4OP_RMDIR (NFSV42_NOPS + 2) #define NFSV4OP_READDIRPLUS (NFSV42_NOPS + 3) #define NFSV4OP_MKNOD (NFSV42_NOPS + 4) #define NFSV4OP_FSSTAT (NFSV42_NOPS + 5) #define NFSV4OP_FSINFO (NFSV42_NOPS + 6) #define NFSV4OP_PATHCONF (NFSV42_NOPS + 7) #define NFSV4OP_V3CREATE (NFSV42_NOPS + 8) /* * This is the count of the fake operations listed above. */ #define NFSV4OP_FAKENOPS 9 /* * and the Callback OPs */ #define NFSV4OP_CBGETATTR 3 #define NFSV4OP_CBRECALL 4 /* * Must be one greater than the last Callback Operation# for NFSv4.0. */ #define NFSV4OP_CBNOPS 5 /* * Additional Callback Ops for NFSv4.1 only. */ #define NFSV4OP_CBLAYOUTRECALL 5 #define NFSV4OP_CBNOTIFY 6 #define NFSV4OP_CBPUSHDELEG 7 #define NFSV4OP_CBRECALLANY 8 #define NFSV4OP_CBRECALLOBJAVAIL 9 #define NFSV4OP_CBRECALLSLOT 10 #define NFSV4OP_CBSEQUENCE 11 #define NFSV4OP_CBWANTCANCELLED 12 #define NFSV4OP_CBNOTIFYLOCK 13 #define NFSV4OP_CBNOTIFYDEVID 14 #define NFSV41_CBNOPS 15 #define NFSV42_CBNOPS 16 /* * The lower numbers -> 21 are used by NFSv2 and v3. These define higher * numbers used by NFSv4. * NFS_V3NPROCS is one greater than the last V3 op and NFS_NPROCS is * one greater than the last number. */ #ifndef NFS_V3NPROCS #define NFS_V3NPROCS 22 #define NFSPROC_LOOKUPP 22 #define NFSPROC_SETCLIENTID 23 #define NFSPROC_SETCLIENTIDCFRM 24 #define NFSPROC_LOCK 25 #define NFSPROC_LOCKU 26 #define NFSPROC_OPEN 27 #define NFSPROC_CLOSE 28 #define NFSPROC_OPENCONFIRM 29 #define NFSPROC_LOCKT 30 #define NFSPROC_OPENDOWNGRADE 31 #define NFSPROC_RENEW 32 #define NFSPROC_PUTROOTFH 33 #define NFSPROC_RELEASELCKOWN 34 #define NFSPROC_DELEGRETURN 35 #define NFSPROC_RETDELEGREMOVE 36 #define NFSPROC_RETDELEGRENAME1 37 #define NFSPROC_RETDELEGRENAME2 38 #define NFSPROC_GETACL 39 #define NFSPROC_SETACL 40 /* * Must be defined as one higher than the last Proc# above. */ #define NFSV4_NPROCS 41 /* Additional procedures for NFSv4.1. */ #define NFSPROC_EXCHANGEID 41 #define NFSPROC_CREATESESSION 42 #define NFSPROC_DESTROYSESSION 43 #define NFSPROC_DESTROYCLIENT 44 #define NFSPROC_FREESTATEID 45 #define NFSPROC_LAYOUTGET 46 #define NFSPROC_GETDEVICEINFO 47 #define NFSPROC_LAYOUTCOMMIT 48 #define NFSPROC_LAYOUTRETURN 49 #define NFSPROC_RECLAIMCOMPL 50 #define NFSPROC_WRITEDS 51 #define NFSPROC_READDS 52 #define NFSPROC_COMMITDS 53 /* * Must be defined as one higher than the last NFSv4.1 Proc# above. */ #define NFSV41_NPROCS 54 #endif /* NFS_V3NPROCS */ /* * New stats structure. * The vers field will be set to NFSSTATS_V1 by the caller. */ #define NFSSTATS_V1 1 struct nfsstatsv1 { int vers; /* Set to version requested by caller. */ uint64_t attrcache_hits; uint64_t attrcache_misses; uint64_t lookupcache_hits; uint64_t lookupcache_misses; uint64_t direofcache_hits; uint64_t direofcache_misses; uint64_t accesscache_hits; uint64_t accesscache_misses; uint64_t biocache_reads; uint64_t read_bios; uint64_t read_physios; uint64_t biocache_writes; uint64_t write_bios; uint64_t write_physios; uint64_t biocache_readlinks; uint64_t readlink_bios; uint64_t biocache_readdirs; uint64_t readdir_bios; uint64_t rpccnt[NFSV41_NPROCS + 15]; uint64_t rpcretries; uint64_t srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS]; uint64_t srvrpc_errs; uint64_t srv_errs; uint64_t rpcrequests; uint64_t rpctimeouts; uint64_t rpcunexpected; uint64_t rpcinvalid; uint64_t srvcache_inproghits; uint64_t srvcache_idemdonehits; uint64_t srvcache_nonidemdonehits; uint64_t srvcache_misses; uint64_t srvcache_tcppeak; int srvcache_size; /* Updated by atomic_xx_int(). */ uint64_t srvclients; uint64_t srvopenowners; uint64_t srvopens; uint64_t srvlockowners; uint64_t srvlocks; uint64_t srvdelegates; uint64_t cbrpccnt[NFSV42_CBNOPS]; uint64_t clopenowners; uint64_t clopens; uint64_t cllockowners; uint64_t cllocks; uint64_t cldelegates; uint64_t cllocalopenowners; uint64_t cllocalopens; uint64_t cllocallockowners; uint64_t cllocallocks; uint64_t srvstartcnt; uint64_t srvdonecnt; uint64_t srvbytes[NFSV42_NOPS + NFSV4OP_FAKENOPS]; uint64_t srvops[NFSV42_NOPS + NFSV4OP_FAKENOPS]; struct bintime srvduration[NFSV42_NOPS + NFSV4OP_FAKENOPS]; struct bintime busyfrom; struct bintime busytime; }; /* * Old stats structure. */ struct ext_nfsstats { int attrcache_hits; int attrcache_misses; int lookupcache_hits; int lookupcache_misses; int direofcache_hits; int direofcache_misses; int accesscache_hits; int accesscache_misses; int biocache_reads; int read_bios; int read_physios; int biocache_writes; int write_bios; int write_physios; int biocache_readlinks; int readlink_bios; int biocache_readdirs; int readdir_bios; int rpccnt[NFSV4_NPROCS]; int rpcretries; int srvrpccnt[NFSV4OP_NOPS + NFSV4OP_FAKENOPS]; int srvrpc_errs; int srv_errs; int rpcrequests; int rpctimeouts; int rpcunexpected; int rpcinvalid; int srvcache_inproghits; int srvcache_idemdonehits; int srvcache_nonidemdonehits; int srvcache_misses; int srvcache_tcppeak; int srvcache_size; int srvclients; int srvopenowners; int srvopens; int srvlockowners; int srvlocks; int srvdelegates; int cbrpccnt[NFSV4OP_CBNOPS]; int clopenowners; int clopens; int cllockowners; int cllocks; int cldelegates; int cllocalopenowners; int cllocalopens; int cllocallockowners; int cllocallocks; }; #ifdef _KERNEL /* * Define NFS_NPROCS as NFSV4_NPROCS for the experimental kernel code. */ #ifndef NFS_NPROCS #define NFS_NPROCS NFSV4_NPROCS #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Just to keep nfs_var.h happy. */ struct nfs_vattr { int junk; }; struct nfsvattr { struct vattr na_vattr; nfsattrbit_t na_suppattr; u_int32_t na_mntonfileno; u_int64_t na_filesid[2]; }; #define na_type na_vattr.va_type #define na_mode na_vattr.va_mode #define na_nlink na_vattr.va_nlink #define na_uid na_vattr.va_uid #define na_gid na_vattr.va_gid #define na_fsid na_vattr.va_fsid #define na_fileid na_vattr.va_fileid #define na_size na_vattr.va_size #define na_blocksize na_vattr.va_blocksize #define na_atime na_vattr.va_atime #define na_mtime na_vattr.va_mtime #define na_ctime na_vattr.va_ctime #define na_gen na_vattr.va_gen #define na_flags na_vattr.va_flags #define na_rdev na_vattr.va_rdev #define na_bytes na_vattr.va_bytes #define na_filerev na_vattr.va_filerev #define na_vaflags na_vattr.va_vaflags #include /* * This is the header structure used for the lists, etc. (It has the * above record in it. */ struct nfsrv_stablefirst { LIST_HEAD(, nfsrv_stable) nsf_head; /* Head of nfsrv_stable list */ time_t nsf_eograce; /* Time grace period ends */ time_t *nsf_bootvals; /* Previous boottime values */ struct file *nsf_fp; /* File table pointer */ u_char nsf_flags; /* NFSNSF_ flags */ struct nfsf_rec nsf_rec; /* and above first record */ }; #define nsf_lease nsf_rec.lease #define nsf_numboots nsf_rec.numboots /* NFSNSF_xxx flags */ #define NFSNSF_UPDATEDONE 0x01 #define NFSNSF_GRACEOVER 0x02 #define NFSNSF_NEEDLOCK 0x04 #define NFSNSF_EXPIREDCLIENT 0x08 #define NFSNSF_NOOPENS 0x10 #define NFSNSF_OK 0x20 /* * Maximum number of boot times allowed in record. Although there is * really no need for a fixed upper bound, this serves as a sanity check * for a corrupted file. */ #define NFSNSF_MAXNUMBOOTS 10000 /* * This structure defines the other records in the file. The * nst_client array is actually the size of the client string name. */ struct nfst_rec { u_int16_t len; u_char flag; u_char client[1]; }; /* and the values for flag */ #define NFSNST_NEWSTATE 0x1 #define NFSNST_REVOKE 0x2 #define NFSNST_GOTSTATE 0x4 /* * This structure is linked onto nfsrv_stablefirst for the duration of * reclaim. */ struct nfsrv_stable { LIST_ENTRY(nfsrv_stable) nst_list; struct nfsclient *nst_clp; struct nfst_rec nst_rec; }; #define nst_timestamp nst_rec.timestamp #define nst_len nst_rec.len #define nst_flag nst_rec.flag #define nst_client nst_rec.client /* * At some point the server will run out of kernel storage for * state structures. For FreeBSD5.2, this results in a panic * kmem_map is full. It happens at well over 1000000 opens plus * locks on a PIII-800 with 256Mbytes, so that is where I've set * the limit. If your server panics due to too many opens/locks, * decrease the size of NFSRV_V4STATELIMIT. If you find the server * returning NFS4ERR_RESOURCE a lot and have lots of memory, try * increasing it. */ #define NFSRV_V4STATELIMIT 500000 /* Max # of Opens + Locks */ /* * The type required differs with BSDen (just the second arg). */ void nfsrvd_rcv(struct socket *, void *, int); /* * Macros for handling socket addresses. (Hopefully this makes the code * more portable, since I've noticed some 'BSD don't have sockaddrs in * mbufs any more.) */ #define NFSSOCKADDR(a, t) ((t)(a)) #define NFSSOCKADDRALLOC(a) \ do { \ MALLOC((a), struct sockaddr *, sizeof (struct sockaddr), \ M_SONAME, M_WAITOK); \ NFSBZERO((a), sizeof (struct sockaddr)); \ } while (0) #define NFSSOCKADDRSIZE(a, s) ((a)->sa_len = (s)) #define NFSSOCKADDRFREE(a) \ do { \ if (a) \ FREE((caddr_t)(a), M_SONAME); \ } while (0) /* * These should be defined as a process or thread structure, as required * for signal handling, etc. */ #define NFSNEWCRED(c) (crdup(c)) #define NFSPROCCRED(p) ((p)->td_ucred) #define NFSFREECRED(c) (crfree(c)) #define NFSUIOPROC(u, p) ((u)->uio_td = NULL) #define NFSPROCP(p) ((p)->td_proc) /* * Define these so that cn_hash and its length is ignored. */ #define NFSCNHASHZERO(c) #define NFSCNHASH(c, v) #define NCHNAMLEN 9999999 /* * These macros are defined to initialize and set the timer routine. */ #define NFS_TIMERINIT \ newnfs_timer(NULL) /* * Handle SMP stuff: */ #define NFSSTATESPINLOCK extern struct mtx nfs_state_mutex #define NFSLOCKSTATE() mtx_lock(&nfs_state_mutex) #define NFSUNLOCKSTATE() mtx_unlock(&nfs_state_mutex) #define NFSSTATEMUTEXPTR (&nfs_state_mutex) #define NFSREQSPINLOCK extern struct mtx nfs_req_mutex #define NFSLOCKREQ() mtx_lock(&nfs_req_mutex) #define NFSUNLOCKREQ() mtx_unlock(&nfs_req_mutex) #define NFSSOCKMUTEX extern struct mtx nfs_slock_mutex #define NFSSOCKMUTEXPTR (&nfs_slock_mutex) #define NFSLOCKSOCK() mtx_lock(&nfs_slock_mutex) #define NFSUNLOCKSOCK() mtx_unlock(&nfs_slock_mutex) #define NFSNAMEIDMUTEX extern struct mtx nfs_nameid_mutex #define NFSLOCKNAMEID() mtx_lock(&nfs_nameid_mutex) #define NFSUNLOCKNAMEID() mtx_unlock(&nfs_nameid_mutex) #define NFSNAMEIDREQUIRED() mtx_assert(&nfs_nameid_mutex, MA_OWNED) #define NFSCLSTATEMUTEX extern struct mtx nfs_clstate_mutex #define NFSCLSTATEMUTEXPTR (&nfs_clstate_mutex) #define NFSLOCKCLSTATE() mtx_lock(&nfs_clstate_mutex) #define NFSUNLOCKCLSTATE() mtx_unlock(&nfs_clstate_mutex) #define NFSDLOCKMUTEX extern struct mtx newnfsd_mtx #define NFSDLOCKMUTEXPTR (&newnfsd_mtx) #define NFSD_LOCK() mtx_lock(&newnfsd_mtx) #define NFSD_UNLOCK() mtx_unlock(&newnfsd_mtx) #define NFSD_LOCK_ASSERT() mtx_assert(&newnfsd_mtx, MA_OWNED) #define NFSD_UNLOCK_ASSERT() mtx_assert(&newnfsd_mtx, MA_NOTOWNED) #define NFSV4ROOTLOCKMUTEX extern struct mtx nfs_v4root_mutex #define NFSV4ROOTLOCKMUTEXPTR (&nfs_v4root_mutex) #define NFSLOCKV4ROOTMUTEX() mtx_lock(&nfs_v4root_mutex) #define NFSUNLOCKV4ROOTMUTEX() mtx_unlock(&nfs_v4root_mutex) #define NFSLOCKNODE(n) mtx_lock(&((n)->n_mtx)) #define NFSUNLOCKNODE(n) mtx_unlock(&((n)->n_mtx)) #define NFSLOCKMNT(m) mtx_lock(&((m)->nm_mtx)) #define NFSUNLOCKMNT(m) mtx_unlock(&((m)->nm_mtx)) #define NFSLOCKREQUEST(r) mtx_lock(&((r)->r_mtx)) #define NFSUNLOCKREQUEST(r) mtx_unlock(&((r)->r_mtx)) #define NFSPROCLISTLOCK() sx_slock(&allproc_lock) #define NFSPROCLISTUNLOCK() sx_sunlock(&allproc_lock) #define NFSLOCKSOCKREQ(r) mtx_lock(&((r)->nr_mtx)) #define NFSUNLOCKSOCKREQ(r) mtx_unlock(&((r)->nr_mtx)) #define NFSLOCKDS(d) mtx_lock(&((d)->nfsclds_mtx)) #define NFSUNLOCKDS(d) mtx_unlock(&((d)->nfsclds_mtx)) #define NFSSESSIONMUTEXPTR(s) (&((s)->mtx)) #define NFSLOCKSESSION(s) mtx_lock(&((s)->mtx)) #define NFSUNLOCKSESSION(s) mtx_unlock(&((s)->mtx)) /* * Use these macros to initialize/free a mutex. */ #define NFSINITSOCKMUTEX(m) mtx_init((m), "nfssock", NULL, MTX_DEF) #define NFSFREEMUTEX(m) mtx_destroy((m)) int nfsmsleep(void *, void *, int, const char *, struct timespec *); /* * And weird vm stuff in the nfs server. */ #define PDIRUNLOCK 0x0 #define MAX_COMMIT_COUNT (1024 * 1024) /* * Define these to handle the type of va_rdev. */ #define NFSMAKEDEV(m, n) makedev((m), (n)) #define NFSMAJOR(d) major(d) #define NFSMINOR(d) minor(d) /* - * Define this to be the macro that returns the minimum size required - * for a directory entry. - */ -#define DIRENT_SIZE(dp) GENERIC_DIRSIZ(dp) - -/* * The vnode tag for nfsv4root. */ #define VT_NFSV4ROOT "nfsv4root" /* * Define whatever it takes to do a vn_rdwr(). */ #define NFSD_RDWR(r, v, b, l, o, s, i, c, a, p) \ vn_rdwr((r), (v), (b), (l), (o), (s), (i), (c), NULL, (a), (p)) /* * Macros for handling memory for different BSDen. * NFSBCOPY(src, dst, len) - copies len bytes, non-overlapping * NFSOVBCOPY(src, dst, len) - ditto, but data areas might overlap * NFSBCMP(cp1, cp2, len) - compare len bytes, return 0 if same * NFSBZERO(cp, len) - set len bytes to 0x0 */ #define NFSBCOPY(s, d, l) bcopy((s), (d), (l)) #define NFSOVBCOPY(s, d, l) ovbcopy((s), (d), (l)) #define NFSBCMP(s, d, l) bcmp((s), (d), (l)) #define NFSBZERO(s, l) bzero((s), (l)) /* * Some queue.h files don't have these dfined in them. */ #define LIST_END(head) NULL #define SLIST_END(head) NULL #define TAILQ_END(head) NULL /* * This must be defined to be a global variable that increments once * per second, but never stops or goes backwards, even when a "date" * command changes the TOD clock. It is used for delta times for * leases, etc. */ #define NFSD_MONOSEC time_uptime /* * Declare the malloc types. */ MALLOC_DECLARE(M_NEWNFSRVCACHE); MALLOC_DECLARE(M_NEWNFSDCLIENT); MALLOC_DECLARE(M_NEWNFSDSTATE); MALLOC_DECLARE(M_NEWNFSDLOCK); MALLOC_DECLARE(M_NEWNFSDLOCKFILE); MALLOC_DECLARE(M_NEWNFSSTRING); MALLOC_DECLARE(M_NEWNFSUSERGROUP); MALLOC_DECLARE(M_NEWNFSDREQ); MALLOC_DECLARE(M_NEWNFSFH); MALLOC_DECLARE(M_NEWNFSCLOWNER); MALLOC_DECLARE(M_NEWNFSCLOPEN); MALLOC_DECLARE(M_NEWNFSCLDELEG); MALLOC_DECLARE(M_NEWNFSCLCLIENT); MALLOC_DECLARE(M_NEWNFSCLLOCKOWNER); MALLOC_DECLARE(M_NEWNFSCLLOCK); MALLOC_DECLARE(M_NEWNFSDIROFF); MALLOC_DECLARE(M_NEWNFSV4NODE); MALLOC_DECLARE(M_NEWNFSDIRECTIO); MALLOC_DECLARE(M_NEWNFSMNT); MALLOC_DECLARE(M_NEWNFSDROLLBACK); MALLOC_DECLARE(M_NEWNFSLAYOUT); MALLOC_DECLARE(M_NEWNFSFLAYOUT); MALLOC_DECLARE(M_NEWNFSDEVINFO); MALLOC_DECLARE(M_NEWNFSSOCKREQ); MALLOC_DECLARE(M_NEWNFSCLDS); MALLOC_DECLARE(M_NEWNFSLAYRECALL); MALLOC_DECLARE(M_NEWNFSDSESSION); #define M_NFSRVCACHE M_NEWNFSRVCACHE #define M_NFSDCLIENT M_NEWNFSDCLIENT #define M_NFSDSTATE M_NEWNFSDSTATE #define M_NFSDLOCK M_NEWNFSDLOCK #define M_NFSDLOCKFILE M_NEWNFSDLOCKFILE #define M_NFSSTRING M_NEWNFSSTRING #define M_NFSUSERGROUP M_NEWNFSUSERGROUP #define M_NFSDREQ M_NEWNFSDREQ #define M_NFSFH M_NEWNFSFH #define M_NFSCLOWNER M_NEWNFSCLOWNER #define M_NFSCLOPEN M_NEWNFSCLOPEN #define M_NFSCLDELEG M_NEWNFSCLDELEG #define M_NFSCLCLIENT M_NEWNFSCLCLIENT #define M_NFSCLLOCKOWNER M_NEWNFSCLLOCKOWNER #define M_NFSCLLOCK M_NEWNFSCLLOCK #define M_NFSDIROFF M_NEWNFSDIROFF #define M_NFSV4NODE M_NEWNFSV4NODE #define M_NFSDIRECTIO M_NEWNFSDIRECTIO #define M_NFSDROLLBACK M_NEWNFSDROLLBACK #define M_NFSLAYOUT M_NEWNFSLAYOUT #define M_NFSFLAYOUT M_NEWNFSFLAYOUT #define M_NFSDEVINFO M_NEWNFSDEVINFO #define M_NFSSOCKREQ M_NEWNFSSOCKREQ #define M_NFSCLDS M_NEWNFSCLDS #define M_NFSLAYRECALL M_NEWNFSLAYRECALL #define M_NFSDSESSION M_NEWNFSDSESSION #define NFSINT_SIGMASK(set) \ (SIGISMEMBER(set, SIGINT) || SIGISMEMBER(set, SIGTERM) || \ SIGISMEMBER(set, SIGHUP) || SIGISMEMBER(set, SIGKILL) || \ SIGISMEMBER(set, SIGQUIT)) /* * Convert a quota block count to byte count. */ #define NFSQUOTABLKTOBYTE(q, b) (q) *= (b) /* * Define this as the largest file size supported. (It should probably * be available via a VFS_xxx Op, but it isn't. */ #define NFSRV_MAXFILESIZE ((u_int64_t)0x800000000000) /* * Set this macro to index() or strchr(), whichever is supported. */ #define STRCHR(s, c) strchr((s), (c)) /* * Set the n_time in the client write rpc, as required. */ #define NFSWRITERPC_SETTIME(w, n, a, v4) \ do { \ if (w) { \ mtx_lock(&((n)->n_mtx)); \ (n)->n_mtime = (a)->na_mtime; \ if (v4) \ (n)->n_change = (a)->na_filerev; \ mtx_unlock(&((n)->n_mtx)); \ } \ } while (0) /* * Fake value, just to make the client work. */ #define NFS_LATTR_NOSHRINK 1 /* * Prototypes for functions where the arguments vary for different ports. */ int nfscl_loadattrcache(struct vnode **, struct nfsvattr *, void *, void *, int, int); int newnfs_realign(struct mbuf **, int); /* * If the port runs on an SMP box that can enforce Atomic ops with low * overheads, define these as atomic increments/decrements. If not, * don't worry about it, since these are used for stats that can be * "out by one" without disastrous consequences. */ #define NFSINCRGLOBAL(a) ((a)++) /* * Assorted funky stuff to make things work under Darwin8. */ /* * These macros checks for a field in vattr being set. */ #define NFSATTRISSET(t, v, a) ((v)->a != (t)VNOVAL) #define NFSATTRISSETTIME(v, a) ((v)->a.tv_sec != VNOVAL) /* * Manipulate mount flags. */ #define NFSSTA_HASWRITEVERF 0x00040000 /* Has write verifier */ #define NFSSTA_GOTFSINFO 0x00100000 /* Got the fsinfo */ #define NFSSTA_OPENMODE 0x00200000 /* Must use correct open mode */ #define NFSSTA_NOLAYOUTCOMMIT 0x04000000 /* Don't do LayoutCommit */ #define NFSSTA_SESSPERSIST 0x08000000 /* Has a persistent session */ #define NFSSTA_TIMEO 0x10000000 /* Experiencing a timeout */ #define NFSSTA_LOCKTIMEO 0x20000000 /* Experiencing a lockd timeout */ #define NFSSTA_HASSETFSID 0x40000000 /* Has set the fsid */ #define NFSSTA_PNFS 0x80000000 /* pNFS is enabled */ #define NFSHASNFSV3(n) ((n)->nm_flag & NFSMNT_NFSV3) #define NFSHASNFSV4(n) ((n)->nm_flag & NFSMNT_NFSV4) #define NFSHASNFSV4N(n) ((n)->nm_minorvers > 0) #define NFSHASNFSV3OR4(n) ((n)->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) #define NFSHASGOTFSINFO(n) ((n)->nm_state & NFSSTA_GOTFSINFO) #define NFSHASHASSETFSID(n) ((n)->nm_state & NFSSTA_HASSETFSID) #define NFSHASSTRICT3530(n) ((n)->nm_flag & NFSMNT_STRICT3530) #define NFSHASWRITEVERF(n) ((n)->nm_state & NFSSTA_HASWRITEVERF) #define NFSHASINT(n) ((n)->nm_flag & NFSMNT_INT) #define NFSHASSOFT(n) ((n)->nm_flag & NFSMNT_SOFT) #define NFSHASINTORSOFT(n) ((n)->nm_flag & (NFSMNT_INT | NFSMNT_SOFT)) #define NFSHASDUMBTIMR(n) ((n)->nm_flag & NFSMNT_DUMBTIMR) #define NFSHASNOCONN(n) ((n)->nm_flag & NFSMNT_MNTD) #define NFSHASKERB(n) ((n)->nm_flag & NFSMNT_KERB) #define NFSHASALLGSSNAME(n) ((n)->nm_flag & NFSMNT_ALLGSSNAME) #define NFSHASINTEGRITY(n) ((n)->nm_flag & NFSMNT_INTEGRITY) #define NFSHASPRIVACY(n) ((n)->nm_flag & NFSMNT_PRIVACY) #define NFSSETWRITEVERF(n) ((n)->nm_state |= NFSSTA_HASWRITEVERF) #define NFSSETHASSETFSID(n) ((n)->nm_state |= NFSSTA_HASSETFSID) #define NFSHASPNFSOPT(n) ((n)->nm_flag & NFSMNT_PNFS) #define NFSHASNOLAYOUTCOMMIT(n) ((n)->nm_state & NFSSTA_NOLAYOUTCOMMIT) #define NFSHASSESSPERSIST(n) ((n)->nm_state & NFSSTA_SESSPERSIST) #define NFSHASPNFS(n) ((n)->nm_state & NFSSTA_PNFS) #define NFSHASOPENMODE(n) ((n)->nm_state & NFSSTA_OPENMODE) #define NFSHASONEOPENOWN(n) (((n)->nm_flag & NFSMNT_ONEOPENOWN) != 0 && \ (n)->nm_minorvers > 0) /* * Gets the stats field out of the mount structure. */ #define vfs_statfs(m) (&((m)->mnt_stat)) /* * Set boottime. */ #define NFSSETBOOTTIME(b) (getboottime(&b)) /* * The size of directory blocks in the buffer cache. * MUST BE in the range of PAGE_SIZE <= NFS_DIRBLKSIZ <= MAXBSIZE!! */ #define NFS_DIRBLKSIZ (16 * DIRBLKSIZ) /* Must be a multiple of DIRBLKSIZ */ /* * Define these macros to access mnt_flag fields. */ #define NFSMNT_RDONLY(m) ((m)->mnt_flag & MNT_RDONLY) #endif /* _KERNEL */ /* * Define a structure similar to ufs_args for use in exporting the V4 root. */ struct nfsex_args { char *fspec; struct export_args export; }; /* * These export flags should be defined, but there are no bits left. * Maybe a separate mnt_exflag field could be added or the mnt_flag * field increased to 64 bits? */ #ifndef MNT_EXSTRICTACCESS #define MNT_EXSTRICTACCESS 0x0 #endif #ifndef MNT_EXV4ONLY #define MNT_EXV4ONLY 0x0 #endif #ifdef _KERNEL /* * Define this to invalidate the attribute cache for the nfs node. */ #define NFSINVALATTRCACHE(n) ((n)->n_attrstamp = 0) /* Used for FreeBSD only */ void nfsd_mntinit(void); /* * Define these for vnode lock/unlock ops. * * These are good abstractions to macro out, so that they can be added to * later, for debugging or stats, etc. */ #define NFSVOPLOCK(v, f) vn_lock((v), (f)) #define NFSVOPUNLOCK(v, f) VOP_UNLOCK((v), (f)) #define NFSVOPISLOCKED(v) VOP_ISLOCKED((v)) /* * Define ncl_hash(). */ #define ncl_hash(f, l) (fnv_32_buf((f), (l), FNV1_32_INIT)) int newnfs_iosize(struct nfsmount *); int newnfs_vncmpf(struct vnode *, void *); #ifndef NFS_MINDIRATTRTIMO #define NFS_MINDIRATTRTIMO 3 /* VDIR attrib cache timeout in sec */ #endif #ifndef NFS_MAXDIRATTRTIMO #define NFS_MAXDIRATTRTIMO 60 #endif /* * Nfs outstanding request list element */ struct nfsreq { TAILQ_ENTRY(nfsreq) r_chain; u_int32_t r_flags; /* flags on request, see below */ struct nfsmount *r_nmp; /* Client mnt ptr */ struct mtx r_mtx; /* Mutex lock for this structure */ }; #ifndef NFS_MAXBSIZE #define NFS_MAXBSIZE MAXBCACHEBUF #endif /* * This macro checks to see if issuing of delegations is allowed for this * vnode. */ #ifdef VV_DISABLEDELEG #define NFSVNO_DELEGOK(v) \ ((v) == NULL || ((v)->v_vflag & VV_DISABLEDELEG) == 0) #else #define NFSVNO_DELEGOK(v) (1) #endif /* * Name used by getnewvnode() to describe filesystem, "nfs". * For performance reasons it is useful to have the same string * used in both places that call getnewvnode(). */ extern const char nfs_vnode_tag[]; #endif /* _KERNEL */ #endif /* _NFS_NFSPORT_H */ Index: head/sys/fs/nfsclient/nfs_clrpcops.c =================================================================== --- head/sys/fs/nfsclient/nfs_clrpcops.c (revision 318735) +++ head/sys/fs/nfsclient/nfs_clrpcops.c (revision 318736) @@ -1,6005 +1,6015 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Rpc op calls, generally called from the vnode op calls or through the * buffer cache, for NFS v2, 3 and 4. * These do not normally make any changes to vnode arguments or use * structures that might change between the VFS variants. The returned * arguments are all at the end, after the NFSPROC_T *p one. */ #ifndef APPLEKEXT #include "opt_inet6.h" #include #include SYSCTL_DECL(_vfs_nfs); static int nfsignore_eexist = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW, &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink"); /* * Global variables */ extern int nfs_numnfscbd; extern struct timeval nfsboottime; extern u_int32_t newnfs_false, newnfs_true; extern nfstype nfsv34_type[9]; extern int nfsrv_useacl; extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; extern int nfscl_debuglevel; NFSCLSTATEMUTEX; int nfstest_outofseq = 0; int nfscl_assumeposixlocks = 1; int nfscl_enablecallb = 0; short nfsv4_cbport = NFSV4_CBPORT; int nfstest_openallsetattr = 0; #endif /* !APPLEKEXT */ -#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) +#define DIRHDSIZ offsetof(struct dirent, d_name) /* * nfscl_getsameserver() can return one of three values: * NFSDSP_USETHISSESSION - Use this session for the DS. * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new * session. * NFSDSP_NOTFOUND - No matching server was found. */ enum nfsclds_state { NFSDSP_USETHISSESSION = 0, NFSDSP_SEQTHISSESSION = 1, NFSDSP_NOTFOUND = 2, }; static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *, struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *, nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *, nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *, int *); static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *, struct nfscllockowner *, u_int64_t, u_int64_t, u_int32_t, struct ucred *, NFSPROC_T *, int); static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *, struct acl *, nfsv4stateid_t *, void *); static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int, uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **, struct ucred *, NFSPROC_T *); static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_storage *, struct nfsclds **, NFSPROC_T *); static void nfscl_initsessionslots(struct nfsclsession *); static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *, nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *, struct nfsclflayout *, uint64_t, uint64_t, struct ucred *, NFSPROC_T *); static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *, struct nfsclds *, uint64_t, int, struct nfsfh *, struct ucred *, NFSPROC_T *); static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *, nfsv4stateid_t *, struct nfsclds *, uint64_t, int, struct nfsfh *, int, struct ucred *, NFSPROC_T *); static enum nfsclds_state nfscl_getsameserver(struct nfsmount *, struct nfsclds *, struct nfsclds **); #ifdef notyet static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *, struct nfsfh *, struct ucred *, NFSPROC_T *, void *); #endif /* * nfs null call from vfs. */ APPLESTATIC int nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p) { int error; struct nfsrv_descript nfsd, *nd = &nfsd; NFSCL_REQSTART(nd, NFSPROC_NULL, vp); error = nfscl_request(nd, vp, p, cred, NULL); if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs access rpc op. * For nfs version 3 and 4, use the access rpc to check accessibility. If file * modes are changed on the server, accesses might still fail later. */ APPLESTATIC int nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp) { int error; u_int32_t mode, rmode; if (acmode & VREAD) mode = NFSACCESS_READ; else mode = 0; if (vnode_vtype(vp) == VDIR) { if (acmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_DELETE); if (acmode & VEXEC) mode |= NFSACCESS_LOOKUP; } else { if (acmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); if (acmode & VEXEC) mode |= NFSACCESS_EXECUTE; } /* * Now, just call nfsrpc_accessrpc() to do the actual RPC. */ error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode, NULL); /* * The NFS V3 spec does not clarify whether or not * the returned access bits can be a superset of * the ones requested, so... */ if (!error && (rmode & mode) != mode) error = EACCES; return (error); } /* * The actual rpc, separated out for Darwin. */ APPLESTATIC int nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep, void *stuff) { u_int32_t *tl; u_int32_t supported, rmode; int error; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; *attrflagp = 0; supported = mode; NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(mode); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); supported = fxdr_unsigned(u_int32_t, *tl++); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); } rmode = fxdr_unsigned(u_int32_t, *tl); if (nd->nd_flag & ND_NFSV4) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); /* * It's not obvious what should be done about * unsupported access modes. For now, be paranoid * and clear the unsupported ones. */ rmode &= supported; *rmodep = rmode; } else error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs open rpc */ APPLESTATIC int nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p) { struct nfsclopen *op; struct nfscldeleg *dp; struct nfsfh *nfhp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); u_int32_t mode, clidrev; int ret, newone, error, expireret = 0, retrycnt; /* * For NFSv4, Open Ops are only done on Regular Files. */ if (vnode_vtype(vp) != VREG) return (0); mode = 0; if (amode & FREAD) mode |= NFSV4OPEN_ACCESSREAD; if (amode & FWRITE) mode |= NFSV4OPEN_ACCESSWRITE; nfhp = np->n_fhp; retrycnt = 0; #ifdef notdef { char name[100]; int namel; namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99; bcopy(NFS4NODENAME(np->n_v4), name, namel); name[namel] = '\0'; printf("rpcopen p=0x%x name=%s",p->p_pid,name); if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]); else printf(" fhl=0\n"); } #endif do { dp = NULL; error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1, cred, p, NULL, &op, &newone, &ret, 1); if (error) { return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; if (ret == NFSCLOPEN_DOOPEN) { if (np->n_v4 != NULL) { error = nfsrpc_openrpc(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &dp, 0, 0x0, cred, p, 0, 0); if (dp != NULL) { #ifdef APPLE OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag); #else NFSLOCKNODE(np); np->n_flag &= ~NDELEGMOD; /* * Invalidate the attribute cache, so that * attributes that pre-date the issue of a * delegation are not cached, since the * cached attributes will remain valid while * the delegation is held. */ NFSINVALATTRCACHE(np); NFSUNLOCKNODE(np); #endif (void) nfscl_deleg(nmp->nm_mountp, op->nfso_own->nfsow_clp, nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp); } } else { error = EIO; } newnfs_copyincred(cred, &op->nfso_cred); } else if (ret == NFSCLOPEN_SETCRED) /* * This is a new local open on a delegation. It needs * to have credentials so that an open can be done * against the server during recovery. */ newnfs_copyincred(cred, &op->nfso_cred); /* * nfso_opencnt is the count of how many VOP_OPEN()s have * been done on this Open successfully and a VOP_CLOSE() * is expected for each of these. * If error is non-zero, don't increment it, since the Open * hasn't succeeded yet. */ if (!error) op->nfso_opencnt++; nfscl_openrelease(nmp, op, error, newone); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } /* * the actual open rpc */ APPLESTATIC int nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op, u_int8_t *name, int namelen, struct nfscldeleg **dpp, int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p, int syscred, int recursed) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfscldeleg *dp, *ndp = NULL; struct nfsvattr nfsva; u_int32_t rflags, deleg; nfsattrbit_t attrbits; int error, ret, acesize, limitby; struct nfsclsession *tsep; dp = *dpp; *dpp = NULL; nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; (void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE); if (reclaim) { *tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(delegtype); } else { if (dp != NULL) { *tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = dp->nfsdl_stateid.seqid; *tl++ = dp->nfsdl_stateid.other[0]; *tl++ = dp->nfsdl_stateid.other[1]; *tl = dp->nfsdl_stateid.other[2]; } else { *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); } (void) nfsm_strtom(nd, name, namelen); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); (void) nfsrv_putattrbit(nd, &attrbits); if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(u_int32_t, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) op->nfso_own->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); MALLOC(ndp, struct nfscldeleg *, sizeof (struct nfscldeleg) + newfhlen, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&ndp->nfsdl_owner); LIST_INIT(&ndp->nfsdl_lock); ndp->nfsdl_clp = op->nfso_own->nfsow_clp; ndp->nfsdl_fhlen = newfhlen; NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen); newnfs_copyincred(cred, &ndp->nfsdl_cred); nfscl_lockinit(&ndp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); ndp->nfsdl_stateid.seqid = *tl++; ndp->nfsdl_stateid.other[0] = *tl++; ndp->nfsdl_stateid.other[1] = *tl++; ndp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { ndp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: ndp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: ndp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); ndp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; } } else { ndp->nfsdl_flags = NFSCLDL_READ; } if (ret) ndp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret, &acesize, p); if (error) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) goto nfsmout; if (ndp != NULL) { ndp->nfsdl_change = nfsva.na_filerev; ndp->nfsdl_modtime = nfsva.na_mtime; ndp->nfsdl_flags |= NFSCLDL_MODTIMESET; } if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) { do { ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op, cred, p); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_open"); } while (ret == NFSERR_DELAY); error = ret; } if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) || nfscl_assumeposixlocks) op->nfso_posixlock = 1; else op->nfso_posixlock = 0; /* * If the server is handing out delegations, but we didn't * get one because an OpenConfirm was required, try the * Open again, to get a delegation. This is a harmless no-op, * from a server's point of view. */ if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) && (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) && !error && dp == NULL && ndp == NULL && !recursed) { do { ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, &ndp, 0, 0x0, cred, p, syscred, 1); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_open2"); } while (ret == NFSERR_DELAY); if (ret) { if (ndp != NULL) { FREE((caddr_t)ndp, M_NFSCLDELEG); ndp = NULL; } if (ret == NFSERR_STALECLIENTID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_BADSESSION) error = ret; } } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALECLIENTID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: if (!error) *dpp = ndp; else if (ndp != NULL) FREE((caddr_t)ndp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * open downgrade rpc */ APPLESTATIC int nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED); if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp)))) *tl++ = 0; else *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl++ = op->nfso_stateid.other[2]; *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; } if (nd->nd_repstat && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * V4 Close operation. */ APPLESTATIC int nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p) { struct nfsclclient *clp; int error; if (vnode_vtype(vp) != VREG) return (0); if (doclose) error = nfscl_doclose(vp, &clp, p); else error = nfscl_getclose(vp, &clp); if (error) return (error); nfscl_clientrelease(clp); return (0); } /* * Close the open. */ APPLESTATIC void nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; struct nfscllockowner *lp, *nlp; struct nfscllock *lop, *nlop; struct ucred *tcred; u_int64_t off = 0, len = 0; u_int32_t type = NFSV4LOCKT_READ; int error, do_unlock, trycnt; tcred = newnfs_getcred(); newnfs_copycred(&op->nfso_cred, tcred); /* * (Theoretically this could be done in the same * compound as the close, but having multiple * sequenced Ops in the same compound might be * too scary for some servers.) */ if (op->nfso_posixlock) { off = 0; len = NFS64BITSSET; type = NFSV4LOCKT_READ; } /* * Since this function is only called from VOP_INACTIVE(), no * other thread will be manipulating this Open. As such, the * lock lists are not being changed by other threads, so it should * be safe to do this without locking. */ LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { do_unlock = 1; LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) { if (op->nfso_posixlock == 0) { off = lop->nfslo_first; len = lop->nfslo_end - lop->nfslo_first; if (lop->nfslo_type == F_WRLCK) type = NFSV4LOCKT_WRITE; else type = NFSV4LOCKT_READ; } if (do_unlock) { trycnt = 0; do { error = nfsrpc_locku(nd, nmp, lp, off, len, type, tcred, p, 0); if ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfs_close"); } while ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0 && trycnt++ < 5); if (op->nfso_posixlock) do_unlock = 0; } nfscl_freelock(lop, 0); } /* * Do a ReleaseLockOwner. * The lock owner name nfsl_owner may be used by other opens for * other files but the lock_owner4 name that nfsrpc_rellockown() * puts on the wire has the file handle for this file appended * to it, so it can be done now. */ (void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh, lp->nfsl_open->nfso_fhlen, tcred, p); } /* * There could be other Opens for different files on the same * OpenOwner, so locking is required. */ NFSLOCKCLSTATE(); nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR); NFSUNLOCKCLSTATE(); do { error = nfscl_tryclose(op, tcred, nmp, p); if (error == NFSERR_GRACE) (void) nfs_catnap(PZERO, error, "nfs_close"); } while (error == NFSERR_GRACE); NFSLOCKCLSTATE(); nfscl_lockunlock(&op->nfso_own->nfsow_rwlock); LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) nfscl_freelockowner(lp, 0); nfscl_freeopen(op, 0); NFSUNLOCKCLSTATE(); NFSFREECRED(tcred); } /* * The actual Close RPC. */ APPLESTATIC int nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error; nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh, op->nfso_fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl = op->nfso_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (nd->nd_repstat == 0) NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); error = nd->nd_repstat; if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * V4 Open Confirm RPC. */ APPLESTATIC int nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; int error; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4N(nmp)) return (0); /* No confirmation for NFSv4.1. */ nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl++ = op->nfso_stateid.other[2]; *tl = txdr_unsigned(op->nfso_own->nfsow_seqid); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; } error = nd->nd_repstat; if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs() * when a mount has just occurred and when the server replies NFSERR_EXPIRED. */ APPLESTATIC int nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; nfsattrbit_t attrbits; u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9]; u_short port; int error, isinet6 = 0, callblen; nfsquad_t confirm; u_int32_t lease; static u_int32_t rev = 0; struct nfsclds *dsp; struct in6_addr a6; struct nfsclsession *tsep; if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); clp->nfsc_rev = rev++; if (NFSHASNFSV4N(nmp)) { /* * Either there was no previous session or the * previous session has failed, so... * do an ExchangeID followed by the CreateSession. */ error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p); NFSCL_DEBUG(1, "aft exch=%d\n", error); if (error == 0) error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, &nmp->nm_sockreq, dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p); if (error == 0) { NFSLOCKMNT(nmp); /* * The old sessions cannot be safely free'd * here, since they may still be used by * in-progress RPCs. */ tsep = NULL; if (TAILQ_FIRST(&nmp->nm_sess) != NULL) tsep = NFSMNT_MDSSESSION(nmp); TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list); /* * Wake up RPCs waiting for a slot on the * old session. These will then fail with * NFSERR_BADSESSION and be retried with the * new session by nfsv4_setsequence(). * Also wakeup() processes waiting for the * new session. */ if (tsep != NULL) wakeup(&tsep->nfsess_slots); wakeup(&nmp->nm_sess); NFSUNLOCKMNT(nmp); } else nfscl_freenfsclds(dsp); NFSCL_DEBUG(1, "aft createsess=%d\n", error); if (error == 0 && reclaim == 0) { error = nfsrpc_reclaimcomplete(nmp, cred, p); NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error); if (error == NFSERR_COMPLETEALREADY || error == NFSERR_NOTSUPP) /* Ignore this error. */ error = 0; } return (error); } /* * Allocate a single session structure for NFSv4.0, because some of * the fields are used by NFSv4.0 although it doesn't do a session. */ dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO); mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); NFSLOCKMNT(nmp); TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list); tsep = NFSMNT_MDSSESSION(nmp); NFSUNLOCKMNT(nmp); nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(clp->nfsc_rev); (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); /* * set up the callback address */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_CALLBCKPROG); callblen = strlen(nfsv4_callbackaddr); if (callblen == 0) cp = nfscl_getmyip(nmp, &a6, &isinet6); if (nfscl_enablecallb && nfs_numnfscbd > 0 && (callblen > 0 || cp != NULL)) { port = htons(nfsv4_cbport); cp2 = (u_int8_t *)&port; #ifdef INET6 if ((callblen > 0 && strchr(nfsv4_callbackaddr, ':')) || isinet6) { char ip6buf[INET6_ADDRSTRLEN], *ip6add; (void) nfsm_strtom(nd, "tcp6", 4); if (callblen == 0) { ip6_sprintf(ip6buf, (struct in6_addr *)cp); ip6add = ip6buf; } else { ip6add = nfsv4_callbackaddr; } snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d", ip6add, cp2[0], cp2[1]); } else #endif { (void) nfsm_strtom(nd, "tcp", 3); if (callblen == 0) snprintf(addr, INET6_ADDRSTRLEN + 9, "%d.%d.%d.%d.%d.%d", cp[0], cp[1], cp[2], cp[3], cp2[0], cp2[1]); else snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d", nfsv4_callbackaddr, cp2[0], cp2[1]); } (void) nfsm_strtom(nd, addr, strlen(addr)); } else { (void) nfsm_strtom(nd, "tcp", 3); (void) nfsm_strtom(nd, "0.0.0.0.0.0", 11); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(clp->nfsc_cbident); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); tsep->nfsess_clientid.lval[0] = *tl++; tsep->nfsess_clientid.lval[1] = *tl++; confirm.lval[0] = *tl++; confirm.lval[1] = *tl; mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; /* * and confirm it. */ nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); *tl++ = tsep->nfsess_clientid.lval[0]; *tl++ = tsep->nfsess_clientid.lval[1]; *tl++ = confirm.lval[0]; *tl = confirm.lval[1]; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; if (nd->nd_repstat == 0) { nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh, nmp->nm_fhsize, NULL, NULL); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred); if (error) goto nfsmout; clp->nfsc_renew = NFSCL_RENEW(lease); clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clp->nfsc_clientidrev++; if (clp->nfsc_clientidrev == 0) clp->nfsc_clientidrev++; } } } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getattr call. */ APPLESTATIC int nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!nd->nd_repstat) error = nfsm_loadattr(nd, nap); else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getattr call with non-vnode arguemnts. */ APPLESTATIC int nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp, uint32_t *leasep) { struct nfsrv_descript nfsd, *nd = &nfsd; int error, vers = NFS_VER2; nfsattrbit_t attrbits; nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL); if (nd->nd_flag & ND_NFSV4) { vers = NFS_VER4; NFSGETATTR_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); } else if (nd->nd_flag & ND_NFSV3) { vers = NFS_VER3; } if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, vers, NULL, 1, xidp, NULL); if (error) return (error); if (nd->nd_repstat == 0) { if ((nd->nd_flag & ND_NFSV4) != 0) error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL, NULL, NULL); else error = nfsm_loadattr(nd, nap); } else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do an nfs setattr operation. */ APPLESTATIC int nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp, void *stuff) { int error, expireret = 0, openerr, retrycnt; u_int32_t clidrev = 0, mode; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsfh *nfhp; nfsv4stateid_t stateid; void *lckp; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size)) mode = NFSV4OPEN_ACCESSWRITE; else mode = NFSV4OPEN_ACCESSREAD; retrycnt = 0; do { lckp = NULL; openerr = 1; if (NFSHASNFSV4(nmp)) { nfhp = VTONFS(vp)->n_fhp; error = nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp); if (error && vnode_vtype(vp) == VREG && (mode == NFSV4OPEN_ACCESSWRITE || nfstest_openallsetattr)) { /* * No Open stateid, so try and open the file * now. */ if (mode == NFSV4OPEN_ACCESSWRITE) openerr = nfsrpc_open(vp, FWRITE, cred, p); else openerr = nfsrpc_open(vp, FREAD, cred, p); if (!openerr) (void) nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp); } } if (vap != NULL) error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p, rnap, attrflagp, stuff); else error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid, stuff); if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_OPENMODE; NFSUNLOCKMNT(nmp); } if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (!openerr) (void) nfsrpc_close(vp, 0, p); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_setattr"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4) || (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } static int nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp); if (nd->nd_flag & ND_NFSV4) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); vap->va_type = vnode_vtype(vp); nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } else if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff); if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error) error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error) error = nfscl_postop_attr(nd, rnap, attrflagp, stuff); mbuf_freem(nd->nd_mrep); if (nd->nd_repstat && !error) error = nd->nd_repstat; return (error); } /* * nfs lookup rpc */ APPLESTATIC int nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; struct nfsnode *np; struct nfsfh *nfhp; nfsattrbit_t attrbits; int error = 0, lookupp = 0; *attrflagp = 0; *dattrflagp = 0; if (vnode_vtype(dvp) != VDIR) return (ENOTDIR); nmp = VFSTONFS(vnode_mount(dvp)); if (len > NFS_MAXNAMLEN) return (ENAMETOOLONG); if (NFSHASNFSV4(nmp) && len == 1 && name[0] == '.') { /* * Just return the current dir's fh. */ np = VTONFS(dvp); MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + np->n_fhp->nfh_len, M_NFSFH, M_WAITOK); nfhp->nfh_len = np->n_fhp->nfh_len; NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len); *nfhpp = nfhp; return (0); } if (NFSHASNFSV4(nmp) && len == 2 && name[0] == '.' && name[1] == '.') { lookupp = 1; NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp); } else { NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp); (void) nfsm_strtom(nd, name, len); } if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, dvp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat) { /* * When an NFSv4 Lookupp returns ENOENT, it means that * the lookup is at the root of an fs, so return this dir. */ if (nd->nd_repstat == NFSERR_NOENT && lookupp) { np = VTONFS(dvp); MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + np->n_fhp->nfh_len, M_NFSFH, M_WAITOK); nfhp->nfh_len = np->n_fhp->nfh_len; NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len); *nfhpp = nfhp; mbuf_freem(nd->nd_mrep); return (0); } if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff); else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error == 0) *dattrflagp = 1; } goto nfsmout; } if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error != 0) goto nfsmout; *dattrflagp = 1; /* Skip over the Lookup and GetFH operation status values. */ NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); } error = nfsm_getfh(nd, nfhpp); if (error) goto nfsmout; error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if ((nd->nd_flag & ND_NFSV3) && !error) error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff); nfsmout: mbuf_freem(nd->nd_mrep); if (!error && nd->nd_repstat) error = nd->nd_repstat; return (error); } /* * Do a readlink rpc. */ APPLESTATIC int nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsnode *np = VTONFS(vp); nfsattrbit_t attrbits; int error, len, cangetattr = 1; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READLINK, vp); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (!nd->nd_repstat && !error) { NFSM_STRSIZ(len, NFS_MAXPATHLEN); /* * This seems weird to me, but must have been added to * FreeBSD for some reason. The only thing I can think of * is that there was/is some server that replies with * more link data than it should? */ if (len == NFS_MAXPATHLEN) { NFSLOCKNODE(np); if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) { len = np->n_size; cangetattr = 0; } NFSUNLOCKNODE(np); } error = nfsm_mbufuio(nd, uiop, len); if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Read operation. */ APPLESTATIC int nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { int error, expireret = 0, retrycnt; u_int32_t clidrev = 0; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); struct ucred *newcred; struct nfsfh *nfhp = NULL; nfsv4stateid_t stateid; void *lckp; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; newcred = cred; if (NFSHASNFSV4(nmp)) { nfhp = np->n_fhp; newcred = NFSNEWCRED(cred); } retrycnt = 0; do { lckp = NULL; if (NFSHASNFSV4(nmp)) (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid, &lckp); error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap, attrflagp, stuff); if (error == NFSERR_OPENMODE) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_OPENMODE; NFSUNLOCKMNT(nmp); } if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_read"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4) || (error == NFSERR_OPENMODE && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; if (NFSHASNFSV4(nmp)) NFSFREECRED(newcred); return (error); } /* * The actual read RPC. */ static int nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred, nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; int error = 0, len, retlen, tsiz, eof = 0; struct nfsrv_descript nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsrv_descript *nd = &nfsd; int rsize; off_t tmp_off; *attrflagp = 0; tsiz = uio_uio_resid(uiop); tmp_off = uiop->uio_offset + tsiz; NFSLOCKMNT(nmp); if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) { NFSUNLOCKMNT(nmp); return (EFBIG); } rsize = nmp->nm_rsize; NFSUNLOCKMNT(nmp); nd->nd_mrep = NULL; while (tsiz > 0) { *attrflagp = 0; len = (tsiz > rsize) ? rsize : tsiz; NFSCL_REQSTART(nd, NFSPROC_READ, vp); if (nd->nd_flag & ND_NFSV4) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3); if (nd->nd_flag & ND_NFSV2) { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } else { txdr_hyper(uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } /* * Since I can't do a Getattr for NFSv4 for Write, there * doesn't seem any point in doing one here, either. * (See the comment in nfsrpc_writerpc() for more info.) */ error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) { error = nfsm_loadattr(nd, nap); if (!error) *attrflagp = 1; } if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); } NFSM_STRSIZ(retlen, len); error = nfsm_mbufuio(nd, uiop, retlen); if (error) goto nfsmout; mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; tsiz -= retlen; if (!(nd->nd_flag & ND_NFSV2)) { if (eof || retlen == 0) tsiz = 0; } else if (retlen < len) tsiz = 0; } return (0); nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } /* * nfs write operation * When called_from_strategy != 0, it should return EIO for an error that * indicates recovery is in progress, so that the buffer will be left * dirty and be written back to the server later. If it loops around, * the recovery thread could get stuck waiting for the buffer and recovery * will then deadlock. */ APPLESTATIC int nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff, int called_from_strategy) { int error, expireret = 0, retrycnt, nostateid; u_int32_t clidrev = 0; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); struct ucred *newcred; struct nfsfh *nfhp = NULL; nfsv4stateid_t stateid; void *lckp; *must_commit = 0; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; newcred = cred; if (NFSHASNFSV4(nmp)) { newcred = NFSNEWCRED(cred); nfhp = np->n_fhp; } retrycnt = 0; do { lckp = NULL; nostateid = 0; if (NFSHASNFSV4(nmp)) { (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid, &lckp); if (stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { nostateid = 1; NFSCL_DEBUG(1, "stateid0 in write\n"); } } /* * If there is no stateid for NFSv4, it means this is an * extraneous write after close. Basically a poorly * implemented buffer cache. Just don't do the write. */ if (nostateid) error = 0; else error = nfsrpc_writerpc(vp, uiop, iomode, must_commit, newcred, &stateid, p, nap, attrflagp, stuff); if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_write"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_DELAY || ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error != 0 && (retrycnt >= 4 || ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0))) error = EIO; if (NFSHASNFSV4(nmp)) NFSFREECRED(newcred); return (error); } /* * The actual write RPC. */ static int nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC; int wccflag = 0, wsize; int32_t backup; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; nfsattrbit_t attrbits; off_t tmp_off; KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1")); *attrflagp = 0; tsiz = uio_uio_resid(uiop); tmp_off = uiop->uio_offset + tsiz; NFSLOCKMNT(nmp); if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) { NFSUNLOCKMNT(nmp); return (EFBIG); } wsize = nmp->nm_wsize; NFSUNLOCKMNT(nmp); nd->nd_mrep = NULL; /* NFSv2 sometimes does a write with */ nd->nd_repstat = 0; /* uio_resid == 0, so the while is not done */ while (tsiz > 0) { *attrflagp = 0; len = (tsiz > wsize) ? wsize : tsiz; NFSCL_REQSTART(nd, NFSPROC_WRITE, vp); if (nd->nd_flag & ND_NFSV4) { nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED); txdr_hyper(uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); } else if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED); txdr_hyper(uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); } else { u_int32_t x; NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* * Not sure why someone changed this, since the * RFC clearly states that "beginoffset" and * "totalcount" are ignored, but it wouldn't * surprise me if there's a busted server out there. */ /* Set both "begin" and "current" to non-garbage. */ x = txdr_unsigned((u_int32_t)uiop->uio_offset); *tl++ = x; /* "begin offset" */ *tl++ = x; /* "current offset" */ x = txdr_unsigned(len); *tl++ = x; /* total to this offset */ *tl = x; /* size of this write */ } nfsm_uiombuf(nd, uiop, len); /* * Although it is tempting to do a normal Getattr Op in the * NFSv4 compound, the result can be a nearly hung client * system if the Getattr asks for Owner and/or OwnerGroup. * It occurs when the client can't map either the Owner or * Owner_group name in the Getattr reply to a uid/gid. When * there is a cache miss, the kernel does an upcall to the * nfsuserd. Then, it can try and read the local /etc/passwd * or /etc/group file. It can then block in getnewbuf(), * waiting for dirty writes to be pushed to the NFS server. * The only reason this doesn't result in a complete * deadlock, is that the upcall times out and allows * the write to complete. However, progress is so slow * that it might just as well be deadlocked. * As such, we get the rest of the attributes, but not * Owner or Owner_group. * nb: nfscl_loadattrcache() needs to be told that these * partial attributes from a write rpc are being * passed in, via a argument flag. */ if (nd->nd_flag & ND_NFSV4) { NFSWRITEGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat) { /* * In case the rpc gets retried, roll * the uio fileds changed by nfsm_uiombuf() * back. */ uiop->uio_offset -= len; uio_uio_resid_add(uiop, len); uio_iov_base_add(uiop, -len); uio_iov_len_add(uiop, len); } if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { error = nfscl_wcc_data(nd, vp, nap, attrflagp, &wccflag, stuff); if (error) goto nfsmout; } if (!nd->nd_repstat) { if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; goto nfsmout; } else if (rlen < len) { backup = len - rlen; uio_iov_base_add(uiop, -(backup)); uio_iov_len_add(uiop, backup); uiop->uio_offset -= backup; uio_uio_resid_add(uiop, backup); len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest commitment level * obtained by any of the RPCs. */ if (committed == NFSWRITE_FILESYNC) committed = commit; else if (committed == NFSWRITE_DATASYNC && commit == NFSWRITE_UNSTABLE) committed = commit; NFSLOCKMNT(nmp); if (!NFSHASWRITEVERF(nmp)) { NFSBCOPY((caddr_t)tl, (caddr_t)&nmp->nm_verf[0], NFSX_VERF); NFSSETWRITEVERF(nmp); } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); } NFSUNLOCKMNT(nmp); } if (nd->nd_flag & ND_NFSV4) NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) { error = nfsm_loadattr(nd, nap); if (!error) *attrflagp = NFS_LATTR_NOSHRINK; } } else { error = nd->nd_repstat; } if (error) goto nfsmout; NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4)); mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; tsiz -= len; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); *iomode = committed; if (nd->nd_repstat && !error) error = nd->nd_repstat; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. */ APPLESTATIC int nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap, u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; int error = 0; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp); if (nd->nd_flag & ND_NFSV4) { if (vtyp == VBLK || vtyp == VCHR) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = vtonfsv34_type(vtyp); *tl++ = txdr_unsigned(NFSMAJOR(rdev)); *tl = txdr_unsigned(NFSMINOR(rdev)); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vtyp); } } (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vtyp); } if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) nfscl_fillsattr(nd, vap, dvp, 0, 0); if ((nd->nd_flag & ND_NFSV3) && (vtyp == VCHR || vtyp == VBLK)) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSMAJOR(rdev)); *tl = txdr_unsigned(NFSMINOR(rdev)); } if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } if (nd->nd_flag & ND_NFSV2) nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; } error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV3) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!error && nd->nd_repstat) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs file create call * Mostly just call the approriate routine. (I separated out v4, so that * error recovery wouldn't be as difficult.) */ APPLESTATIC int nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { int error = 0, newone, expireret = 0, retrycnt, unlocked; struct nfsclowner *owp; struct nfscldeleg *dp; struct nfsmount *nmp = VFSTONFS(vnode_mount(dvp)); u_int32_t clidrev; if (NFSHASNFSV4(nmp)) { retrycnt = 0; do { dp = NULL; error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone, NULL, 1); if (error) return (error); if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; error = nfsrpc_createv4(dvp, name, namelen, vap, cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff, &unlocked); /* * There is no need to invalidate cached attributes here, * since new post-delegation issue attributes are always * returned by nfsrpc_createv4() and these will update the * attribute cache. */ if (dp != NULL) (void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp, (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp); nfscl_ownerrelease(nmp, owp, error, newone, unlocked); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; } else { error = nfsrpc_createv23(dvp, name, namelen, vap, cverf, fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff); } return (error); } /* * The create rpc for v2 and 3. */ static int nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; int error = 0; struct nfsrv_descript nfsd, *nd = &nfsd; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp); (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fmode & O_EXCL) { *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } } else { nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0); } error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV3) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } static int nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff, int *unlockedp) { u_int32_t *tl; int error = 0, deleg, newone, ret, acesize, limitby; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsclopen *op; struct nfscldeleg *dp = NULL; struct nfsnode *np; struct nfsfh *nfhp; nfsattrbit_t attrbits; nfsv4stateid_t stateid; u_int32_t rflags; struct nfsmount *nmp; struct nfsclsession *tsep; nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); *unlockedp = 0; *nfhpp = NULL; *dpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp); /* * For V4, this is actually an Open op. */ NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(owp->nfsow_seqid); *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD); *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; (void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CREATE); if (fmode & O_EXCL) { if (NFSHASNFSV4N(nmp)) { if (NFSHASSESSPERSIST(nmp)) { /* Use GUARDED for persistent sessions. */ *tl = txdr_unsigned(NFSCREATE_GUARDED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } else { /* Otherwise, use EXCLUSIVE4_1. */ *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; nfscl_fillsattr(nd, vap, dvp, 0, 0); } } else { /* NFSv4.0 */ *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; } } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); (void) nfsm_strtom(nd, name, namelen); /* Get the new file's handle and attributes. */ NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); /* Get the directory's post-op attributes. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); NFSCL_INCRSEQID(owp->nfsow_seqid, nd); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); stateid.seqid = *tl++; stateid.other[0] = *tl++; stateid.other[1] = *tl++; stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); (void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) owp->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); MALLOC(dp, struct nfscldeleg *, sizeof (struct nfscldeleg) + NFSX_V4FHMAX, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&dp->nfsdl_owner); LIST_INIT(&dp->nfsdl_lock); dp->nfsdl_clp = owp->nfsow_clp; newnfs_copyincred(cred, &dp->nfsdl_cred); nfscl_lockinit(&dp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); dp->nfsdl_stateid.seqid = *tl++; dp->nfsdl_stateid.other[0] = *tl++; dp->nfsdl_stateid.other[1] = *tl++; dp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { dp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: dp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: dp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); dp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; } } else { dp->nfsdl_flags = NFSCLDL_READ; } if (ret) dp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret, &acesize, p); if (error) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; /* Get rid of the PutFH and Getattr status values. */ NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error) goto nfsmout; *dattrflagp = 1; if (dp != NULL && *attrflagp) { dp->nfsdl_change = nnap->na_filerev; dp->nfsdl_modtime = nnap->na_mtime; dp->nfsdl_flags |= NFSCLDL_MODTIMESET; } /* * We can now complete the Open state. */ nfhp = *nfhpp; if (dp != NULL) { dp->nfsdl_fhlen = nfhp->nfh_len; NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len); } /* * Get an Open structure that will be * attached to the OpenOwner, acquired already. */ error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0, cred, p, NULL, &op, &newone, NULL, 0); if (error) goto nfsmout; op->nfso_stateid = stateid; newnfs_copyincred(cred, &op->nfso_cred); if ((rflags & NFSV4OPEN_RESULTCONFIRM)) { do { ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh, nfhp->nfh_len, op, cred, p); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_create"); } while (ret == NFSERR_DELAY); error = ret; } /* * If the server is handing out delegations, but we didn't * get one because an OpenConfirm was required, try the * Open again, to get a delegation. This is a harmless no-op, * from a server's point of view. */ if ((rflags & NFSV4OPEN_RESULTCONFIRM) && (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) && !error && dp == NULL) { do { ret = nfsrpc_openrpc(VFSTONFS(vnode_mount(dvp)), dvp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, nfhp->nfh_fh, nfhp->nfh_len, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op, name, namelen, &dp, 0, 0x0, cred, p, 0, 1); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_crt2"); } while (ret == NFSERR_DELAY); if (ret) { if (dp != NULL) { FREE((caddr_t)dp, M_NFSCLDELEG); dp = NULL; } if (ret == NFSERR_STALECLIENTID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_BADSESSION) error = ret; } } nfscl_openrelease(nmp, op, error, newone); *unlockedp = 1; } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALECLIENTID) nfscl_initiate_recovery(owp->nfsow_clp); nfsmout: if (!error) *dpp = dp; else if (dp != NULL) FREE((caddr_t)dp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * Nfs remove rpc */ APPLESTATIC int nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsnode *np; struct nfsmount *nmp; nfsv4stateid_t dstateid; int error, ret = 0, i; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); nmp = VFSTONFS(vnode_mount(dvp)); tryagain: if (NFSHASNFSV4(nmp) && ret == 0) { ret = nfscl_removedeleg(vp, p, &dstateid); if (ret == 1) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = dstateid.seqid; *tl++ = dstateid.other[0]; *tl++ = dstateid.other[1]; *tl++ = dstateid.other[2]; *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(dvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_REMOVE); } } else { ret = 0; } if (ret == 0) NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp); (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { /* For NFSv4, parse out any Delereturn replies. */ if (ret > 0 && nd->nd_repstat != 0 && (nd->nd_flag & ND_NOMOREDATA)) { /* * If the Delegreturn failed, try again without * it. The server will Recall, as required. */ mbuf_freem(nd->nd_mrep); goto tryagain; } for (i = 0; i < (ret * 2); i++) { if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } } error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do an nfs rename rpc. */ APPLESTATIC int nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen, vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap, int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; struct nfsnode *np; nfsattrbit_t attrbits; nfsv4stateid_t fdstateid, tdstateid; int error = 0, ret = 0, gottd = 0, gotfd = 0, i; *fattrflagp = 0; *tattrflagp = 0; nmp = VFSTONFS(vnode_mount(fdvp)); if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); tryagain: if (NFSHASNFSV4(nmp) && ret == 0) { ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp, &tdstateid, &gottd, p); if (gotfd && gottd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp); } else if (gotfd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp); } else if (gottd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp); } if (gotfd) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = fdstateid.seqid; *tl++ = fdstateid.other[0]; *tl++ = fdstateid.other[1]; *tl = fdstateid.other[2]; if (gottd) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(tvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_DELEGRETURN); } } if (gottd) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = tdstateid.seqid; *tl++ = tdstateid.other[0]; *tl++ = tdstateid.other[1]; *tl = tdstateid.other[2]; } if (ret > 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(fdvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_SAVEFH); } } else { ret = 0; } if (ret == 0) NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSWCCATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh, VTONFS(tdvp)->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_V4WCCATTR; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_RENAME); } (void) nfsm_strtom(nd, fnameptr, fnamelen); if (!(nd->nd_flag & ND_NFSV4)) (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh, VTONFS(tdvp)->n_fhp->nfh_len, 0); (void) nfsm_strtom(nd, tnameptr, tnamelen); error = nfscl_request(nd, fdvp, p, cred, fstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { /* For NFSv4, parse out any Delereturn replies. */ if (ret > 0 && nd->nd_repstat != 0 && (nd->nd_flag & ND_NOMOREDATA)) { /* * If the Delegreturn failed, try again without * it. The server will Recall, as required. */ mbuf_freem(nd->nd_mrep); goto tryagain; } for (i = 0; i < (ret * 2); i++) { if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) { if (i == 0 && ret > 1) { /* * If the Delegreturn failed, try again * without it. The server will Recall, as * required. * If ret > 1, the first iteration of this * loop is the second DelegReturn result. */ mbuf_freem(nd->nd_mrep); goto tryagain; } else { nd->nd_flag |= ND_NOMOREDATA; } } } } /* Now, the first wcc attribute reply. */ if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL, fstuff); /* and the second wcc attribute reply. */ if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } if (!error) error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp, NULL, tstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs hard link create rpc */ APPLESTATIC int nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error = 0; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_LINK, vp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); } (void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh, VTONFS(dvp)->n_fhp->nfh_len, 0); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSWCCATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_V4WCCATTR; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_LINK); } (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, vp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, dstuff); if (!error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { /* * First, parse out the PutFH and Getattr result. */ NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (!(*(tl + 1))) NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; /* * Get the pre-op attributes. */ error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs symbolic link create rpc */ APPLESTATIC int nfsrpc_symlink(vnode_t dvp, char *name, int namelen, char *target, struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; int slen, error = 0; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; nmp = VFSTONFS(vnode_mount(dvp)); slen = strlen(target); if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFLNK); (void) nfsm_strtom(nd, target, slen); } (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) nfscl_fillsattr(nd, vap, dvp, 0, 0); if (!(nd->nd_flag & ND_NFSV4)) (void) nfsm_strtom(nd, target, slen); if (nd->nd_flag & ND_NFSV2) nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if ((nd->nd_flag & ND_NFSV3) && !error) { if (!nd->nd_repstat) error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (!error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. * Only do this if vfs.nfs.ignore_eexist is set. * Never do this for NFSv4.1 or later minor versions, since sessions * should guarantee "exactly once" RPC semantics. */ if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) || nmp->nm_minorvers == 0)) error = 0; return (error); } /* * nfs make dir rpc */ APPLESTATIC int nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error = 0; struct nfsfh *fhp; struct nfsmount *nmp; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; nmp = VFSTONFS(vnode_mount(dvp)); fhp = VTONFS(dvp)->n_fhp; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFDIR); } (void) nfsm_strtom(nd, name, namelen); nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0); if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!nd->nd_repstat && !error) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); } if (!error) error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) { /* Get rid of the PutFH and Getattr status values. */ NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error == 0) *dattrflagp = 1; } } if ((nd->nd_flag & ND_NFSV3) && !error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. * Only do this if vfs.nfs.ignore_eexist is set. * Never do this for NFSv4.1 or later minor versions, since sessions * should guarantee "exactly once" RPC semantics. */ if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) || nmp->nm_minorvers == 0)) error = 0; return (error); } /* * nfs remove directory call */ APPLESTATIC int nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp); (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * Readdir rpc. * Always returns with either uio_resid unchanged, if you are at the * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks * filled in. * I felt this would allow caching of directory blocks more easily * than returning a pertially filled block. * Directory offset cookies: * Oh my, what to do with them... * I can think of three ways to deal with them: * 1 - have the layer above these RPCs maintain a map between logical * directory byte offsets and the NFS directory offset cookies * 2 - pass the opaque directory offset cookies up into userland * and let the libc functions deal with them, via the system call * 3 - return them to userland in the "struct dirent", so future versions * of libc can use them and do whatever is necessary to make things work * above these rpc calls, in the meantime * For now, I do #3 by "hiding" the directory offset cookies after the * d_name field in struct dirent. This is space inside d_reclen that * will be ignored by anything that doesn't know about them. * The directory offset cookies are filled in as the last 8 bytes of * each directory entry, after d_name. Someday, the userland libc * functions may be able to use these. In the meantime, it satisfies * OpenBSD's requirements for cookies being returned. * If expects the directory offset cookie for the read to be in uio_offset * and returns the one for the next entry after this directory block in * there, as well. */ APPLESTATIC int nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int *eofp, void *stuff) { int len, left; struct dirent *dp = NULL; u_int32_t *tl; nfsquad_t cookie, ncookie; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *dnp = VTONFS(vp); struct nfsvattr nfsva; struct nfsrv_descript nfsd, *nd = &nfsd; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0; long dotfileid, dotdotfileid = 0; u_int32_t fakefileno = 0xffffffff, rderr; char *cp; nfsattrbit_t attrbits, dattrbits; u_int32_t *tl2 = NULL; size_t tresid; KASSERT(uiop->uio_iovcnt == 1 && (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0, ("nfs readdirrpc bad uio")); /* * There is no point in reading a lot more than uio_resid, however * adding one additional DIRBLKSIZ makes sense. Since uio_resid * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this * will never make readsize > nm_readdirsize. */ readsize = nmp->nm_readdirsize; if (readsize > uio_uio_resid(uiop)) readsize = uio_uio_resid(uiop) + DIRBLKSIZ; *attrflagp = 0; if (eofp) *eofp = 0; tresid = uio_uio_resid(uiop); cookie.lval[0] = cookiep->nfsuquad[0]; cookie.lval[1] = cookiep->nfsuquad[1]; nd->nd_mrep = NULL; /* * For NFSv4, first create the "." and ".." entries. */ if (NFSHASNFSV4(nmp)) { reqsize = 6 * NFSX_UNSIGNED; NFSGETATTR_ATTRBIT(&dattrbits); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE); if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr, NFSATTRBIT_MOUNTEDONFILEID)) { NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); gotmnton = 1; } else { /* * Must fake it. Use the fileno, except when the * fsid is != to that of the directory. For that * case, generate a fake fileno that is not the same. */ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID); gotmnton = 0; } /* * Joy, oh joy. For V4 we get to hand craft '.' and '..'. */ if (uiop->uio_offset == 0) { NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); dotfileid = 0; /* Fake out the compiler. */ if ((nd->nd_flag & ND_NOMOREDATA) == 0) { error = nfsm_loadattr(nd, &nfsva); if (error != 0) goto nfsmout; dotfileid = nfsva.na_fileid; } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 4)); if (len > 0 && len <= NFSX_V4FHMAX) error = nfsm_advance(nd, NFSM_RNDUP(len), -1); else error = EPERM; if (!error) { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) { dotdotfileid = dotfileid; } else if (gotmnton) { if (nfsva.na_mntonfileno != 0xffffffff) dotdotfileid = nfsva.na_mntonfileno; else dotdotfileid = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dotdotfileid = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dotdotfileid = fakefileno; } } } else if (nd->nd_repstat == NFSERR_NOENT) { /* * Lookupp returns NFSERR_NOENT when we are * at the root, so just use the current dir. */ nd->nd_repstat = 0; dotdotfileid = dotfileid; } else { error = nd->nd_repstat; } mbuf_freem(nd->nd_mrep); if (error) return (error); nd->nd_mrep = NULL; - dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); + dp = (struct dirent *)uio_iov_base(uiop); + dp->d_off = 0; dp->d_type = DT_DIR; dp->d_fileno = dotfileid; dp->d_namlen = 1; + *((uint64_t *)dp->d_name) = 0; /* Zero pad it. */ dp->d_name[0] = '.'; - dp->d_name[1] = '\0'; - dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER; + dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ - tl = (u_int32_t *)&dp->d_name[4]; + tl = (u_int32_t *)&dp->d_name[8]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); - dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); + dp = (struct dirent *)uio_iov_base(uiop); + dp->d_off = 0; dp->d_type = DT_DIR; dp->d_fileno = dotdotfileid; dp->d_namlen = 2; + *((uint64_t *)dp->d_name) = 0; dp->d_name[0] = '.'; dp->d_name[1] = '.'; - dp->d_name[2] = '\0'; - dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER; + dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ - tl = (u_int32_t *)&dp->d_name[4]; + tl = (u_int32_t *)&dp->d_name[8]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); } NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR); } else { reqsize = 5 * NFSX_UNSIGNED; } /* * Loop around doing readdir rpc's of size readsize. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READDIR, vp); if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = cookie.lval[1]; *tl = txdr_unsigned(readsize); } else { NFSM_BUILD(tl, u_int32_t *, reqsize); *tl++ = cookie.lval[0]; *tl++ = cookie.lval[1]; if (cookie.qval == 0) { *tl++ = 0; *tl++ = 0; } else { NFSLOCKNODE(dnp); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; NFSUNLOCKNODE(dnp); } if (nd->nd_flag & ND_NFSV4) { *tl++ = txdr_unsigned(readsize); *tl = txdr_unsigned(readsize); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &dattrbits); } else { *tl = txdr_unsigned(readsize); } } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!(nd->nd_flag & ND_NFSV2)) { if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (!nd->nd_repstat && !error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); NFSLOCKNODE(dnp); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; NFSUNLOCKNODE(dnp); } } if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); if (!more_dirs) tryformoredirs = 0; /* loop through the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; len = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); nfsva.na_fileid = fxdr_hyper(tl); tl += 2; len = fxdr_unsigned(int, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_fileid = fxdr_unsigned(long, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; goto nfsmout; } - tlen = NFSM_RNDUP(len); + tlen = roundup2(len, 8); if (tlen == len) - tlen += 4; /* To ensure null termination */ + tlen += 8; /* To ensure null termination. */ left = DIRBLKSIZ - blksiz; - if ((int)(tlen + DIRHDSIZ + NFSX_HYPER) > left) { + if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) { dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; blksiz = 0; } - if ((int)(tlen + DIRHDSIZ + NFSX_HYPER) > uio_uio_resid(uiop)) + if (_GENERIC_DIRLEN(len) + NFSX_HYPER > + uio_uio_resid(uiop)) bigenough = 0; if (bigenough) { - dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); + dp = (struct dirent *)uio_iov_base(uiop); + dp->d_off = 0; dp->d_namlen = len; - dp->d_reclen = tlen + DIRHDSIZ + NFSX_HYPER; + dp->d_reclen = _GENERIC_DIRLEN(len) + + NFSX_HYPER; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uio_uio_resid_add(uiop, -(DIRHDSIZ)); uiop->uio_offset += DIRHDSIZ; uio_iov_base_add(uiop, DIRHDSIZ); uio_iov_len_add(uiop, -(DIRHDSIZ)); error = nfsm_mbufuio(nd, uiop, len); if (error) goto nfsmout; - cp = CAST_DOWN(caddr_t, uio_iov_base(uiop)); + cp = uio_iov_base(uiop); tlen -= len; *cp = '\0'; /* null terminate */ cp += tlen; /* points to cookie storage */ tl2 = (u_int32_t *)cp; uio_iov_base_add(uiop, (tlen + NFSX_HYPER)); uio_iov_len_add(uiop, -(tlen + NFSX_HYPER)); uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER)); uiop->uio_offset += (tlen + NFSX_HYPER); } else { error = nfsm_advance(nd, NFSM_RNDUP(len), -1); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV4) { rderr = 0; nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, &rderr, p, cred); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; } else { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); ncookie.lval[0] = 0; ncookie.lval[1] = *tl++; } if (bigenough) { if (nd->nd_flag & ND_NFSV4) { if (rderr) { dp->d_fileno = 0; } else { if (gotmnton) { if (nfsva.na_mntonfileno != 0xffffffff) dp->d_fileno = nfsva.na_mntonfileno; else dp->d_fileno = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dp->d_fileno = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dp->d_fileno = fakefileno; } dp->d_type = vtonfs_dtype(nfsva.na_type); } } else { dp->d_fileno = nfsva.na_fileid; } *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] = ncookie.lval[0]; *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] = ncookie.lval[1]; } more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); if (tryformoredirs) more_dirs = !eof; if (nd->nd_flag & ND_NFSV4) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } } mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; } /* * If returning no data, assume end of file. * If not bigenough, return not end of file, since you aren't * returning all the data * Otherwise, return the eof flag from the server. */ if (eofp) { if (tresid == ((size_t)(uio_uio_resid(uiop)))) *eofp = 1; else if (!bigenough) *eofp = 0; else *eofp = eof; } /* * Add extra empty records to any remaining DIRBLKSIZ chunks. */ - while (uio_uio_resid(uiop) > 0 && ((size_t)(uio_uio_resid(uiop))) != tresid) { - dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); + while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) { + dp = (struct dirent *)uio_iov_base(uiop); dp->d_type = DT_UNKNOWN; dp->d_fileno = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; tl = (u_int32_t *)&dp->d_name[4]; *tl++ = cookie.lval[0]; *tl = cookie.lval[1]; dp->d_reclen = DIRBLKSIZ; uio_iov_base_add(uiop, DIRBLKSIZ); uio_iov_len_add(uiop, -(DIRBLKSIZ)); uio_uio_resid_add(uiop, -(DIRBLKSIZ)); uiop->uio_offset += DIRBLKSIZ; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } #ifndef APPLE /* * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir(). * (Also used for NFS V4 when mount flag set.) * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.) */ APPLESTATIC int nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int *eofp, void *stuff) { int len, left; struct dirent *dp = NULL; u_int32_t *tl; vnode_t newvp = NULLVP; struct nfsrv_descript nfsd, *nd = &nfsd; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *dnp = VTONFS(vp), *np; struct nfsvattr nfsva; struct nfsfh *nfhp; nfsquad_t cookie, ncookie; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0; int isdotdot = 0, unlocknewvp = 0; long dotfileid, dotdotfileid = 0, fileno = 0; char *cp; nfsattrbit_t attrbits, dattrbits; size_t tresid; u_int32_t *tl2 = NULL, fakefileno = 0xffffffff, rderr; struct timespec dctime; KASSERT(uiop->uio_iovcnt == 1 && (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0, ("nfs readdirplusrpc bad uio")); timespecclear(&dctime); *attrflagp = 0; if (eofp != NULL) *eofp = 0; ndp->ni_dvp = vp; nd->nd_mrep = NULL; cookie.lval[0] = cookiep->nfsuquad[0]; cookie.lval[1] = cookiep->nfsuquad[1]; tresid = uio_uio_resid(uiop); /* * For NFSv4, first create the "." and ".." entries. */ if (NFSHASNFSV4(nmp)) { NFSGETATTR_ATTRBIT(&dattrbits); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID); if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr, NFSATTRBIT_MOUNTEDONFILEID)) { NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); gotmnton = 1; } else { /* * Must fake it. Use the fileno, except when the * fsid is != to that of the directory. For that * case, generate a fake fileno that is not the same. */ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID); gotmnton = 0; } /* * Joy, oh joy. For V4 we get to hand craft '.' and '..'. */ if (uiop->uio_offset == 0) { NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); dotfileid = 0; /* Fake out the compiler. */ if ((nd->nd_flag & ND_NOMOREDATA) == 0) { error = nfsm_loadattr(nd, &nfsva); if (error != 0) goto nfsmout; dctime = nfsva.na_ctime; dotfileid = nfsva.na_fileid; } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 4)); if (len > 0 && len <= NFSX_V4FHMAX) error = nfsm_advance(nd, NFSM_RNDUP(len), -1); else error = EPERM; if (!error) { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) { dotdotfileid = dotfileid; } else if (gotmnton) { if (nfsva.na_mntonfileno != 0xffffffff) dotdotfileid = nfsva.na_mntonfileno; else dotdotfileid = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dotdotfileid = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dotdotfileid = fakefileno; } } } else if (nd->nd_repstat == NFSERR_NOENT) { /* * Lookupp returns NFSERR_NOENT when we are * at the root, so just use the current dir. */ nd->nd_repstat = 0; dotdotfileid = dotfileid; } else { error = nd->nd_repstat; } mbuf_freem(nd->nd_mrep); if (error) return (error); nd->nd_mrep = NULL; dp = (struct dirent *)uio_iov_base(uiop); + dp->d_off = 0; dp->d_type = DT_DIR; dp->d_fileno = dotfileid; dp->d_namlen = 1; + *((uint64_t *)dp->d_name) = 0; /* Zero pad it. */ dp->d_name[0] = '.'; - dp->d_name[1] = '\0'; - dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER; + dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ - tl = (u_int32_t *)&dp->d_name[4]; + tl = (u_int32_t *)&dp->d_name[8]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); dp = (struct dirent *)uio_iov_base(uiop); + dp->d_off = 0; dp->d_type = DT_DIR; dp->d_fileno = dotdotfileid; dp->d_namlen = 2; + *((uint64_t *)dp->d_name) = 0; dp->d_name[0] = '.'; dp->d_name[1] = '.'; - dp->d_name[2] = '\0'; - dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER; + dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ - tl = (u_int32_t *)&dp->d_name[4]; + tl = (u_int32_t *)&dp->d_name[8]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); } NFSREADDIRPLUS_ATTRBIT(&attrbits); if (gotmnton) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); } /* * Loop around doing readdir rpc's of size nm_readdirsize. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp); NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED); *tl++ = cookie.lval[0]; *tl++ = cookie.lval[1]; if (cookie.qval == 0) { *tl++ = 0; *tl++ = 0; } else { NFSLOCKNODE(dnp); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; NFSUNLOCKNODE(dnp); } *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_readdirsize); if (nd->nd_flag & ND_NFSV4) { (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &dattrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0) dctime = nap->na_ctime; NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); NFSLOCKNODE(dnp); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; NFSUNLOCKNODE(dnp); more_dirs = fxdr_unsigned(int, *tl); if (!more_dirs) tryformoredirs = 0; /* loop through the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); if (nd->nd_flag & ND_NFSV4) { ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; } else { fileno = fxdr_unsigned(long, *++tl); tl++; } len = fxdr_unsigned(int, *tl); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; goto nfsmout; } - tlen = NFSM_RNDUP(len); + tlen = roundup2(len, 8); if (tlen == len) - tlen += 4; /* To ensure null termination */ + tlen += 8; /* To ensure null termination. */ left = DIRBLKSIZ - blksiz; - if ((tlen + DIRHDSIZ + NFSX_HYPER) > left) { + if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) { dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; blksiz = 0; } - if ((tlen + DIRHDSIZ + NFSX_HYPER) > uio_uio_resid(uiop)) + if (_GENERIC_DIRLEN(len) + NFSX_HYPER > + uio_uio_resid(uiop)) bigenough = 0; if (bigenough) { dp = (struct dirent *)uio_iov_base(uiop); + dp->d_off = 0; dp->d_namlen = len; - dp->d_reclen = tlen + DIRHDSIZ + NFSX_HYPER; + dp->d_reclen = _GENERIC_DIRLEN(len) + + NFSX_HYPER; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uio_uio_resid_add(uiop, -(DIRHDSIZ)); uiop->uio_offset += DIRHDSIZ; uio_iov_base_add(uiop, DIRHDSIZ); uio_iov_len_add(uiop, -(DIRHDSIZ)); cnp->cn_nameptr = uio_iov_base(uiop); cnp->cn_namelen = len; NFSCNHASHZERO(cnp); error = nfsm_mbufuio(nd, uiop, len); if (error) goto nfsmout; cp = uio_iov_base(uiop); tlen -= len; *cp = '\0'; cp += tlen; /* points to cookie storage */ tl2 = (u_int32_t *)cp; if (len == 2 && cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.') isdotdot = 1; else isdotdot = 0; uio_iov_base_add(uiop, (tlen + NFSX_HYPER)); uio_iov_len_add(uiop, -(tlen + NFSX_HYPER)); uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER)); uiop->uio_offset += (tlen + NFSX_HYPER); } else { error = nfsm_advance(nd, NFSM_RNDUP(len), -1); if (error) goto nfsmout; } nfhp = NULL; if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; attrflag = fxdr_unsigned(int, *tl); if (attrflag) { error = nfsm_loadattr(nd, &nfsva); if (error) goto nfsmout; } NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED); if (*tl) { error = nfsm_getfh(nd, &nfhp); if (error) goto nfsmout; } if (!attrflag && nfhp != NULL) { FREE((caddr_t)nfhp, M_NFSFH); nfhp = NULL; } } else { rderr = 0; nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, &rderr, p, cred); if (error) goto nfsmout; } if (bigenough) { if (nd->nd_flag & ND_NFSV4) { if (rderr) { dp->d_fileno = 0; } else if (gotmnton) { if (nfsva.na_mntonfileno != 0xffffffff) dp->d_fileno = nfsva.na_mntonfileno; else dp->d_fileno = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dp->d_fileno = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dp->d_fileno = fakefileno; } } else { dp->d_fileno = fileno; } *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] = ncookie.lval[0]; *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] = ncookie.lval[1]; if (nfhp != NULL) { if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len, dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) { VREF(vp); newvp = vp; unlocknewvp = 0; FREE((caddr_t)nfhp, M_NFSFH); np = dnp; } else if (isdotdot != 0) { /* * Skip doing a nfscl_nget() call for "..". * There's a race between acquiring the nfs * node here and lookups that look for the * directory being read (in the parent). * It would try to get a lock on ".." here, * owning the lock on the directory being * read. Lookup will hold the lock on ".." * and try to acquire the lock on the * directory being read. * If the directory is unlocked/relocked, * then there is a LOR with the buflock * vp is relocked. */ free(nfhp, M_NFSFH); } else { error = nfscl_nget(vnode_mount(vp), vp, nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE); if (!error) { newvp = NFSTOV(np); unlocknewvp = 1; } } nfhp = NULL; if (newvp != NULLVP) { error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 0); if (error) { if (unlocknewvp) vput(newvp); else vrele(newvp); goto nfsmout; } dp->d_type = vtonfs_dtype(np->n_vattr.na_type); ndp->ni_vp = newvp; NFSCNHASH(cnp, HASHINIT); if (cnp->cn_namelen <= NCHNAMLEN && (newvp->v_type != VDIR || dctime.tv_sec != 0)) { cache_enter_time(ndp->ni_dvp, ndp->ni_vp, cnp, &nfsva.na_ctime, newvp->v_type != VDIR ? NULL : &dctime); } if (unlocknewvp) vput(newvp); else vrele(newvp); newvp = NULLVP; } } } else if (nfhp != NULL) { FREE((caddr_t)nfhp, M_NFSFH); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); if (tryformoredirs) more_dirs = !eof; if (nd->nd_flag & ND_NFSV4) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } } mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; } /* * If returning no data, assume end of file. * If not bigenough, return not end of file, since you aren't * returning all the data * Otherwise, return the eof flag from the server. */ if (eofp != NULL) { if (tresid == uio_uio_resid(uiop)) *eofp = 1; else if (!bigenough) *eofp = 0; else *eofp = eof; } /* * Add extra empty records to any remaining DIRBLKSIZ chunks. */ while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) { dp = (struct dirent *)uio_iov_base(uiop); dp->d_type = DT_UNKNOWN; dp->d_fileno = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; tl = (u_int32_t *)&dp->d_name[4]; *tl++ = cookie.lval[0]; *tl = cookie.lval[1]; dp->d_reclen = DIRBLKSIZ; uio_iov_base_add(uiop, DIRBLKSIZ); uio_iov_len_add(uiop, -(DIRBLKSIZ)); uio_uio_resid_add(uiop, -(DIRBLKSIZ)); uiop->uio_offset += DIRBLKSIZ; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } #endif /* !APPLE */ /* * Nfs commit rpc */ APPLESTATIC int nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp); NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; *tl = txdr_unsigned(cnt); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff); if (!error && !nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); NFSLOCKMNT(nmp); if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) { NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); nd->nd_repstat = NFSERR_STALEWRITEVERF; } NFSUNLOCKMNT(nmp); if (nd->nd_flag & ND_NFSV4) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } nfsmout: if (!error && nd->nd_repstat) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * NFS byte range lock rpc. * (Mostly just calls one of the three lower level RPC routines.) */ APPLESTATIC int nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { struct nfscllockowner *lp; struct nfsclclient *clp; struct nfsfh *nfhp; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); u_int64_t off, len; off_t start, end; u_int32_t clidrev = 0; int error = 0, newone = 0, expireret = 0, retrycnt, donelocally; int callcnt, dorpc; /* * Convert the flock structure into a start and end and do POSIX * bounds checking. */ switch (fl->l_whence) { case SEEK_SET: case SEEK_CUR: /* * Caller is responsible for adding any necessary offset * when SEEK_CUR is used. */ start = fl->l_start; off = fl->l_start; break; case SEEK_END: start = size + fl->l_start; off = size + fl->l_start; break; default: return (EINVAL); } if (start < 0) return (EINVAL); if (fl->l_len != 0) { end = start + fl->l_len - 1; if (end < start) return (EINVAL); } len = fl->l_len; if (len == 0) len = NFS64BITSSET; retrycnt = 0; do { nd->nd_repstat = 0; if (op == F_GETLK) { error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (error); error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags); if (!error) { clidrev = clp->nfsc_clientidrev; error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred, p, id, flags); } else if (error == -1) { error = 0; } nfscl_clientrelease(clp); } else if (op == F_UNLCK && fl->l_type == F_UNLCK) { /* * We must loop around for all lockowner cases. */ callcnt = 0; error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (error); do { error = nfscl_relbytelock(vp, off, len, cred, p, callcnt, clp, id, flags, &lp, &dorpc); /* * If it returns a NULL lp, we're done. */ if (lp == NULL) { if (callcnt == 0) nfscl_clientrelease(clp); else nfscl_releasealllocks(clp, vp, p, id, flags); return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; /* * If the server doesn't support Posix lock semantics, * only allow locks on the entire file, since it won't * handle overlapping byte ranges. * There might still be a problem when a lock * upgrade/downgrade (read<->write) occurs, since the * server "might" expect an unlock first? */ if (dorpc && (lp->nfsl_open->nfso_posixlock || (off == 0 && len == NFS64BITSSET))) { /* * Since the lock records will go away, we must * wait for grace and delay here. */ do { error = nfsrpc_locku(nd, nmp, lp, off, len, NFSV4LOCKT_READ, cred, p, 0); if ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfs_advlock"); } while ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0); } callcnt++; } while (error == 0 && nd->nd_repstat == 0); nfscl_releasealllocks(clp, vp, p, id, flags); } else if (op == F_SETLK) { error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p, NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally); if (error || donelocally) { return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; nfhp = VTONFS(vp)->n_fhp; if (!lp->nfsl_open->nfso_posixlock && (off != 0 || len != NFS64BITSSET)) { error = EINVAL; } else { error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh, nfhp->nfh_len, lp, newone, reclaim, off, len, fl->l_type, cred, p, 0); } if (!error) error = nd->nd_repstat; nfscl_lockrelease(lp, error, newone); } else { error = EINVAL; } if (!error) error = nd->nd_repstat; if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALECLIENTID || error == NFSERR_DELAY || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_advlock"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_DELAY || error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } /* * The lower level routine for the LockT case. */ APPLESTATIC int nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { u_int32_t *tl; int error, type, size; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsnode *np; struct nfsmount *nmp; struct nfsclsession *tsep; nmp = VFSTONFS(vp->v_mount); NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp); NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); if (fl->l_type == F_RDLCK) *tl++ = txdr_unsigned(NFSV4LOCKT_READ); else *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nfscl_filllockowner(id, own, flags); np = VTONFS(vp); NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN], np->n_fhp->nfh_len); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); if (nd->nd_repstat == 0) { fl->l_type = F_UNLCK; } else if (nd->nd_repstat == NFSERR_DENIED) { nd->nd_repstat = 0; fl->l_whence = SEEK_SET; NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); fl->l_start = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; if (len == NFS64BITSSET) fl->l_len = 0; else fl->l_len = len; type = fxdr_unsigned(int, *tl++); if (type == NFSV4LOCKT_WRITE) fl->l_type = F_WRLCK; else fl->l_type = F_RDLCK; /* * XXX For now, I have no idea what to do with the * conflicting lock_owner, so I'll just set the pid == 0 * and skip over the lock_owner. */ fl->l_pid = (pid_t)0; tl += 2; size = fxdr_unsigned(int, *tl); if (size < 0 || size > NFSV4_OPAQUELIMIT) error = EBADRPC; if (!error) error = nfsm_advance(nd, NFSM_RNDUP(size), -1); } else if (nd->nd_repstat == NFSERR_STALECLIENTID) nfscl_initiate_recovery(clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Lower level function that performs the LockU RPC. */ static int nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfscllockowner *lp, u_int64_t off, u_int64_t len, u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error; nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh, lp->nfsl_open->nfso_fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(type); *tl = txdr_unsigned(lp->nfsl_seqid); if (nfstest_outofseq && (arc4random() % nfstest_outofseq) == 0) *tl = txdr_unsigned(lp->nfsl_seqid + 1); tl++; if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = lp->nfsl_stateid.seqid; *tl++ = lp->nfsl_stateid.other[0]; *tl++ = lp->nfsl_stateid.other[1]; *tl++ = lp->nfsl_stateid.other[2]; txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_INCRSEQID(lp->nfsl_seqid, nd); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); lp->nfsl_stateid.seqid = *tl++; lp->nfsl_stateid.other[0] = *tl++; lp->nfsl_stateid.other[1] = *tl++; lp->nfsl_stateid.other[2] = *tl; } else if (nd->nd_repstat == NFSERR_STALESTATEID) nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * The actual Lock RPC. */ APPLESTATIC int nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone, int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error, size; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsclsession *tsep; nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); if (type == F_RDLCK) *tl++ = txdr_unsigned(NFSV4LOCKT_READ); else *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); *tl++ = txdr_unsigned(reclaim); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; if (newone) { *tl = newnfs_true; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 2 * NFSX_UNSIGNED + NFSX_HYPER); *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = lp->nfsl_open->nfso_stateid.seqid; *tl++ = lp->nfsl_open->nfso_stateid.other[0]; *tl++ = lp->nfsl_open->nfso_stateid.other[1]; *tl++ = lp->nfsl_open->nfso_stateid.other[2]; *tl++ = txdr_unsigned(lp->nfsl_seqid); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); } else { *tl = newnfs_false; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = lp->nfsl_stateid.seqid; *tl++ = lp->nfsl_stateid.other[0]; *tl++ = lp->nfsl_stateid.other[1]; *tl++ = lp->nfsl_stateid.other[2]; *tl = txdr_unsigned(lp->nfsl_seqid); if (nfstest_outofseq && (arc4random() % nfstest_outofseq) == 0) *tl = txdr_unsigned(lp->nfsl_seqid + 1); } if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (newone) NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd); NFSCL_INCRSEQID(lp->nfsl_seqid, nd); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); lp->nfsl_stateid.seqid = *tl++; lp->nfsl_stateid.other[0] = *tl++; lp->nfsl_stateid.other[1] = *tl++; lp->nfsl_stateid.other[2] = *tl; } else if (nd->nd_repstat == NFSERR_DENIED) { NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); size = fxdr_unsigned(int, *(tl + 7)); if (size < 0 || size > NFSV4_OPAQUELIMIT) error = EBADRPC; if (!error) error = nfsm_advance(nd, NFSM_RNDUP(size), -1); } else if (nd->nd_repstat == NFSERR_STALESTATEID) nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs statfs rpc * (always called with the vp for the mount point) */ APPLESTATIC int nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl = NULL; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; nfsattrbit_t attrbits; int error; *attrflagp = 0; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4(nmp)) { /* * For V4, you actually do a getattr. */ NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); NFSSTATFS_GETATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p, cred); if (!error) { nmp->nm_fsid[0] = nap->na_filesid[0]; nmp->nm_fsid[1] = nap->na_filesid[1]; NFSSETHASSETFSID(nmp); *attrflagp = 1; } } else { error = nd->nd_repstat; } if (error) goto nfsmout; } else { NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } if (nd->nd_repstat) { error = nd->nd_repstat; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATFS(nd->nd_flag & ND_NFSV3)); } if (NFSHASNFSV3(nmp)) { sbp->sf_tbytes = fxdr_hyper(tl); tl += 2; sbp->sf_fbytes = fxdr_hyper(tl); tl += 2; sbp->sf_abytes = fxdr_hyper(tl); tl += 2; sbp->sf_tfiles = fxdr_hyper(tl); tl += 2; sbp->sf_ffiles = fxdr_hyper(tl); tl += 2; sbp->sf_afiles = fxdr_hyper(tl); tl += 2; sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl); } else if (NFSHASNFSV4(nmp) == 0) { sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl); } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs pathconf rpc */ APPLESTATIC int nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; u_int32_t *tl; nfsattrbit_t attrbits; int error; *attrflagp = 0; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4(nmp)) { /* * For V4, you actually do a getattr. */ NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); NFSPATHCONF_GETATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (!error) *attrflagp = 1; } else { error = nd->nd_repstat; } } else { NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF); pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++); pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++); pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++); pc->pc_chownrestricted = fxdr_unsigned(u_int32_t, *tl++); pc->pc_caseinsensitive = fxdr_unsigned(u_int32_t, *tl++); pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl); } } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs version 3 fsinfo rpc call */ APPLESTATIC int nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO); fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_maxfilesize = fxdr_hyper(tl); tl += 2; fxdr_nfsv3time(tl, &fsp->fs_timedelta); tl += 2; fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl); } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Renew RPC. */ APPLESTATIC int nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfsmount *nmp; int error; struct nfssockreq *nrp; struct nfsclsession *tsep; nmp = clp->nfsc_nmp; if (nmp == NULL) return (0); if (dsp == NULL) nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL); else nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, &dsp->nfsclds_sess); if (!NFSHASNFSV4N(nmp)) { /* NFSv4.1 just uses a Sequence Op and not a Renew. */ NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; } nrp = NULL; if (dsp != NULL) nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; nd->nd_flag |= ND_USEGSSNAME; if (dsp == NULL) error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); else error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Releaselockowner RPC. */ APPLESTATIC int nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp, uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; u_int32_t *tl; int error; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsclsession *tsep; if (NFSHASNFSV4N(nmp)) { /* For NFSv4.1, do a FreeStateID. */ nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL, NULL); nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID); } else { nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); } nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Compound to get the mount pt FH. */ APPLESTATIC int nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; u_char *cp, *cp2; int error, cnt, len, setnil; u_int32_t *opcntp; nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL); cp = dirpath; cnt = 0; do { setnil = 0; while (*cp == '/') cp++; cp2 = cp; while (*cp2 != '\0' && *cp2 != '/') cp2++; if (*cp2 == '/') { setnil = 1; *cp2 = '\0'; } if (cp2 != cp) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_LOOKUP); nfsm_strtom(nd, cp, strlen(cp)); cnt++; } if (setnil) *cp2++ = '/'; cp = cp2; } while (*cp != '\0'); if (NFSHASNFSV4N(nmp)) /* Has a Sequence Op done by nfscl_reqstart(). */ *opcntp = txdr_unsigned(3 + cnt); else *opcntp = txdr_unsigned(2 + cnt); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETFH); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED); tl += (2 + 2 * cnt); if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) { nd->nd_repstat = NFSERR_BADXDR; } else { nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len); if (nd->nd_repstat == 0) nmp->nm_fhsize = len; } } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Delegreturn RPC. */ APPLESTATIC int nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred, struct nfsmount *nmp, NFSPROC_T *p, int syscred) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh, dp->nfsdl_fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = dp->nfsdl_stateid.seqid; *tl++ = dp->nfsdl_stateid.other[0]; *tl++ = dp->nfsdl_stateid.other[1]; *tl = dp->nfsdl_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getacl call. */ APPLESTATIC int nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp)) return (EOPNOTSUPP); NFSCL_REQSTART(nd, NFSPROC_GETACL, vp); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!nd->nd_repstat) error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred); else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs setacl call. */ APPLESTATIC int nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, void *stuff) { int error; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp)) return (EOPNOTSUPP); error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff); return (error); } /* * nfs setacl call. */ static int nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return (EOPNOTSUPP); NFSCL_REQSTART(nd, NFSPROC_SETACL, vp); nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); (void) nfsv4_fillattr(nd, vnode_mount(vp), vp, aclp, NULL, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); /* Don't care about the pre/postop attributes */ mbuf_freem(nd->nd_mrep); return (nd->nd_repstat); } /* * Do the NFSv4.1 Exchange ID. */ int nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl, v41flags; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfsclds *dsp; struct timespec verstime; int error, len; *dspp = NULL; nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* Client owner */ *tl = txdr_unsigned(clp->nfsc_rev); (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(exchflags); *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE); /* Set the implementation id4 */ *tl = txdr_unsigned(1); (void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org")); (void) nfsm_strtom(nd, version, strlen(version)); NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME); verstime.tv_sec = 1293840000; /* Jan 1, 2011 */ verstime.tv_nsec = 0; txdr_nfsv4time(&verstime, tl); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error, (int)nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER); len = fxdr_unsigned(int, *(tl + 7)); if (len < 0 || len > NFSV4_OPAQUELIMIT) { error = NFSERR_BADXDR; goto nfsmout; } dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS, M_WAITOK | M_ZERO); dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew; dsp->nfsclds_servownlen = len; dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++; dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++; dsp->nfsclds_sess.nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++); v41flags = fxdr_unsigned(uint32_t, *tl); if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 && NFSHASPNFSOPT(nmp)) { NFSCL_DEBUG(1, "set PNFS\n"); NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_PNFS; NFSUNLOCKMNT(nmp); dsp->nfsclds_flags |= NFSCLDS_MDS; } if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0) dsp->nfsclds_flags |= NFSCLDS_DS; if (len > 0) nd->nd_repstat = nfsrv_mtostr(nd, dsp->nfsclds_serverown, len); if (nd->nd_repstat == 0) { mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); nfscl_initsessionslots(&dsp->nfsclds_sess); *dspp = dsp; } else free(dsp, M_NFSCLDS); } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Create Session. */ int nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred, NFSPROC_T *p) { uint32_t crflags, *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error, irdcnt; nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); *tl++ = sep->nfsess_clientid.lval[0]; *tl++ = sep->nfsess_clientid.lval[1]; *tl++ = txdr_unsigned(sequenceid); crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST); if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0) crflags |= NFSV4CRSESS_CONNBACKCHAN; *tl = txdr_unsigned(crflags); /* Fill in fore channel attributes. */ NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); *tl++ = 0; /* Header pad size */ *tl++ = txdr_unsigned(100000); /* Max request size */ *tl++ = txdr_unsigned(100000); /* Max response size */ *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(20); /* Max operations */ *tl++ = txdr_unsigned(64); /* Max slots */ *tl = 0; /* No rdma ird */ /* Fill in back channel attributes. */ NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); *tl++ = 0; /* Header pad size */ *tl++ = txdr_unsigned(10000); /* Max request size */ *tl++ = txdr_unsigned(10000); /* Max response size */ *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(4); /* Max operations */ *tl++ = txdr_unsigned(NFSV4_CBSLOTS); /* Max slots */ *tl = 0; /* No rdma ird */ NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFS_CALLBCKPROG); /* Call back prog # */ /* Allow AUTH_SYS callbacks as uid, gid == 0. */ *tl++ = txdr_unsigned(1); /* Auth_sys only */ *tl++ = txdr_unsigned(AUTH_SYS); /* AUTH_SYS type */ *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */ *tl++ = 0; /* Null machine name */ *tl++ = 0; /* Uid == 0 */ *tl++ = 0; /* Gid == 0 */ *tl = 0; /* No additional gids */ nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED); bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++); crflags = fxdr_unsigned(uint32_t, *tl); if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_SESSPERSIST; NFSUNLOCKMNT(nmp); } /* Get the fore channel slot count. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl += 3; /* Skip the other counts. */ sep->nfsess_maxcache = fxdr_unsigned(int, *tl++); tl++; sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++); NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots); irdcnt = fxdr_unsigned(int, *tl); if (irdcnt > 0) NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED); /* and the back channel slot count. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl += 5; sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl); NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots); } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Destroy Session. */ int nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; struct nfsclsession *tsep; nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); tsep = nfsmnt_mdssession(nmp); bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Destroy Client. */ int nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; struct nfsclsession *tsep; nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutGet. */ int nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode, uint64_t offset, uint64_t len, uint64_t minlen, int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p, void *stuff) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsfh *nfhp; struct nfsclflayout *flp, *prevflp, *tflp; int cnt, error, gotiomode, fhcnt, nfhlen, i, j; uint8_t *cp; uint64_t retlen; flp = NULL; gotiomode = -1; nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER + NFSX_STATEID); *tl++ = newnfs_false; /* Don't signal availability. */ *tl++ = txdr_unsigned(NFSLAYOUT_NFSV4_1_FILES); *tl++ = txdr_unsigned(iomode); txdr_hyper(offset, tl); tl += 2; txdr_hyper(len, tl); tl += 2; txdr_hyper(minlen, tl); tl += 2; *tl++ = txdr_unsigned(stateidp->seqid); NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; *tl = txdr_unsigned(layoutlen); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID); if (*tl++ != 0) *retonclosep = 1; else *retonclosep = 0; stateidp->seqid = fxdr_unsigned(uint32_t, *tl++); NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep, (int)stateidp->seqid); stateidp->other[0] = *tl++; stateidp->other[1] = *tl++; stateidp->other[2] = *tl++; cnt = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "layg cnt=%d\n", cnt); if (cnt <= 0 || cnt > 10000) { /* Don't accept more than 10000 layouts in reply. */ error = NFSERR_BADXDR; goto nfsmout; } for (i = 0; i < cnt; i++) { /* Dissect all the way to the file handle cnt. */ NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_HYPER + 6 * NFSX_UNSIGNED + NFSX_V4DEVICEID); fhcnt = fxdr_unsigned(int, *(tl + 11 + NFSX_V4DEVICEID / NFSX_UNSIGNED)); NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt); if (fhcnt < 0 || fhcnt > 100) { /* Don't accept more than 100 file handles. */ error = NFSERR_BADXDR; goto nfsmout; } if (fhcnt > 1) flp = malloc(sizeof(*flp) + (fhcnt - 1) * sizeof(struct nfsfh *), M_NFSFLAYOUT, M_WAITOK); else flp = malloc(sizeof(*flp), M_NFSFLAYOUT, M_WAITOK); flp->nfsfl_flags = 0; flp->nfsfl_fhcnt = 0; flp->nfsfl_devp = NULL; flp->nfsfl_off = fxdr_hyper(tl); tl += 2; retlen = fxdr_hyper(tl); tl += 2; if (flp->nfsfl_off + retlen < flp->nfsfl_off) flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off; else flp->nfsfl_end = flp->nfsfl_off + retlen; flp->nfsfl_iomode = fxdr_unsigned(int, *tl++); if (gotiomode == -1) gotiomode = flp->nfsfl_iomode; NFSCL_DEBUG(4, "layg reqiom=%d retiom=%d\n", iomode, (int)flp->nfsfl_iomode); if (fxdr_unsigned(int, *tl++) != NFSLAYOUT_NFSV4_1_FILES) { printf("NFSv4.1: got non-files layout\n"); error = NFSERR_BADXDR; goto nfsmout; } NFSBCOPY(++tl, flp->nfsfl_dev, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++); NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util); flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++); flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2; if (fxdr_unsigned(int, *tl) != fhcnt) { printf("EEK! bad fhcnt\n"); error = NFSERR_BADXDR; goto nfsmout; } for (j = 0; j < fhcnt; j++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); nfhlen = fxdr_unsigned(int, *tl); if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) { error = NFSERR_BADXDR; goto nfsmout; } nfhp = malloc(sizeof(*nfhp) + nfhlen - 1, M_NFSFH, M_WAITOK); flp->nfsfl_fh[j] = nfhp; flp->nfsfl_fhcnt++; nfhp->nfh_len = nfhlen; NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen)); NFSBCOPY(cp, nfhp->nfh_fh, nfhlen); } if (flp->nfsfl_iomode == gotiomode) { /* Keep the list in increasing offset order. */ tflp = LIST_FIRST(flhp); prevflp = NULL; while (tflp != NULL && tflp->nfsfl_off < flp->nfsfl_off) { prevflp = tflp; tflp = LIST_NEXT(tflp, nfsfl_list); } if (prevflp == NULL) LIST_INSERT_HEAD(flhp, flp, nfsfl_list); else LIST_INSERT_AFTER(prevflp, flp, nfsfl_list); } else { printf("nfscl_layoutget(): got wrong iomode\n"); nfscl_freeflayout(flp); } flp = NULL; } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; nfsmout: if (error != 0 && flp != NULL) nfscl_freeflayout(flp); mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Get Device Info. */ int nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred, NFSPROC_T *p) { uint32_t cnt, *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct sockaddr_storage ss; struct nfsclds *dsp = NULL, **dspp; struct nfscldevinfo *ndi; int addrcnt, bitcnt, error, i, isudp, j, pos, safilled, stripecnt; uint8_t stripeindex; *ndip = NULL; ndi = NULL; nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED); NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); *tl++ = txdr_unsigned(layouttype); *tl++ = txdr_unsigned(100000); if (notifybitsp != NULL && *notifybitsp != 0) { *tl = txdr_unsigned(1); /* One word of bits. */ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(*notifybitsp); } else *tl = txdr_unsigned(0); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED); if (layouttype != fxdr_unsigned(int, *tl++)) printf("EEK! devinfo layout type not same!\n"); stripecnt = fxdr_unsigned(int, *++tl); NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt); if (stripecnt < 1 || stripecnt > 4096) { printf("NFS devinfo stripecnt %d: out of range\n", stripecnt); error = NFSERR_BADXDR; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) * NFSX_UNSIGNED); addrcnt = fxdr_unsigned(int, *(tl + stripecnt)); NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt); if (addrcnt < 1 || addrcnt > 128) { printf("NFS devinfo addrcnt %d: out of range\n", addrcnt); error = NFSERR_BADXDR; goto nfsmout; } /* * Now we know how many stripe indices and addresses, so * we can allocate the structure the correct size. */ i = (stripecnt * sizeof(uint8_t)) / sizeof(struct nfsclds *) + 1; NFSCL_DEBUG(4, "stripeindices=%d\n", i); ndi = malloc(sizeof(*ndi) + (addrcnt + i) * sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO); NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID); ndi->nfsdi_refcnt = 0; ndi->nfsdi_stripecnt = stripecnt; ndi->nfsdi_addrcnt = addrcnt; /* Fill in the stripe indices. */ for (i = 0; i < stripecnt; i++) { stripeindex = fxdr_unsigned(uint8_t, *tl++); NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex); if (stripeindex >= addrcnt) { printf("NFS devinfo stripeindex %d: too big\n", (int)stripeindex); error = NFSERR_BADXDR; goto nfsmout; } nfsfldi_setstripeindex(ndi, i, stripeindex); } /* Now, dissect the server address(es). */ safilled = 0; for (i = 0; i < addrcnt; i++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(uint32_t, *tl); if (cnt == 0) { printf("NFS devinfo 0 len addrlist\n"); error = NFSERR_BADXDR; goto nfsmout; } dspp = nfsfldi_addr(ndi, i); pos = arc4random() % cnt; /* Choose one. */ safilled = 0; for (j = 0; j < cnt; j++) { error = nfsv4_getipaddr(nd, &ss, &isudp); if (error != 0 && error != EPERM) { error = NFSERR_BADXDR; goto nfsmout; } if (error == 0 && isudp == 0) { /* * The algorithm is: * - use "pos" entry if it is of the * same af_family or none of them * is of the same af_family * else * - use the first one of the same * af_family. */ if ((safilled == 0 && ss.ss_family == nmp->nm_nam->sa_family) || (j == pos && (safilled == 0 || ss.ss_family == nmp->nm_nam->sa_family)) || (safilled == 1 && ss.ss_family == nmp->nm_nam->sa_family)) { error = nfsrpc_fillsa(nmp, &ss, &dsp, p); if (error == 0) { *dspp = dsp; if (ss.ss_family == nmp->nm_nam->sa_family) safilled = 2; else safilled = 1; } } } } if (safilled == 0) break; } /* And the notify bits. */ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (safilled != 0) { bitcnt = fxdr_unsigned(int, *tl); if (bitcnt > 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (notifybitsp != NULL) *notifybitsp = fxdr_unsigned(uint32_t, *tl); } *ndip = ndi; } else error = EPERM; } if (nd->nd_repstat != 0) error = nd->nd_repstat; nfsmout: if (error != 0 && ndi != NULL) nfscl_freedevinfo(ndi); mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutCommit. */ int nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim, uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp, int layouttype, int layoutupdatecnt, uint8_t *layp, struct ucred *cred, NFSPROC_T *p, void *stuff) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error, outcnt, i; uint8_t *cp; nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL); NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER + NFSX_STATEID); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; if (reclaim != 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl++ = txdr_unsigned(stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; *tl++ = newnfs_true; if (lastbyte < off) lastbyte = off; else if (lastbyte >= (off + len)) lastbyte = off + len - 1; txdr_hyper(lastbyte, tl); tl += 2; *tl++ = newnfs_false; *tl++ = txdr_unsigned(layouttype); *tl = txdr_unsigned(layoutupdatecnt); if (layoutupdatecnt > 0) { KASSERT(layouttype != NFSLAYOUT_NFSV4_1_FILES, ("Must be nil for Files Layout")); outcnt = NFSM_RNDUP(layoutupdatecnt); NFSM_BUILD(cp, uint8_t *, outcnt); NFSBCOPY(layp, cp, layoutupdatecnt); cp += layoutupdatecnt; for (i = 0; i < (outcnt - layoutupdatecnt); i++) *cp++ = 0x0; } nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutReturn. */ int nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim, int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset, uint64_t len, nfsv4stateid_t *stateidp, int layoutcnt, uint32_t *layp, struct ucred *cred, NFSPROC_T *p, void *stuff) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error, outcnt, i; uint8_t *cp; nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); if (reclaim != 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl++ = txdr_unsigned(layouttype); *tl++ = txdr_unsigned(iomode); *tl = txdr_unsigned(layoutreturn); if (layoutreturn == NFSLAYOUTRETURN_FILE) { NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID + NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; txdr_hyper(len, tl); tl += 2; NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid); *tl++ = txdr_unsigned(stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; *tl = txdr_unsigned(layoutcnt); if (layoutcnt > 0) { outcnt = NFSM_RNDUP(layoutcnt); NFSM_BUILD(cp, uint8_t *, outcnt); NFSBCOPY(layp, cp, layoutcnt); cp += layoutcnt; for (i = 0; i < (outcnt - layoutcnt); i++) *cp++ = 0x0; } } nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (*tl != 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID); stateidp->seqid = fxdr_unsigned(uint32_t, *tl++); stateidp->other[0] = *tl++; stateidp->other[1] = *tl++; stateidp->other[2] = *tl; } } else error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Acquire a layout and devinfo, if possible. The caller must have acquired * a reference count on the nfsclclient structure before calling this. * Return the layout in lypp with a reference count on it, if successful. */ static int nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp, int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p) { struct nfscllayout *lyp; struct nfsclflayout *flp, *tflp; struct nfscldevinfo *dip; struct nfsclflayouthead flh; int error = 0, islocked, layoutlen, recalled, retonclose; nfsv4stateid_t stateid; struct nfsclsession *tsep; *lypp = NULL; /* * If lyp is returned non-NULL, there will be a refcnt (shared lock) * on it, iff flp != NULL or a lock (exclusive lock) on it iff * flp == NULL. */ lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len, off, &flp, &recalled); islocked = 0; if (lyp == NULL || flp == NULL) { if (recalled != 0) return (EIO); LIST_INIT(&flh); tsep = nfsmnt_mdssession(nmp); layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED); if (lyp == NULL) { stateid.seqid = 0; stateid.other[0] = stateidp->other[0]; stateid.other[1] = stateidp->other[1]; stateid.other[2] = stateidp->other[2]; error = nfsrpc_layoutget(nmp, nfhp->nfh_fh, nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX, (uint64_t)0, layoutlen, &stateid, &retonclose, &flh, cred, p, NULL); } else { islocked = 1; stateid.seqid = lyp->nfsly_stateid.seqid; stateid.other[0] = lyp->nfsly_stateid.other[0]; stateid.other[1] = lyp->nfsly_stateid.other[1]; stateid.other[2] = lyp->nfsly_stateid.other[2]; error = nfsrpc_layoutget(nmp, nfhp->nfh_fh, nfhp->nfh_len, iomode, off, UINT64_MAX, (uint64_t)0, layoutlen, &stateid, &retonclose, &flh, cred, p, NULL); } if (error == 0) LIST_FOREACH(tflp, &flh, nfsfl_list) { error = nfscl_adddevinfo(nmp, NULL, tflp); if (error != 0) { error = nfsrpc_getdeviceinfo(nmp, tflp->nfsfl_dev, NFSLAYOUT_NFSV4_1_FILES, notifybitsp, &dip, cred, p); if (error != 0) break; error = nfscl_adddevinfo(nmp, dip, tflp); if (error != 0) printf( "getlayout: cannot add\n"); } } if (error == 0) { /* * nfscl_layout() always returns with the nfsly_lock * set to a refcnt (shared lock). */ error = nfscl_layout(nmp, vp, nfhp->nfh_fh, nfhp->nfh_len, &stateid, retonclose, &flh, &lyp, cred, p); if (error == 0) *lypp = lyp; } else if (islocked != 0) nfsv4_unlock(&lyp->nfsly_lock, 0); } else *lypp = lyp; return (error); } /* * Do a TCP connection plus exchange id and create session. * If successful, a "struct nfsclds" is linked into the list for the * mount point and a pointer to it is returned. */ static int nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_storage *ssp, struct nfsclds **dspp, NFSPROC_T *p) { struct sockaddr_in *msad, *sad, *ssd; struct sockaddr_in6 *msad6, *sad6, *ssd6; struct nfsclclient *clp; struct nfssockreq *nrp; struct nfsclds *dsp, *tdsp; int error; enum nfsclds_state retv; uint32_t sequenceid; KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("nfsrpc_fillsa: NULL nr_cred")); NFSLOCKCLSTATE(); clp = nmp->nm_clp; NFSUNLOCKCLSTATE(); if (clp == NULL) return (EPERM); if (ssp->ss_family == AF_INET) { ssd = (struct sockaddr_in *)ssp; NFSLOCKMNT(nmp); /* * Check to see if we already have a session for this * address that is usable for a DS. * Note that the MDS's address is in a different place * than the sessions already acquired for DS's. */ msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam; tdsp = TAILQ_FIRST(&nmp->nm_sess); while (tdsp != NULL) { if (msad != NULL && msad->sin_family == AF_INET && ssd->sin_addr.s_addr == msad->sin_addr.s_addr && ssd->sin_port == msad->sin_port && (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 && tdsp->nfsclds_sess.nfsess_defunct == 0) { *dspp = tdsp; NFSUNLOCKMNT(nmp); NFSCL_DEBUG(4, "fnd same addr\n"); return (0); } tdsp = TAILQ_NEXT(tdsp, nfsclds_list); if (tdsp != NULL && tdsp->nfsclds_sockp != NULL) msad = (struct sockaddr_in *) tdsp->nfsclds_sockp->nr_nam; else msad = NULL; } NFSUNLOCKMNT(nmp); /* No IP address match, so look for new/trunked one. */ sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO); sad->sin_len = sizeof(*sad); sad->sin_family = AF_INET; sad->sin_port = ssd->sin_port; sad->sin_addr.s_addr = ssd->sin_addr.s_addr; nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO); nrp->nr_nam = (struct sockaddr *)sad; } else if (ssp->ss_family == AF_INET6) { ssd6 = (struct sockaddr_in6 *)ssp; NFSLOCKMNT(nmp); /* * Check to see if we already have a session for this * address that is usable for a DS. * Note that the MDS's address is in a different place * than the sessions already acquired for DS's. */ msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam; tdsp = TAILQ_FIRST(&nmp->nm_sess); while (tdsp != NULL) { if (msad6 != NULL && msad6->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&ssd6->sin6_addr, &msad6->sin6_addr) && ssd6->sin6_port == msad6->sin6_port && (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 && tdsp->nfsclds_sess.nfsess_defunct == 0) { *dspp = tdsp; NFSUNLOCKMNT(nmp); return (0); } tdsp = TAILQ_NEXT(tdsp, nfsclds_list); if (tdsp != NULL && tdsp->nfsclds_sockp != NULL) msad6 = (struct sockaddr_in6 *) tdsp->nfsclds_sockp->nr_nam; else msad6 = NULL; } NFSUNLOCKMNT(nmp); /* No IP address match, so look for new/trunked one. */ sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO); sad6->sin6_len = sizeof(*sad6); sad6->sin6_family = AF_INET6; sad6->sin6_port = ssd6->sin6_port; NFSBCOPY(&ssd6->sin6_addr, &sad6->sin6_addr, sizeof(struct in6_addr)); nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO); nrp->nr_nam = (struct sockaddr *)sad6; } else return (EPERM); nrp->nr_sotype = SOCK_STREAM; mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF); nrp->nr_prog = NFS_PROG; nrp->nr_vers = NFS_VER4; /* * Use the credentials that were used for the mount, which are * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc. * Ref. counting the credentials with crhold() is probably not * necessary, since nm_sockreq.nr_cred won't be crfree()'d until * unmount, but I did it anyhow. */ nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred); error = newnfs_connect(nmp, nrp, NULL, p, 0); NFSCL_DEBUG(3, "DS connect=%d\n", error); /* Now, do the exchangeid and create session. */ if (error == 0) { error = nfsrpc_exchangeid(nmp, clp, nrp, NFSV4EXCH_USEPNFSDS, &dsp, nrp->nr_cred, p); NFSCL_DEBUG(3, "DS exchangeid=%d\n", error); if (error != 0) newnfs_disconnect(nrp); } if (error == 0) { dsp->nfsclds_sockp = nrp; NFSLOCKMNT(nmp); retv = nfscl_getsameserver(nmp, dsp, &tdsp); NFSCL_DEBUG(3, "getsame ret=%d\n", retv); if (retv == NFSDSP_USETHISSESSION) { NFSUNLOCKMNT(nmp); /* * If there is already a session for this server, * use it. */ (void)newnfs_disconnect(nrp); nfscl_freenfsclds(dsp); *dspp = tdsp; return (0); } if (retv == NFSDSP_SEQTHISSESSION) sequenceid = tdsp->nfsclds_sess.nfsess_sequenceid; else sequenceid = dsp->nfsclds_sess.nfsess_sequenceid; NFSUNLOCKMNT(nmp); error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, nrp, sequenceid, 0, nrp->nr_cred, p); NFSCL_DEBUG(3, "DS createsess=%d\n", error); } else { NFSFREECRED(nrp->nr_cred); NFSFREEMUTEX(&nrp->nr_mtx); free(nrp->nr_nam, M_SONAME); free(nrp, M_NFSSOCKREQ); } if (error == 0) { NFSCL_DEBUG(3, "add DS session\n"); /* * Put it at the end of the list. That way the list * is ordered by when the entry was added. This matters * since the one done first is the one that should be * used for sequencid'ing any subsequent create sessions. */ NFSLOCKMNT(nmp); TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list); NFSUNLOCKMNT(nmp); *dspp = dsp; } else if (dsp != NULL) { newnfs_disconnect(nrp); nfscl_freenfsclds(dsp); } return (error); } /* * Do the NFSv4.1 Reclaim Complete. */ int nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = newnfs_false; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Initialize the slot tables for a session. */ static void nfscl_initsessionslots(struct nfsclsession *sep) { int i; for (i = 0; i < NFSV4_CBSLOTS; i++) { if (sep->nfsess_cbslots[i].nfssl_reply != NULL) m_freem(sep->nfsess_cbslots[i].nfssl_reply); NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot)); } for (i = 0; i < 64; i++) sep->nfsess_slotseq[i] = 0; sep->nfsess_slots = 0; } /* * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS). */ int nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, uint32_t rwaccess, struct ucred *cred, NFSPROC_T *p) { struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfscllayout *layp; struct nfscldevinfo *dip; struct nfsclflayout *rflp; nfsv4stateid_t stateid; struct ucred *newcred; uint64_t lastbyte, len, off, oresid, xfer; int eof, error, iolaymode, recalled; void *lckp; if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || (np->n_flag & NNOLAYOUT) != 0) return (EIO); /* Now, get a reference cnt on the clientid for this mount. */ if (nfscl_getref(nmp) == 0) return (EIO); /* Find an appropriate stateid. */ newcred = NFSNEWCRED(cred); error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, rwaccess, 1, newcred, p, &stateid, &lckp); if (error != 0) { NFSFREECRED(newcred); nfscl_relref(nmp); return (error); } /* Search for a layout for this file. */ off = uiop->uio_offset; layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, off, &rflp, &recalled); if (layp == NULL || rflp == NULL) { if (recalled != 0) { NFSFREECRED(newcred); nfscl_relref(nmp); return (EIO); } if (layp != NULL) { nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0); layp = NULL; } /* Try and get a Layout, if it is supported. */ if (rwaccess == NFSV4OPEN_ACCESSWRITE || (np->n_flag & NWRITEOPENED) != 0) iolaymode = NFSLAYOUTIOMODE_RW; else iolaymode = NFSLAYOUTIOMODE_READ; error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode, NULL, &stateid, off, &layp, newcred, p); if (error != 0) { NFSLOCKNODE(np); np->n_flag |= NNOLAYOUT; NFSUNLOCKNODE(np); if (lckp != NULL) nfscl_lockderef(lckp); NFSFREECRED(newcred); if (layp != NULL) nfscl_rellayout(layp, 0); nfscl_relref(nmp); return (error); } } /* * Loop around finding a layout that works for the first part of * this I/O operation, and then call the function that actually * does the RPC. */ eof = 0; len = (uint64_t)uiop->uio_resid; while (len > 0 && error == 0 && eof == 0) { off = uiop->uio_offset; error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp); if (error == 0) { oresid = xfer = (uint64_t)uiop->uio_resid; if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off)) xfer = rflp->nfsfl_end - rflp->nfsfl_off; dip = nfscl_getdevinfo(nmp->nm_clp, rflp->nfsfl_dev, rflp->nfsfl_devp); if (dip != NULL) { error = nfscl_doflayoutio(vp, uiop, iomode, must_commit, &eof, &stateid, rwaccess, dip, layp, rflp, off, xfer, newcred, p); nfscl_reldevinfo(dip); lastbyte = off + xfer - 1; if (error == 0) { NFSLOCKCLSTATE(); if (lastbyte > layp->nfsly_lastbyte) layp->nfsly_lastbyte = lastbyte; NFSUNLOCKCLSTATE(); } else if (error == NFSERR_OPENMODE && rwaccess == NFSV4OPEN_ACCESSREAD) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_OPENMODE; NFSUNLOCKMNT(nmp); } } else error = EIO; if (error == 0) len -= (oresid - (uint64_t)uiop->uio_resid); } } if (lckp != NULL) nfscl_lockderef(lckp); NFSFREECRED(newcred); nfscl_rellayout(layp, 0); nfscl_relref(nmp); return (error); } /* * Find a file layout that will handle the first bytes of the requested * range and return the information from it needed to to the I/O operation. */ int nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess, struct nfsclflayout **retflpp) { struct nfsclflayout *flp, *nflp, *rflp; uint32_t rw; rflp = NULL; rw = rwaccess; /* For reading, do the Read list first and then the Write list. */ do { if (rw == NFSV4OPEN_ACCESSREAD) flp = LIST_FIRST(&lyp->nfsly_flayread); else flp = LIST_FIRST(&lyp->nfsly_flayrw); while (flp != NULL) { nflp = LIST_NEXT(flp, nfsfl_list); if (flp->nfsfl_off > off) break; if (flp->nfsfl_end > off && (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end)) rflp = flp; flp = nflp; } if (rw == NFSV4OPEN_ACCESSREAD) rw = NFSV4OPEN_ACCESSWRITE; else rw = 0; } while (rw != 0); if (rflp != NULL) { /* This one covers the most bytes starting at off. */ *retflpp = rflp; return (0); } return (EIO); } /* * Do I/O using an NFSv4.1 file layout. */ static int nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp, struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off, uint64_t len, struct ucred *cred, NFSPROC_T *p) { uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer; int commit_thru_mds, error = 0, stripe_index, stripe_pos; struct nfsnode *np; struct nfsfh *fhp; struct nfsclds **dspp; np = VTONFS(vp); rel_off = off - flp->nfsfl_patoff; stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff; stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) % dp->nfsdi_stripecnt; transfer = stripe_unit_size - (rel_off % stripe_unit_size); /* Loop around, doing I/O for each stripe unit. */ while (len > 0 && error == 0) { stripe_index = nfsfldi_stripeindex(dp, stripe_pos); dspp = nfsfldi_addr(dp, stripe_index); if (len > transfer) xfer = transfer; else xfer = len; if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) { /* Dense layout. */ if (stripe_pos >= flp->nfsfl_fhcnt) return (EIO); fhp = flp->nfsfl_fh[stripe_pos]; io_off = (rel_off / (stripe_unit_size * dp->nfsdi_stripecnt)) * stripe_unit_size + rel_off % stripe_unit_size; } else { /* Sparse layout. */ if (flp->nfsfl_fhcnt > 1) { if (stripe_index >= flp->nfsfl_fhcnt) return (EIO); fhp = flp->nfsfl_fh[stripe_index]; } else if (flp->nfsfl_fhcnt == 1) fhp = flp->nfsfl_fh[0]; else fhp = np->n_fhp; io_off = off; } if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) commit_thru_mds = 1; else commit_thru_mds = 0; if (rwflag == FREAD) error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp, io_off, xfer, fhp, cred, p); else { error = nfsrpc_writeds(vp, uiop, iomode, must_commit, stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds, cred, p); if (error == 0) { NFSLOCKCLSTATE(); lyp->nfsly_flags |= NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); } } if (error == 0) { transfer = stripe_unit_size; stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt; len -= xfer; off += xfer; } } return (error); } /* * The actual read RPC done to a DS. */ static int nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp, struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; int error, retlen; struct nfsrv_descript nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsrv_descript *nd = &nfsd; struct nfssockreq *nrp; nd->nd_mrep = NULL; nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess); nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); txdr_hyper(io_off, tl); *(tl + 2) = txdr_unsigned(len); nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); if (error != 0) return (error); if (nd->nd_repstat != 0) { error = nd->nd_repstat; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); *eofp = fxdr_unsigned(int, *tl); NFSM_STRSIZ(retlen, len); error = nfsm_mbufuio(nd, uiop, retlen); nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } /* * The actual write RPC done to a DS. */ static int nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int commit_thru_mds, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); int error, rlen, commit, committed = NFSWRITE_FILESYNC; int32_t backup; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfssockreq *nrp; KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1")); nd->nd_mrep = NULL; nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess); nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); txdr_hyper(io_off, tl); tl += 2; *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); nfsm_uiombuf(nd, uiop, len); nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); if (error != 0) return (error); if (nd->nd_repstat != 0) { /* * In case the rpc gets retried, roll * the uio fileds changed by nfsm_uiombuf() * back. */ uiop->uio_offset -= len; uio_uio_resid_add(uiop, len); uio_iov_base_add(uiop, -len); uio_iov_len_add(uiop, len); error = nd->nd_repstat; } else { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; goto nfsmout; } else if (rlen < len) { backup = len - rlen; uio_iov_base_add(uiop, -(backup)); uio_iov_len_add(uiop, backup); uiop->uio_offset -= backup; uio_uio_resid_add(uiop, backup); len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest commitment level * obtained by any of the RPCs. */ if (committed == NFSWRITE_FILESYNC) committed = commit; else if (committed == NFSWRITE_DATASYNC && commit == NFSWRITE_UNSTABLE) committed = commit; if (commit_thru_mds != 0) { NFSLOCKMNT(nmp); if (!NFSHASWRITEVERF(nmp)) { NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); NFSSETWRITEVERF(nmp); } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); } NFSUNLOCKMNT(nmp); } else { NFSLOCKDS(dsp); if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) { NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF; } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); } NFSUNLOCKDS(dsp); } } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); *iomode = committed; if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; return (error); } /* * Free up the nfsclds structure. */ void nfscl_freenfsclds(struct nfsclds *dsp) { int i; if (dsp == NULL) return; if (dsp->nfsclds_sockp != NULL) { NFSFREECRED(dsp->nfsclds_sockp->nr_cred); NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx); free(dsp->nfsclds_sockp->nr_nam, M_SONAME); free(dsp->nfsclds_sockp, M_NFSSOCKREQ); } NFSFREEMUTEX(&dsp->nfsclds_mtx); NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx); for (i = 0; i < NFSV4_CBSLOTS; i++) { if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL) m_freem( dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply); } free(dsp, M_NFSCLDS); } static enum nfsclds_state nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp, struct nfsclds **retdspp) { struct nfsclds *dsp, *cur_dsp; /* * Search the list of nfsclds structures for one with the same * server. */ cur_dsp = NULL; TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) { if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen && dsp->nfsclds_servownlen != 0 && !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown, dsp->nfsclds_servownlen) && dsp->nfsclds_sess.nfsess_defunct == 0) { NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n", TAILQ_FIRST(&nmp->nm_sess), dsp, dsp->nfsclds_flags); /* Server major id matches. */ if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) { *retdspp = dsp; return (NFSDSP_USETHISSESSION); } /* * Note the first match, so it can be used for * sequence'ing new sessions. */ if (cur_dsp == NULL) cur_dsp = dsp; } } if (cur_dsp != NULL) { *retdspp = cur_dsp; return (NFSDSP_SEQTHISSESSION); } return (NFSDSP_NOTFOUND); } #ifdef notyet /* * NFS commit rpc to a DS. */ static int nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp, struct nfsfh *fhp, struct ucred *cred, NFSPROC_T *p, void *stuff) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfssockreq *nrp; int error; nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); NFSLOCKDS(dsp); if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); error = NFSERR_STALEWRITEVERF; } NFSUNLOCKDS(dsp); } nfsmout: if (error == 0 && nd->nd_repstat != 0) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } #endif Index: head/sys/fs/nfsclient/nfs_clvnops.c =================================================================== --- head/sys/fs/nfsclient/nfs_clvnops.c (revision 318735) +++ head/sys/fs/nfsclient/nfs_clvnops.c (revision 318736) @@ -1,3541 +1,3539 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from nfs_vnops.c 8.16 (Berkeley) 5/27/95 */ #include __FBSDID("$FreeBSD$"); /* * vnode op calls for Sun NFS version 2, 3 and 4 */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_nfsclient_accesscache_flush_probe_func_t dtrace_nfscl_accesscache_flush_done_probe; uint32_t nfscl_accesscache_flush_done_id; dtrace_nfsclient_accesscache_get_probe_func_t dtrace_nfscl_accesscache_get_hit_probe, dtrace_nfscl_accesscache_get_miss_probe; uint32_t nfscl_accesscache_get_hit_id; uint32_t nfscl_accesscache_get_miss_id; dtrace_nfsclient_accesscache_load_probe_func_t dtrace_nfscl_accesscache_load_done_probe; uint32_t nfscl_accesscache_load_done_id; #endif /* !KDTRACE_HOOKS */ /* Defs */ #define TRUE 1 #define FALSE 0 extern struct nfsstatsv1 nfsstatsv1; extern int nfsrv_useacl; extern int nfscl_debuglevel; MALLOC_DECLARE(M_NEWNFSREQ); static vop_read_t nfsfifo_read; static vop_write_t nfsfifo_write; static vop_close_t nfsfifo_close; static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *, struct thread *); static vop_lookup_t nfs_lookup; static vop_create_t nfs_create; static vop_mknod_t nfs_mknod; static vop_open_t nfs_open; static vop_pathconf_t nfs_pathconf; static vop_close_t nfs_close; static vop_access_t nfs_access; static vop_getattr_t nfs_getattr; static vop_setattr_t nfs_setattr; static vop_read_t nfs_read; static vop_fsync_t nfs_fsync; static vop_remove_t nfs_remove; static vop_link_t nfs_link; static vop_rename_t nfs_rename; static vop_mkdir_t nfs_mkdir; static vop_rmdir_t nfs_rmdir; static vop_symlink_t nfs_symlink; static vop_readdir_t nfs_readdir; static vop_strategy_t nfs_strategy; static int nfs_lookitup(struct vnode *, char *, int, struct ucred *, struct thread *, struct nfsnode **); static int nfs_sillyrename(struct vnode *, struct vnode *, struct componentname *); static vop_access_t nfsspec_access; static vop_readlink_t nfs_readlink; static vop_print_t nfs_print; static vop_advlock_t nfs_advlock; static vop_advlockasync_t nfs_advlockasync; static vop_getacl_t nfs_getacl; static vop_setacl_t nfs_setacl; static vop_set_text_t nfs_set_text; /* * Global vfs data structures for nfs */ struct vop_vector newnfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = nfs_access, .vop_advlock = nfs_advlock, .vop_advlockasync = nfs_advlockasync, .vop_close = nfs_close, .vop_create = nfs_create, .vop_fsync = nfs_fsync, .vop_getattr = nfs_getattr, .vop_getpages = ncl_getpages, .vop_putpages = ncl_putpages, .vop_inactive = ncl_inactive, .vop_link = nfs_link, .vop_lookup = nfs_lookup, .vop_mkdir = nfs_mkdir, .vop_mknod = nfs_mknod, .vop_open = nfs_open, .vop_pathconf = nfs_pathconf, .vop_print = nfs_print, .vop_read = nfs_read, .vop_readdir = nfs_readdir, .vop_readlink = nfs_readlink, .vop_reclaim = ncl_reclaim, .vop_remove = nfs_remove, .vop_rename = nfs_rename, .vop_rmdir = nfs_rmdir, .vop_setattr = nfs_setattr, .vop_strategy = nfs_strategy, .vop_symlink = nfs_symlink, .vop_write = ncl_write, .vop_getacl = nfs_getacl, .vop_setacl = nfs_setacl, .vop_set_text = nfs_set_text, }; struct vop_vector newnfs_fifoops = { .vop_default = &fifo_specops, .vop_access = nfsspec_access, .vop_close = nfsfifo_close, .vop_fsync = nfs_fsync, .vop_getattr = nfs_getattr, .vop_inactive = ncl_inactive, .vop_print = nfs_print, .vop_read = nfsfifo_read, .vop_reclaim = ncl_reclaim, .vop_setattr = nfs_setattr, .vop_write = nfsfifo_write, }; static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap); static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, int namelen, struct ucred *cred, struct thread *td); static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td); static int nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp, struct sillyrename *sp); /* * Global variables */ -#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) - SYSCTL_DECL(_vfs_nfs); static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); static int nfs_prime_access_cache = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW, &nfs_prime_access_cache, 0, "Prime NFS ACCESS cache when fetching attributes"); static int newnfs_commit_on_close = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW, &newnfs_commit_on_close, 0, "write+commit on close, else only write"); static int nfs_clean_pages_on_close = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); int newnfs_directio_enable = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW, &newnfs_directio_enable, 0, "Enable NFS directio"); int nfs_keep_dirty_on_error; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW, &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned"); /* * This sysctl allows other processes to mmap a file that has been opened * O_DIRECT by a process. In general, having processes mmap the file while * Direct IO is in progress can lead to Data Inconsistencies. But, we allow * this by default to prevent DoS attacks - to prevent a malicious user from * opening up files O_DIRECT preventing other users from mmap'ing these * files. "Protected" environments where stricter consistency guarantees are * required can disable this knob. The process that opened the file O_DIRECT * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not * meaningful. */ int newnfs_directio_allow_mmap = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); #define NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY \ | NFSACCESS_EXTEND | NFSACCESS_EXECUTE \ | NFSACCESS_DELETE | NFSACCESS_LOOKUP) /* * SMP Locking Note : * The list of locks after the description of the lock is the ordering * of other locks acquired with the lock held. * np->n_mtx : Protects the fields in the nfsnode. VM Object Lock VI_MTX (acquired indirectly) * nmp->nm_mtx : Protects the fields in the nfsmount. rep->r_mtx * ncl_iod_mutex : Global lock, protects shared nfsiod state. * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. nmp->nm_mtx rep->r_mtx * rep->r_mtx : Protects the fields in an nfsreq. */ static int nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td, struct ucred *cred, u_int32_t *retmode) { int error = 0, attrflag, i, lrupos; u_int32_t rmode; struct nfsnode *np = VTONFS(vp); struct nfsvattr nfsva; error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag, &rmode, NULL); if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { lrupos = 0; mtx_lock(&np->n_mtx); for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { if (np->n_accesscache[i].uid == cred->cr_uid) { np->n_accesscache[i].mode = rmode; np->n_accesscache[i].stamp = time_second; break; } if (i > 0 && np->n_accesscache[i].stamp < np->n_accesscache[lrupos].stamp) lrupos = i; } if (i == NFS_ACCESSCACHESIZE) { np->n_accesscache[lrupos].uid = cred->cr_uid; np->n_accesscache[lrupos].mode = rmode; np->n_accesscache[lrupos].stamp = time_second; } mtx_unlock(&np->n_mtx); if (retmode != NULL) *retmode = rmode; KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } #ifdef KDTRACE_HOOKS if (error != 0) KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0, error); #endif return (error); } /* * nfs access vnode op. * For nfs version 2, just return ok. File accesses may fail later. * For nfs version 3, use the access rpc to check accessibility. If file modes * are changed on the server, accesses might still fail later. */ static int nfs_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; int error = 0, i, gotahit; u_int32_t mode, wmode, rmode; int v34 = NFS_ISV34(vp); struct nfsnode *np = VTONFS(vp); /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS | VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL | VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } /* * For nfs v3 or v4, check to see if we have done this recently, and if * so return our cached result instead of making an ACCESS call. * If not, do an access rpc, otherwise you are stuck emulating * ufs_access() locally using the vattr. This may not be correct, * since the server may apply other access criteria such as * client uid-->server uid mapping that we do not know about. */ if (v34) { if (ap->a_accmode & VREAD) mode = NFSACCESS_READ; else mode = 0; if (vp->v_type != VDIR) { if (ap->a_accmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); if (ap->a_accmode & VAPPEND) mode |= NFSACCESS_EXTEND; if (ap->a_accmode & VEXEC) mode |= NFSACCESS_EXECUTE; if (ap->a_accmode & VDELETE) mode |= NFSACCESS_DELETE; } else { if (ap->a_accmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); if (ap->a_accmode & VAPPEND) mode |= NFSACCESS_EXTEND; if (ap->a_accmode & VEXEC) mode |= NFSACCESS_LOOKUP; if (ap->a_accmode & VDELETE) mode |= NFSACCESS_DELETE; if (ap->a_accmode & VDELETE_CHILD) mode |= NFSACCESS_MODIFY; } /* XXX safety belt, only make blanket request if caching */ if (nfsaccess_cache_timeout > 0) { wmode = NFSACCESS_READ | NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_EXECUTE | NFSACCESS_DELETE | NFSACCESS_LOOKUP; } else { wmode = mode; } /* * Does our cached result allow us to give a definite yes to * this request? */ gotahit = 0; mtx_lock(&np->n_mtx); for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) { if (time_second < (np->n_accesscache[i].stamp + nfsaccess_cache_timeout) && (np->n_accesscache[i].mode & mode) == mode) { NFSINCRGLOBAL(nfsstatsv1.accesscache_hits); gotahit = 1; } break; } } mtx_unlock(&np->n_mtx); #ifdef KDTRACE_HOOKS if (gotahit != 0) KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, ap->a_cred->cr_uid, mode); else KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, ap->a_cred->cr_uid, mode); #endif if (gotahit == 0) { /* * Either a no, or a don't know. Go to the wire. */ NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); error = nfs34_access_otw(vp, wmode, ap->a_td, ap->a_cred, &rmode); if (!error && (rmode & mode) != mode) error = EACCES; } return (error); } else { if ((error = nfsspec_access(ap)) != 0) { return (error); } /* * Attempt to prevent a mapped root from accessing a file * which it shouldn't. We try to read a byte from the file * if the user is root and the file is not zero length. * After calling nfsspec_access, we should have the correct * file size cached. */ mtx_lock(&np->n_mtx); if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) && VTONFS(vp)->n_size > 0) { struct iovec aiov; struct uio auio; char buf[1]; mtx_unlock(&np->n_mtx); aiov.iov_base = buf; aiov.iov_len = 1; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_resid = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = ap->a_td; if (vp->v_type == VREG) error = ncl_readrpc(vp, &auio, ap->a_cred); else if (vp->v_type == VDIR) { char* bp; bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); aiov.iov_base = bp; aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; error = ncl_readdirrpc(vp, &auio, ap->a_cred, ap->a_td); free(bp, M_TEMP); } else if (vp->v_type == VLNK) error = ncl_readlinkrpc(vp, &auio, ap->a_cred); else error = EACCES; } else mtx_unlock(&np->n_mtx); return (error); } } /* * nfs open vnode op * Check to see if the type is ok * and that deletion is not in progress. * For paged in text files, you will need to flush the page cache * if consistency is lost. */ /* ARGSUSED */ static int nfs_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr vattr; int error; int fmode = ap->a_mode; struct ucred *cred; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) return (EOPNOTSUPP); /* * For NFSv4, we need to do the Open Op before cache validation, * so that we conform to RFC3530 Sec. 9.3.1. */ if (NFS_ISV4(vp)) { error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td); if (error) { error = nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); return (error); } } /* * Now, if this Open will be doing reading, re-validate/flush the * cache, so that Close/Open coherency is maintained. */ mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) return (EBADF); if (error == EINTR || error == EIO) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); if (vp->v_type == VDIR) np->n_direofoffset = 0; mtx_unlock(&np->n_mtx); error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; if (NFS_ISV4(vp)) np->n_change = vattr.va_filerev; } else { mtx_unlock(&np->n_mtx); error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) || NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { if (vp->v_type == VDIR) np->n_direofoffset = 0; mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) return (EBADF); if (error == EINTR || error == EIO) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; if (NFS_ISV4(vp)) np->n_change = vattr.va_filerev; } } /* * If the object has >= 1 O_DIRECT active opens, we disable caching. */ if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { if (np->n_directio_opens == 0) { mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) return (EBADF); if (error) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_flag |= NNONCACHE; } np->n_directio_opens++; } /* If opened for writing via NFSv4.1 or later, mark that for pNFS. */ if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0) np->n_flag |= NWRITEOPENED; /* * If this is an open for writing, capture a reference to the * credentials, so they can be used by ncl_putpages(). Using * these write credentials is preferable to the credentials of * whatever thread happens to be doing the VOP_PUTPAGES() since * the write RPCs are less likely to fail with EACCES. */ if ((fmode & FWRITE) != 0) { cred = np->n_writecred; np->n_writecred = crhold(ap->a_cred); } else cred = NULL; mtx_unlock(&np->n_mtx); if (cred != NULL) crfree(cred); vnode_create_vobject(vp, vattr.va_size, ap->a_td); return (0); } /* * nfs close vnode op * What an NFS client should do upon close after writing is a debatable issue. * Most NFS clients push delayed writes to the server upon close, basically for * two reasons: * 1 - So that any write errors may be reported back to the client process * doing the close system call. By far the two most likely errors are * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. * 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client. * (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) * * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to ncl_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. * for NFS Version 4 - flush the dirty buffers and commit them, if * nfscl_mustflush() says this is necessary. * It is necessary if there is no write delegation held, * in order to satisfy open/close coherency. * If the file isn't cached on local stable storage, * it may be necessary in order to detect "out of space" * errors from the server, if the write delegation * issued by the server doesn't allow the file to grow. */ /* ARGSUSED */ static int nfs_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct nfsvattr nfsva; struct ucred *cred; int error = 0, ret, localcred = 0; int fmode = ap->a_fflag; if ((vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF)) return (0); /* * During shutdown, a_cred isn't valid, so just use root. */ if (ap->a_cred == NOCRED) { cred = newnfs_getcred(); localcred = 1; } else { cred = ap->a_cred; } if (vp->v_type == VREG) { /* * Examine and clean dirty pages, regardless of NMODIFIED. * This closes a major hole in close-to-open consistency. * We want to push out all dirty pages (and buffers) on * close, regardless of whether they were dirtied by * mmap'ed writes or via write(). */ if (nfs_clean_pages_on_close && vp->v_object) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_WUNLOCK(vp->v_object); } mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); if (NFS_ISV3(vp)) { /* * Under NFSv3 we have dirty buffers to dispose of. We * must flush them to the NFS server. We have the option * of waiting all the way through the commit rpc or just * waiting for the initial write. The default is to only * wait through the initial write so the data is in the * server's cache, which is roughly similar to the state * a standard disk subsystem leaves the file in on close(). * * We cannot clear the NMODIFIED bit in np->n_flag due to * potential races with other processes, and certainly * cannot clear it if we don't commit. * These races occur when there is no longer the old * traditional vnode locking implemented for Vnode Ops. */ int cm = newnfs_commit_on_close ? 1 : 0; error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0); /* np->n_flag &= ~NMODIFIED; */ } else if (NFS_ISV4(vp)) { if (nfscl_mustflush(vp) != 0) { int cm = newnfs_commit_on_close ? 1 : 0; error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0); /* * as above w.r.t races when clearing * NMODIFIED. * np->n_flag &= ~NMODIFIED; */ } } else { error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) return (EBADF); } mtx_lock(&np->n_mtx); } /* * Invalidate the attribute cache in all cases. * An open is going to fetch fresh attrs any way, other procs * on this node that have file open will be forced to do an * otw attr fetch, but this is safe. * --> A user found that their RPC count dropped by 20% when * this was commented out and I can't see any requirement * for it, so I've disabled it when negative lookups are * enabled. (What does this have to do with negative lookup * caching? Well nothing, except it was reported by the * same user that needed negative lookup caching and I wanted * there to be a way to disable it to see if it * is the cause of some caching/coherency issue that might * crop up.) */ if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) { np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } mtx_unlock(&np->n_mtx); } if (NFS_ISV4(vp)) { /* * Get attributes so "change" is up to date. */ if (error == 0 && nfscl_mustflush(vp) != 0 && vp->v_type == VREG && (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) { ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva, NULL); if (!ret) { np->n_change = nfsva.na_filerev; (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 0); } } /* * and do the close. */ ret = nfsrpc_close(vp, 0, ap->a_td); if (!error && ret) error = ret; if (error) error = nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); } if (newnfs_directio_enable) KASSERT((np->n_directio_asyncwr == 0), ("nfs_close: dirty unflushed (%d) directio buffers\n", np->n_directio_asyncwr)); if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { mtx_lock(&np->n_mtx); KASSERT((np->n_directio_opens > 0), ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); np->n_directio_opens--; if (np->n_directio_opens == 0) np->n_flag &= ~NNONCACHE; mtx_unlock(&np->n_mtx); } if (localcred) NFSFREECRED(cred); return (error); } /* * nfs getattr call from vfs. */ static int nfs_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct thread *td = curthread; /* XXX */ struct nfsnode *np = VTONFS(vp); int error = 0; struct nfsvattr nfsva; struct vattr *vap = ap->a_vap; struct vattr vattr; /* * Update local times for special files. */ mtx_lock(&np->n_mtx); if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; mtx_unlock(&np->n_mtx); /* * First look in the cache. */ if (ncl_getattrcache(vp, &vattr) == 0) { vap->va_type = vattr.va_type; vap->va_mode = vattr.va_mode; vap->va_nlink = vattr.va_nlink; vap->va_uid = vattr.va_uid; vap->va_gid = vattr.va_gid; vap->va_fsid = vattr.va_fsid; vap->va_fileid = vattr.va_fileid; vap->va_size = vattr.va_size; vap->va_blocksize = vattr.va_blocksize; vap->va_atime = vattr.va_atime; vap->va_mtime = vattr.va_mtime; vap->va_ctime = vattr.va_ctime; vap->va_gen = vattr.va_gen; vap->va_flags = vattr.va_flags; vap->va_rdev = vattr.va_rdev; vap->va_bytes = vattr.va_bytes; vap->va_filerev = vattr.va_filerev; /* * Get the local modify time for the case of a write * delegation. */ nfscl_deleggetmodtime(vp, &vap->va_mtime); return (0); } if (NFS_ISV34(vp) && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) { NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL); if (ncl_getattrcache(vp, ap->a_vap) == 0) { nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime); return (0); } } error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL); if (!error) error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0); if (!error) { /* * Get the local modify time for the case of a write * delegation. */ nfscl_deleggetmodtime(vp, &vap->va_mtime); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } return (error); } /* * nfs setattr call. */ static int nfs_setattr(struct vop_setattr_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct thread *td = curthread; /* XXX */ struct vattr *vap = ap->a_vap; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Setting of flags and marking of atimes are not supported. */ if (vap->va_flags != VNOVAL) return (EOPNOTSUPP); /* * Disallow write attempts if the filesystem is mounted read-only. */ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * We run vnode_pager_setsize() early (why?), * we must set np->n_size now to avoid vinvalbuf * V_SAVE races that might setsize a lower * value. */ mtx_lock(&np->n_mtx); tsize = np->n_size; mtx_unlock(&np->n_mtx); error = ncl_meta_setsize(vp, ap->a_cred, td, vap->va_size); mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { tsize = np->n_size; mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, vap->va_size == 0 ? 0 : V_SAVE, td, 1); if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) error = EBADF; if (error != 0) { vnode_pager_setsize(vp, tsize); return (error); } /* * Call nfscl_delegmodtime() to set the modify time * locally, as required. */ nfscl_delegmodtime(vp); } else mtx_unlock(&np->n_mtx); /* * np->n_size has already been set to vap->va_size * in ncl_meta_setsize(). We must set it again since * nfs_loadattrcache() could be called through * ncl_meta_setsize() and could modify np->n_size. */ mtx_lock(&np->n_mtx); np->n_vattr.na_size = np->n_size = vap->va_size; mtx_unlock(&np->n_mtx); } } else { mtx_lock(&np->n_mtx); if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG) { mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, td, 1); if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) return (EBADF); if (error == EINTR || error == EIO) return (error); } else mtx_unlock(&np->n_mtx); } error = nfs_setattrrpc(vp, vap, ap->a_cred, td); if (error && vap->va_size != VNOVAL) { mtx_lock(&np->n_mtx); np->n_size = np->n_vattr.na_size = tsize; vnode_pager_setsize(vp, tsize); mtx_unlock(&np->n_mtx); } return (error); } /* * Do an nfs setattr rpc. */ static int nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { struct nfsnode *np = VTONFS(vp); int error, ret, attrflag, i; struct nfsvattr nfsva; if (NFS_ISV34(vp)) { mtx_lock(&np->n_mtx); for (i = 0; i < NFS_ACCESSCACHESIZE; i++) np->n_accesscache[i].stamp = 0; np->n_flag |= NDELEGMOD; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); } error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag, NULL); if (attrflag) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (error && NFS_ISV4(vp)) error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid); return (error); } /* * nfs lookup call, one step at a time... * First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs_lookup(struct vop_lookup_args *ap) { struct componentname *cnp = ap->a_cnp; struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct mount *mp = dvp->v_mount; int flags = cnp->cn_flags; struct vnode *newvp; struct nfsmount *nmp; struct nfsnode *np, *newnp; int error = 0, attrflag, dattrflag, ltype, ncticks; struct thread *td = cnp->cn_thread; struct nfsfh *nfhp; struct nfsvattr dnfsva, nfsva; struct vattr vattr; struct timespec nctime; *vpp = NULLVP; if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); if (dvp->v_type != VDIR) return (ENOTDIR); nmp = VFSTONFS(mp); np = VTONFS(dvp); /* For NFSv4, wait until any remove is done. */ mtx_lock(&np->n_mtx); while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) { np->n_flag |= NREMOVEWANT; (void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0); } mtx_unlock(&np->n_mtx); if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) return (error); error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks); if (error > 0 && error != ENOENT) return (error); if (error == -1) { /* * Lookups of "." are special and always return the * current directory. cache_lookup() already handles * associated locking bookkeeping, etc. */ if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { /* XXX: Is this really correct? */ if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } /* * We only accept a positive hit in the cache if the * change time of the file matches our cached copy. * Otherwise, we discard the cache entry and fallback * to doing a lookup RPC. We also only trust cache * entries for less than nm_nametimeo seconds. * * To better handle stale file handles and attributes, * clear the attribute cache of this node if it is a * leaf component, part of an open() call, and not * locally modified before fetching the attributes. * This should allow stale file handles to be detected * here where we can fall back to a LOOKUP RPC to * recover rather than having nfs_open() detect the * stale file handle and failing open(2) with ESTALE. */ newvp = *vpp; newnp = VTONFS(newvp); if (!(nmp->nm_flag & NFSMNT_NOCTO) && (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && !(newnp->n_flag & NMODIFIED)) { mtx_lock(&newnp->n_mtx); newnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); mtx_unlock(&newnp->n_mtx); } if (nfscl_nodeleg(newvp, 0) == 0 || ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) && VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && timespeccmp(&vattr.va_ctime, &nctime, ==))) { NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } cache_purge(newvp); if (dvp != newvp) vput(newvp); else vrele(newvp); *vpp = NULLVP; } else if (error == ENOENT) { if (dvp->v_iflag & VI_DOOMED) return (ENOENT); /* * We only accept a negative hit in the cache if the * modification time of the parent directory matches * the cached copy in the name cache entry. * Otherwise, we discard all of the negative cache * entries for this directory. We also only trust * negative cache entries for up to nm_negnametimeo * seconds. */ if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) && VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && timespeccmp(&vattr.va_mtime, &nctime, ==)) { NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); return (ENOENT); } cache_purge_negative(dvp); } error = 0; newvp = NULLVP; NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses); error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (error) { if (newvp != NULLVP) { vput(newvp); *vpp = NULLVP; } if (error != ENOENT) { if (NFS_ISV4(dvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* The requested file was not found. */ if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN)) { /* * XXX: UFS does a full VOP_ACCESS(dvp, * VWRITE) here instead of just checking * MNT_RDONLY. */ if (mp->mnt_flag & MNT_RDONLY) return (EROFS); cnp->cn_flags |= SAVENAME; return (EJUSTRETURN); } if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) { /* * Cache the modification time of the parent * directory from the post-op attributes in * the name cache entry. The negative cache * entry will be ignored once the directory * has changed. Don't bother adding the entry * if the directory has already changed. */ mtx_lock(&np->n_mtx); if (timespeccmp(&np->n_vattr.na_mtime, &dnfsva.na_mtime, ==)) { mtx_unlock(&np->n_mtx); cache_enter_time(dvp, NULL, cnp, &dnfsva.na_mtime, NULL); } else mtx_unlock(&np->n_mtx); } return (ENOENT); } /* * Handle RENAME case... */ if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { FREE((caddr_t)nfhp, M_NFSFH); return (EISDIR); } error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, LK_EXCLUSIVE); if (error) return (error); newvp = NFSTOV(np); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); *vpp = newvp; cnp->cn_flags |= SAVENAME; return (0); } if (flags & ISDOTDOT) { ltype = NFSVOPISLOCKED(dvp); error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) { vfs_ref(mp); NFSVOPUNLOCK(dvp, 0); error = vfs_busy(mp, 0); NFSVOPLOCK(dvp, ltype | LK_RETRY); vfs_rel(mp); if (error == 0 && (dvp->v_iflag & VI_DOOMED)) { vfs_unbusy(mp); error = ENOENT; } if (error != 0) return (error); } NFSVOPUNLOCK(dvp, 0); error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, cnp->cn_lkflags); if (error == 0) newvp = NFSTOV(np); vfs_unbusy(mp); if (newvp != dvp) NFSVOPLOCK(dvp, ltype | LK_RETRY); if (dvp->v_iflag & VI_DOOMED) { if (error == 0) { if (newvp == dvp) vrele(newvp); else vput(newvp); } error = ENOENT; } if (error != 0) return (error); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { FREE((caddr_t)nfhp, M_NFSFH); VREF(dvp); newvp = dvp; if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else { error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, cnp->cn_lkflags); if (error) return (error); newvp = NFSTOV(np); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && !(np->n_flag & NMODIFIED)) { /* * Flush the attribute cache when opening a * leaf node to ensure that fresh attributes * are fetched in nfs_open() since we did not * fetch attributes from the LOOKUP reply. */ mtx_lock(&np->n_mtx); np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); mtx_unlock(&np->n_mtx); } } if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) && attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0)) cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime); *vpp = newvp; return (0); } /* * nfs read call. * Just call ncl_bioread() to do the work. */ static int nfs_read(struct vop_read_args *ap) { struct vnode *vp = ap->a_vp; switch (vp->v_type) { case VREG: return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); case VDIR: return (EISDIR); default: return (EOPNOTSUPP); } } /* * nfs readlink call */ static int nfs_readlink(struct vop_readlink_args *ap) { struct vnode *vp = ap->a_vp; if (vp->v_type != VLNK) return (EINVAL); return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred)); } /* * Do a readlink rpc. * Called by ncl_doio() from below the buffer cache. */ int ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int error, ret, attrflag; struct nfsvattr nfsva; error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag, NULL); if (attrflag) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (error && NFS_ISV4(vp)) error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs read rpc call * Ditto above */ int ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int error, ret, attrflag; struct nfsvattr nfsva; struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); error = EIO; attrflag = 0; if (NFSHASPNFS(nmp)) error = nfscl_doiods(vp, uiop, NULL, NULL, NFSV4OPEN_ACCESSREAD, cred, uiop->uio_td); NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error); if (error != 0) error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag, NULL); if (attrflag) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (error && NFS_ISV4(vp)) error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs write call */ int ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, int *iomode, int *must_commit, int called_from_strategy) { struct nfsvattr nfsva; int error, attrflag, ret; struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); error = EIO; attrflag = 0; if (NFSHASPNFS(nmp)) error = nfscl_doiods(vp, uiop, iomode, must_commit, NFSV4OPEN_ACCESSWRITE, cred, uiop->uio_td); NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error); if (error != 0) error = nfsrpc_write(vp, uiop, iomode, must_commit, cred, uiop->uio_td, &nfsva, &attrflag, NULL, called_from_strategy); if (attrflag) { if (VTONFS(vp)->n_flag & ND_NFSV4) ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1, 1); else ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (DOINGASYNC(vp)) *iomode = NFSWRITE_FILESYNC; if (error && NFS_ISV4(vp)) error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. */ static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) { struct nfsvattr nfsva, dnfsva; struct vnode *newvp = NULL; struct nfsnode *np = NULL, *dnp; struct nfsfh *nfhp; struct vattr vattr; int error = 0, attrflag, dattrflag; u_int32_t rdev; if (vap->va_type == VCHR || vap->va_type == VBLK) rdev = vap->va_rdev; else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = 0xffffffff; else return (EOPNOTSUPP); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) return (error); error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (!error) { if (!nfhp) (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (nfhp) error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); } if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (!error) { newvp = NFSTOV(np); if (attrflag != 0) { error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); if (error != 0) vput(newvp); } } if (!error) { *vpp = newvp; } else if (NFS_ISV4(dvp)) { error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (!dattrflag) { dnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } mtx_unlock(&dnp->n_mtx); return (error); } /* * nfs mknod vop * just call nfs_mknodrpc() to do the work. */ /* ARGSUSED */ static int nfs_mknod(struct vop_mknod_args *ap) { return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); } static struct mtx nfs_cverf_mtx; MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex", MTX_DEF); static nfsquad_t nfs_get_cverf(void) { static nfsquad_t cverf; nfsquad_t ret; static int cverf_initialized = 0; mtx_lock(&nfs_cverf_mtx); if (cverf_initialized == 0) { cverf.lval[0] = arc4random(); cverf.lval[1] = arc4random(); cverf_initialized = 1; } else cverf.qval++; ret = cverf; mtx_unlock(&nfs_cverf_mtx); return (ret); } /* * nfs file create call */ static int nfs_create(struct vop_create_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = NULL, *dnp; struct vnode *newvp = NULL; struct nfsmount *nmp; struct nfsvattr dnfsva, nfsva; struct nfsfh *nfhp; nfsquad_t cverf; int error = 0, attrflag, dattrflag, fmode = 0; struct vattr vattr; /* * Oops, not for me.. */ if (vap->va_type == VSOCK) return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) return (error); if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; dnp = VTONFS(dvp); nmp = VFSTONFS(vnode_mount(dvp)); again: /* For NFSv4, wait until any remove is done. */ mtx_lock(&dnp->n_mtx); while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) { dnp->n_flag |= NREMOVEWANT; (void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0); } mtx_unlock(&dnp->n_mtx); cverf = nfs_get_cverf(); error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (!error) { if (nfhp == NULL) (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (nfhp != NULL) error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); } if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (!error) { newvp = NFSTOV(np); if (attrflag == 0) error = nfsrpc_getattr(newvp, cnp->cn_cred, cnp->cn_thread, &nfsva, NULL); if (error == 0) error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } if (error) { if (newvp != NULL) { vput(newvp); newvp = NULL; } if (NFS_ISV34(dvp) && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { fmode &= ~O_EXCL; goto again; } } else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) { if (nfscl_checksattr(vap, &nfsva)) { error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred, cnp->cn_thread, &nfsva, &attrflag, NULL); if (error && (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL)) { /* try again without setting uid/gid */ vap->va_uid = (uid_t)VNOVAL; vap->va_gid = (uid_t)VNOVAL; error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred, cnp->cn_thread, &nfsva, &attrflag, NULL); } if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); if (error != 0) vput(newvp); } } if (!error) { if ((cnp->cn_flags & MAKEENTRY) && attrflag) cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, NULL); *ap->a_vpp = newvp; } else if (NFS_ISV4(dvp)) { error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (!dattrflag) { dnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } mtx_unlock(&dnp->n_mtx); return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. * - If v_usecount > 1 * If a rename is not already in the works * call nfs_sillyrename() to set it up * else * do the remove rpc */ static int nfs_remove(struct vop_remove_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = VTONFS(vp); int error = 0; struct vattr vattr; KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name")); KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount")); if (vp->v_type == VDIR) error = EPERM; else if (vrefcnt(vp) == 1 || (np->n_sillyrename && VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 && vattr.va_nlink > 1)) { /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is * minimized. Without node locking it can still happen, such * that an I/O op returns ESTALE, but since you get this if * another host removes the file.. */ cache_purge(vp); /* * throw away biocache buffers, mainly to avoid * unnecessary delayed writes later. */ error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1); if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) error = EBADF; else if (error != EINTR && error != EIO) /* Do the rpc */ error = nfs_removerpc(dvp, vp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); /* * Kludge City: If the first reply to the remove rpc is lost.. * the reply to the retransmitted request will be ENOENT * since the file was in fact removed * Therefore, we cheat and return success. */ if (error == ENOENT) error = 0; } else if (!np->n_sillyrename) error = nfs_sillyrename(dvp, vp, cnp); mtx_lock(&np->n_mtx); np->n_attrstamp = 0; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); return (error); } /* * nfs file remove rpc called from nfs_inactive */ int ncl_removeit(struct sillyrename *sp, struct vnode *vp) { /* * Make sure that the directory vnode is still valid. * XXX we should lock sp->s_dvp here. */ if (sp->s_dvp->v_type == VBAD) return (0); return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen, sp->s_cred, NULL)); } /* * Nfs remove rpc, called from nfs_remove() and ncl_removeit(). */ static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, int namelen, struct ucred *cred, struct thread *td) { struct nfsvattr dnfsva; struct nfsnode *dnp = VTONFS(dvp); int error = 0, dattrflag; mtx_lock(&dnp->n_mtx); dnp->n_flag |= NREMOVEINPROG; mtx_unlock(&dnp->n_mtx); error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva, &dattrflag, NULL); mtx_lock(&dnp->n_mtx); if ((dnp->n_flag & NREMOVEWANT)) { dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG); mtx_unlock(&dnp->n_mtx); wakeup((caddr_t)dnp); } else { dnp->n_flag &= ~NREMOVEINPROG; mtx_unlock(&dnp->n_mtx); } if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (!dattrflag) { dnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } mtx_unlock(&dnp->n_mtx); if (error && NFS_ISV4(dvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs file rename call */ static int nfs_rename(struct vop_rename_args *ap) { struct vnode *fvp = ap->a_fvp; struct vnode *tvp = ap->a_tvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *tdvp = ap->a_tdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct nfsnode *fnp = VTONFS(ap->a_fvp); struct nfsnode *tdnp = VTONFS(ap->a_tdvp); struct nfsv4node *newv4 = NULL; int error; KASSERT((tcnp->cn_flags & HASBUF) != 0 && (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name")); /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } if (fvp == tvp) { printf("nfs_rename: fvp == tvp (can't happen)\n"); error = 0; goto out; } if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0) goto out; /* * We have to flush B_DELWRI data prior to renaming * the file. If we don't, the delayed-write buffers * can be flushed out later after the file has gone stale * under NFSV3. NFSV2 does not have this problem because * ( as far as I can tell ) it flushes dirty buffers more * often. * * Skip the rename operation if the fsync fails, this can happen * due to the server's volume being full, when we pushed out data * that was written back to our cache earlier. Not checking for * this condition can result in potential (silent) data loss. */ error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread); NFSVOPUNLOCK(fvp, 0); if (!error && tvp) error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread); if (error) goto out; /* * If the tvp exists and is in use, sillyrename it before doing the * rename of the new file over it. * XXX Can't sillyrename a directory. */ if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { vput(tvp); tvp = NULL; } error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen, tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, tcnp->cn_thread); if (error == 0 && NFS_ISV4(tdvp)) { /* * For NFSv4, check to see if it is the same name and * replace the name, if it is different. */ MALLOC(newv4, struct nfsv4node *, sizeof (struct nfsv4node) + tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK); mtx_lock(&tdnp->n_mtx); mtx_lock(&fnp->n_mtx); if (fnp->n_v4 != NULL && fvp->v_type == VREG && (fnp->n_v4->n4_namelen != tcnp->cn_namelen || NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen) || tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen || NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, tdnp->n_fhp->nfh_len))) { #ifdef notdef { char nnn[100]; int nnnl; nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99; bcopy(tcnp->cn_nameptr, nnn, nnnl); nnn[nnnl] = '\0'; printf("ren replace=%s\n",nnn); } #endif FREE((caddr_t)fnp->n_v4, M_NFSV4NODE); fnp->n_v4 = newv4; newv4 = NULL; fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len; fnp->n_v4->n4_namelen = tcnp->cn_namelen; NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, tdnp->n_fhp->nfh_len); NFSBCOPY(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen); } mtx_unlock(&tdnp->n_mtx); mtx_unlock(&fnp->n_mtx); if (newv4 != NULL) FREE((caddr_t)newv4, M_NFSV4NODE); } if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs file rename rpc called from nfs_remove() above */ static int nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp, struct sillyrename *sp) { return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread)); } /* * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). */ static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td) { struct nfsvattr fnfsva, tnfsva; struct nfsnode *fdnp = VTONFS(fdvp); struct nfsnode *tdnp = VTONFS(tdvp); int error = 0, fattrflag, tattrflag; error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp, tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag, &tattrflag, NULL, NULL); mtx_lock(&fdnp->n_mtx); fdnp->n_flag |= NMODIFIED; if (fattrflag != 0) { mtx_unlock(&fdnp->n_mtx); (void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1); } else { fdnp->n_attrstamp = 0; mtx_unlock(&fdnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); } mtx_lock(&tdnp->n_mtx); tdnp->n_flag |= NMODIFIED; if (tattrflag != 0) { mtx_unlock(&tdnp->n_mtx); (void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1); } else { tdnp->n_attrstamp = 0; mtx_unlock(&tdnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); } if (error && NFS_ISV4(fdvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs hard link create call */ static int nfs_link(struct vop_link_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *np, *tdnp; struct nfsvattr nfsva, dnfsva; int error = 0, attrflag, dattrflag; /* * Push all writes to the server, so that the attribute cache * doesn't get "out of sync" with the server. * XXX There should be a better way! */ VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread); error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag, &dattrflag, NULL); tdnp = VTONFS(tdvp); mtx_lock(&tdnp->n_mtx); tdnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&tdnp->n_mtx); (void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1); } else { tdnp->n_attrstamp = 0; mtx_unlock(&tdnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); } if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); else { np = VTONFS(vp); mtx_lock(&np->n_mtx); np->n_attrstamp = 0; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } /* * If negative lookup caching is enabled, I might as well * add an entry for this node. Not necessary for correctness, * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL); } if (error && NFS_ISV4(vp)) error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs symbolic link create call */ static int nfs_symlink(struct vop_symlink_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsvattr nfsva, dnfsva; struct nfsfh *nfhp; struct nfsnode *np = NULL, *dnp; struct vnode *newvp = NULL; int error = 0, attrflag, dattrflag, ret; vap->va_type = VLNK; error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen, ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (nfhp) { ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); if (!ret) newvp = NFSTOV(np); else if (!error) error = ret; } if (newvp != NULL) { if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else if (!error) { /* * If we do not have an error and we could not extract the * newvp from the response due to the request being NFSv2, we * have to do a lookup in order to obtain a newvp to return. */ error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); if (!error) newvp = NFSTOV(np); } if (error) { if (newvp) vput(newvp); if (NFS_ISV4(dvp)) error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } else { *ap->a_vpp = newvp; } dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&dnp->n_mtx); (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } /* * If negative lookup caching is enabled, I might as well * add an entry for this node. Not necessary for correctness, * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, NULL); } return (error); } /* * nfs make dir call */ static int nfs_mkdir(struct vop_mkdir_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = NULL, *dnp; struct vnode *newvp = NULL; struct vattr vattr; struct nfsfh *nfhp; struct nfsvattr nfsva, dnfsva; int error = 0, attrflag, dattrflag, ret; if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) return (error); vap->va_type = VDIR; error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&dnp->n_mtx); (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } if (nfhp) { ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); if (!ret) { newvp = NFSTOV(np); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else if (!error) error = ret; } if (!error && newvp == NULL) { error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) error = EEXIST; } } if (error) { if (newvp) vput(newvp); if (NFS_ISV4(dvp)) error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } else { /* * If negative lookup caching is enabled, I might as well * add an entry for this node. Not necessary for correctness, * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && dattrflag != 0) cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, &dnfsva.na_ctime); *ap->a_vpp = newvp; } return (error); } /* * nfs remove directory call */ static int nfs_rmdir(struct vop_rmdir_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *dnp; struct nfsvattr dnfsva; int error, dattrflag; if (dvp == vp) return (EINVAL); error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL); dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&dnp->n_mtx); (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } cache_purge(dvp); cache_purge(vp); if (error && NFS_ISV4(dvp)) error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, (gid_t)0); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs readdir call */ static int nfs_readdir(struct vop_readdir_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct uio *uio = ap->a_uio; ssize_t tresid, left; int error = 0; struct vattr vattr; if (ap->a_eofflag != NULL) *ap->a_eofflag = 0; if (vp->v_type != VDIR) return(EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { mtx_lock(&np->n_mtx); if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) || !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { mtx_unlock(&np->n_mtx); NFSINCRGLOBAL(nfsstatsv1.direofcache_hits); if (ap->a_eofflag != NULL) *ap->a_eofflag = 1; return (0); } else mtx_unlock(&np->n_mtx); } } /* * NFS always guarantees that directory entries don't straddle * DIRBLKSIZ boundaries. As such, we need to limit the size * to an exact multiple of DIRBLKSIZ, to avoid copying a partial * directory entry. */ left = uio->uio_resid % DIRBLKSIZ; if (left == uio->uio_resid) return (EINVAL); uio->uio_resid -= left; /* * Call ncl_bioread() to do the real work. */ tresid = uio->uio_resid; error = ncl_bioread(vp, uio, 0, ap->a_cred); if (!error && uio->uio_resid == tresid) { NFSINCRGLOBAL(nfsstatsv1.direofcache_misses); if (ap->a_eofflag != NULL) *ap->a_eofflag = 1; } /* Add the partial DIRBLKSIZ (left) back in. */ uio->uio_resid += left; return (error); } /* * Readdir rpc call. * Called from below the buffer cache by ncl_doio(). */ int ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, struct thread *td) { struct nfsvattr nfsva; nfsuint64 *cookiep, cookie; struct nfsnode *dnp = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, eof, attrflag; KASSERT(uiop->uio_iovcnt == 1 && (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, ("nfs readdirrpc bad uio")); /* * If there is no cookie, assume directory was stale. */ ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) { cookie = *cookiep; ncl_dircookie_unlock(dnp); } else { ncl_dircookie_unlock(dnp); return (NFSERR_BAD_COOKIE); } if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) (void)ncl_fsinfo(nmp, vp, cred, td); error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva, &attrflag, &eof, NULL); if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { /* * We are now either at the end of the directory or have filled * the block. */ if (eof) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirrpc resid > 0\n"); ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; ncl_dircookie_unlock(dnp); } } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } return (error); } /* * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc(). */ int ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, struct thread *td) { struct nfsvattr nfsva; nfsuint64 *cookiep, cookie; struct nfsnode *dnp = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, attrflag, eof; KASSERT(uiop->uio_iovcnt == 1 && (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, ("nfs readdirplusrpc bad uio")); /* * If there is no cookie, assume directory was stale. */ ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) { cookie = *cookiep; ncl_dircookie_unlock(dnp); } else { ncl_dircookie_unlock(dnp); return (NFSERR_BAD_COOKIE); } if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) (void)ncl_fsinfo(nmp, vp, cred, td); error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva, &attrflag, &eof, NULL); if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { /* * We are now either at end of the directory or have filled the * the block. */ if (eof) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirplusrpc resid > 0\n"); ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; ncl_dircookie_unlock(dnp); } } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } return (error); } /* * Silly rename. To make the NFS filesystem that is stateless look a little * more like the "ufs" a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the nfs_lookitup() fails and the * nfs_rename() completes, but... */ static int nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { struct sillyrename *sp; struct nfsnode *np; int error; short pid; unsigned int lticks; cache_purge(dvp); np = VTONFS(vp); KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir")); MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), M_NEWNFSREQ, M_WAITOK); sp->s_cred = crhold(cnp->cn_cred); sp->s_dvp = dvp; VREF(dvp); /* * Fudge together a funny name. * Changing the format of the funny name to accommodate more * sillynames per directory. * The name is now changed to .nfs...4, where ticks is * CPU ticks since boot. */ pid = cnp->cn_thread->td_proc->p_pid; lticks = (unsigned int)ticks; for ( ; ; ) { sp->s_namlen = sprintf(sp->s_name, ".nfs.%08x.%04x4.4", lticks, pid); if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_thread, NULL)) break; lticks++; } error = nfs_renameit(dvp, vp, cnp, sp); if (error) goto bad; error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_thread, &np); np->n_sillyrename = sp; return (0); bad: vrele(sp->s_dvp); crfree(sp->s_cred); free((caddr_t)sp, M_NEWNFSREQ); return (error); } /* * Look up a file name and optionally either update the file handle or * allocate an nfsnode, depending on the value of npp. * npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred, struct thread *td, struct nfsnode **npp) { struct vnode *newvp = NULL, *vp; struct nfsnode *np, *dnp = VTONFS(dvp); struct nfsfh *nfhp, *onfhp; struct nfsvattr nfsva, dnfsva; struct componentname cn; int error = 0, attrflag, dattrflag; u_int hash; error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (npp && !error) { if (*npp != NULL) { np = *npp; vp = NFSTOV(np); /* * For NFSv4, check to see if it is the same name and * replace the name, if it is different. */ if (np->n_v4 != NULL && nfsva.na_type == VREG && (np->n_v4->n4_namelen != len || NFSBCMP(name, NFS4NODENAME(np->n_v4), len) || dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len))) { #ifdef notdef { char nnn[100]; int nnnl; nnnl = (len < 100) ? len : 99; bcopy(name, nnn, nnnl); nnn[nnnl] = '\0'; printf("replace=%s\n",nnn); } #endif FREE((caddr_t)np->n_v4, M_NFSV4NODE); MALLOC(np->n_v4, struct nfsv4node *, sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len + len - 1, M_NFSV4NODE, M_WAITOK); np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; np->n_v4->n4_namelen = len; NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len); NFSBCOPY(name, NFS4NODENAME(np->n_v4), len); } hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, FNV1_32_INIT); onfhp = np->n_fhp; /* * Rehash node for new file handle. */ vfs_hash_rehash(vp, hash); np->n_fhp = nfhp; if (onfhp != NULL) FREE((caddr_t)onfhp, M_NFSFH); newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) { FREE((caddr_t)nfhp, M_NFSFH); VREF(dvp); newvp = dvp; } else { cn.cn_nameptr = name; cn.cn_namelen = len; error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td, &np, NULL, LK_EXCLUSIVE); if (error) return (error); newvp = NFSTOV(np); } if (!attrflag && *npp == NULL) { if (newvp == dvp) vrele(newvp); else vput(newvp); return (ENOENT); } if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } if (npp && *npp == NULL) { if (error) { if (newvp) { if (newvp == dvp) vrele(newvp); else vput(newvp); } } else *npp = np; } if (error && NFS_ISV4(dvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * Nfs Version 3 and 4 commit rpc */ int ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct thread *td) { struct nfsvattr nfsva; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error, attrflag; mtx_lock(&nmp->nm_mtx); if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { mtx_unlock(&nmp->nm_mtx); return (0); } mtx_unlock(&nmp->nm_mtx); error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva, &attrflag, NULL); if (attrflag != 0) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (error != 0 && NFS_ISV4(vp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling ncl_asyncio(), otherwise just all ncl_doio() to do the * request. */ static int nfs_strategy(struct vop_strategy_args *ap) { struct buf *bp; struct vnode *vp; struct ucred *cr; bp = ap->a_bp; vp = ap->a_vp; KASSERT(bp->b_vp == vp, ("missing b_getvp")); KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); BUF_ASSERT_HELD(bp); if (vp->v_type == VREG && bp->b_blkno == bp->b_lblkno) bp->b_blkno = bp->b_lblkno * (vp->v_bufobj.bo_bsize / DEV_BSIZE); if (bp->b_iocmd == BIO_READ) cr = bp->b_rcred; else cr = bp->b_wcred; /* * If the op is asynchronous and an i/o daemon is waiting * queue the request, wake it up and wait for completion * otherwise just do it ourselves. */ if ((bp->b_flags & B_ASYNC) == 0 || ncl_asyncio(VFSTONFS(vp->v_mount), bp, NOCRED, curthread)) (void) ncl_doio(vp, bp, cr, curthread, 1); return (0); } /* * fsync vnode op. Just call ncl_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(struct vop_fsync_args *ap) { if (ap->a_vp->v_type != VREG) { /* * For NFS, metadata is changed synchronously on the server, * so there is nothing to flush. Also, ncl_flush() clears * the NMODIFIED flag and that shouldn't be done here for * directories. */ return (0); } return (ncl_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1, 0)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. * If the called_from_renewthread argument is TRUE, it has been called * from the NFSv4 renew thread and, as such, cannot block indefinitely * waiting for a buffer write to complete. */ int ncl_flush(struct vnode *vp, int waitfor, struct thread *td, int commit, int called_from_renewthread) { struct nfsnode *np = VTONFS(vp); struct buf *bp; int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1, trycnt = 0; u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; struct bufobj *bo; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; int bvecsize = 0, bveccount; if (called_from_renewthread != 0) slptimeo = hz; if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; if (!commit) passone = 0; bo = &vp->v_bufobj; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but has not been committed to stable storage on the server * yet. On the first pass, the byte range is worked out and the commit * rpc is done. On the second pass, ncl_writebp() is called to do the * job. */ again: off = (u_quad_t)-1; endoff = 0; bvecpos = 0; if (NFS_ISV34(vp) && commit) { if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); /* * Count up how many buffers waiting for a commit. */ bveccount = 0; BO_LOCK(bo); TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (!BUF_ISLOCKED(bp) && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; } /* * Allocate space to remember the list of bufs to commit. It is * important to use M_NOWAIT here to avoid a race with nfs_write. * If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below. */ if (bveccount > NFS_COMMITBVECSIZ) { /* * Release the vnode interlock to avoid a lock * order reversal. */ BO_UNLOCK(bo); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); BO_LOCK(bo); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bvecpos >= bvecsize) break; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { nbp = TAILQ_NEXT(bp, b_bobufs); continue; } if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT)) { BUF_UNLOCK(bp); nbp = TAILQ_NEXT(bp, b_bobufs); continue; } BO_UNLOCK(bo); bremfree(bp); /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. * * NOTE: we are not clearing B_DONE here, so we have * to do it later on in this routine if we intend to * initiate I/O on the bp. * * Note: to avoid loopback deadlocks, we do not * assign b_runningbufspace. */ if (wcred == NULL) wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; vfs_busy_pages(bp, 1); BO_LOCK(bo); /* * bp is protected by being locked, but nbp is not * and vfs_busy_pages() may sleep. We have to * recalculate nbp. */ nbp = TAILQ_NEXT(bp, b_bobufs); /* * A list of these buffers is kept so that the * second loop knows which buffers have actually * been committed. This is necessary, since there * may be a race between the commit rpc and new * uncommitted writes on the file. */ bvec[bvecpos++] = bp; toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; if (toff < off) off = toff; toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); if (toff > endoff) endoff = toff; } BO_UNLOCK(bo); } if (bvecpos > 0) { /* * Commit data on the server, as required. * If all bufs are using the same wcred, then use that with * one call for all of them, otherwise commit each one * separately. */ if (wcred != NOCRED) retv = ncl_commit(vp, off, (int)(endoff - off), wcred, td); else { retv = 0; for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); retv = ncl_commit(vp, off, (int)size, bp->b_wcred, td); if (retv) break; } } if (retv == NFSERR_STALEWRITEVERF) ncl_clearcommit(vp->v_mount); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. */ for (i = 0; i < bvecpos; i++) { bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); if (retv) { /* * Error, leave B_DELWRI intact */ vfs_unbusy_pages(bp); brelse(bp); } else { /* * Success, remove B_DELWRI ( bundirty() ). * * b_dirtyoff/b_dirtyend seem to be NFS * specific. We should probably move that * into bundirty(). XXX */ bufobj_wref(bo); bp->b_flags |= B_ASYNC; bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_dirtyoff = bp->b_dirtyend = 0; bufdone(bp); } } } /* * Start/do any write(s) that are required. */ loop: BO_LOCK(bo); TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { if (waitfor != MNT_WAIT || passone) continue; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo); if (error == 0) { BUF_UNLOCK(bp); goto loop; } if (error == ENOLCK) { error = 0; goto loop; } if (called_from_renewthread != 0) { /* * Return EIO so the flush will be retried * later. */ error = EIO; goto done; } if (newnfs_sigintr(nmp, td)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } goto loop; } if ((bp->b_flags & B_DELWRI) == 0) panic("nfs_fsync: not dirty"); if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { BUF_UNLOCK(bp); continue; } BO_UNLOCK(bo); bremfree(bp); if (passone || !commit) bp->b_flags |= B_ASYNC; else bp->b_flags |= B_ASYNC; bwrite(bp); if (newnfs_sigintr(nmp, td)) { error = EINTR; goto done; } goto loop; } if (passone) { passone = 0; BO_UNLOCK(bo); goto again; } if (waitfor == MNT_WAIT) { while (bo->bo_numoutput) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); if (called_from_renewthread != 0) { /* * Return EIO so that the flush will be * retried later. */ error = EIO; goto done; } error = newnfs_sigintr(nmp, td); if (error) goto done; if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } BO_LOCK(bo); } } if (bo->bo_dirty.bv_cnt != 0 && commit) { BO_UNLOCK(bo); goto loop; } /* * Wait for all the async IO requests to drain */ BO_UNLOCK(bo); mtx_lock(&np->n_mtx); while (np->n_directio_asyncwr > 0) { np->n_flag |= NFSYNCWAIT; error = newnfs_msleep(td, &np->n_directio_asyncwr, &np->n_mtx, slpflag | (PRIBIO + 1), "nfsfsync", 0); if (error) { if (newnfs_sigintr(nmp, td)) { mtx_unlock(&np->n_mtx); error = EINTR; goto done; } } } mtx_unlock(&np->n_mtx); } else BO_UNLOCK(bo); if (NFSHASPNFS(nmp)) { nfscl_layoutcommit(vp, td); /* * Invalidate the attribute cache, since writes to a DS * won't update the size attribute. */ mtx_lock(&np->n_mtx); np->n_attrstamp = 0; } else mtx_lock(&np->n_mtx); if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } if (commit && bo->bo_dirty.bv_cnt == 0 && bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0) np->n_flag &= ~NMODIFIED; mtx_unlock(&np->n_mtx); done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); if (error == 0 && commit != 0 && waitfor == MNT_WAIT && (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 || np->n_directio_asyncwr != 0)) { if (trycnt++ < 5) { /* try, try again... */ passone = 1; wcred = NULL; bvec = NULL; bvecsize = 0; goto again; } vn_printf(vp, "ncl_flush failed"); error = called_from_renewthread != 0 ? EIO : EBUSY; } return (error); } /* * NFS advisory byte-level locks. */ static int nfs_advlock(struct vop_advlock_args *ap) { struct vnode *vp = ap->a_vp; struct ucred *cred; struct nfsnode *np = VTONFS(ap->a_vp); struct proc *p = (struct proc *)ap->a_id; struct thread *td = curthread; /* XXX */ struct vattr va; int ret, error = EOPNOTSUPP; u_quad_t size; if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) { if (vp->v_type != VREG) return (EINVAL); if ((ap->a_flags & F_POSIX) != 0) cred = p->p_ucred; else cred = td->td_ucred; NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { NFSVOPUNLOCK(vp, 0); return (EBADF); } /* * If this is unlocking a write locked region, flush and * commit them before unlocking. This is required by * RFC3530 Sec. 9.3.2. */ if (ap->a_op == F_UNLCK && nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id, ap->a_flags)) (void) ncl_flush(vp, MNT_WAIT, td, 1, 0); /* * Loop around doing the lock op, while a blocking lock * must wait for the lock op to succeed. */ do { ret = nfsrpc_advlock(vp, np->n_size, ap->a_op, ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags); if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && ap->a_op == F_SETLK) { NFSVOPUNLOCK(vp, 0); error = nfs_catnap(PZERO | PCATCH, ret, "ncladvl"); if (error) return (EINTR); NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { NFSVOPUNLOCK(vp, 0); return (EBADF); } } } while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && ap->a_op == F_SETLK); if (ret == NFSERR_DENIED) { NFSVOPUNLOCK(vp, 0); return (EAGAIN); } else if (ret == EINVAL || ret == EBADF || ret == EINTR) { NFSVOPUNLOCK(vp, 0); return (ret); } else if (ret != 0) { NFSVOPUNLOCK(vp, 0); return (EACCES); } /* * Now, if we just got a lock, invalidate data in the buffer * cache, as required, so that the coherency conforms with * RFC3530 Sec. 9.3.2. */ if (ap->a_op == F_SETLK) { if ((np->n_flag & NMODIFIED) == 0) { np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); ret = VOP_GETATTR(vp, &va, cred); } if ((np->n_flag & NMODIFIED) || ret || np->n_change != va.va_filerev) { (void) ncl_vinvalbuf(vp, V_SAVE, td, 1); if ((vp->v_iflag & VI_DOOMED) != 0) { NFSVOPUNLOCK(vp, 0); return (EBADF); } np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); ret = VOP_GETATTR(vp, &va, cred); if (!ret) { np->n_mtime = va.va_mtime; np->n_change = va.va_filerev; } } /* Mark that a file lock has been acquired. */ mtx_lock(&np->n_mtx); np->n_flag |= NHASBEENLOCKED; mtx_unlock(&np->n_mtx); } NFSVOPUNLOCK(vp, 0); return (0); } else if (!NFS_ISV4(vp)) { error = NFSVOPLOCK(vp, LK_SHARED); if (error) return (error); if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { size = VTONFS(vp)->n_size; NFSVOPUNLOCK(vp, 0); error = lf_advlock(ap, &(vp->v_lockf), size); } else { if (nfs_advlock_p != NULL) error = nfs_advlock_p(ap); else { NFSVOPUNLOCK(vp, 0); error = ENOLCK; } } if (error == 0 && ap->a_op == F_SETLK) { error = NFSVOPLOCK(vp, LK_SHARED); if (error == 0) { /* Mark that a file lock has been acquired. */ mtx_lock(&np->n_mtx); np->n_flag |= NHASBEENLOCKED; mtx_unlock(&np->n_mtx); NFSVOPUNLOCK(vp, 0); } } } return (error); } /* * NFS advisory byte-level locks. */ static int nfs_advlockasync(struct vop_advlockasync_args *ap) { struct vnode *vp = ap->a_vp; u_quad_t size; int error; if (NFS_ISV4(vp)) return (EOPNOTSUPP); error = NFSVOPLOCK(vp, LK_SHARED); if (error) return (error); if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { size = VTONFS(vp)->n_size; NFSVOPUNLOCK(vp, 0); error = lf_advlockasync(ap, &(vp->v_lockf), size); } else { NFSVOPUNLOCK(vp, 0); error = EOPNOTSUPP; } return (error); } /* * Print out the contents of an nfsnode. */ static int nfs_print(struct vop_print_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); printf("\tfileid %jd fsid 0x%jx", (uintmax_t)np->n_vattr.na_fileid, (uintmax_t)np->n_vattr.na_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * This is the "real" nfs::bwrite(struct buf*). * We set B_CACHE if this is a VMIO buffer. */ int ncl_writebp(struct buf *bp, int force __unused, struct thread *td) { int oldflags, rtval; BUF_ASSERT_HELD(bp); if (bp->b_flags & B_INVAL) { brelse(bp); return (0); } oldflags = bp->b_flags; bp->b_flags |= B_CACHE; /* * Undirty the bp. We will redirty it later if the I/O fails. */ bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_iocmd = BIO_WRITE; bufobj_wref(bp->b_bufobj); curthread->td_ru.ru_oublock++; /* * Note: to avoid loopback deadlocks, we do not * assign b_runningbufspace. */ vfs_busy_pages(bp, 1); BUF_KERNPROC(bp); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); if ((oldflags & B_ASYNC) != 0) return (0); rtval = bufwait(bp); if (oldflags & B_DELWRI) reassignbuf(bp); brelse(bp); return (rtval); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. */ static int nfsspec_access(struct vop_access_args *ap) { struct vattr *vap; struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; accmode_t accmode = ap->a_accmode; struct vattr vattr; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } vap = &vattr; error = VOP_GETATTR(vp, vap, cred); if (error) goto out; error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, accmode, cred, NULL); out: return error; } /* * Read wrapper for fifos. */ static int nfsfifo_read(struct vop_read_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); int error; /* * Set access flag. */ mtx_lock(&np->n_mtx); np->n_flag |= NACC; vfs_timestamp(&np->n_atim); mtx_unlock(&np->n_mtx); error = fifo_specops.vop_read(ap); return error; } /* * Write wrapper for fifos. */ static int nfsfifo_write(struct vop_write_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); /* * Set update flag. */ mtx_lock(&np->n_mtx); np->n_flag |= NUPD; vfs_timestamp(&np->n_mtim); mtx_unlock(&np->n_mtx); return(fifo_specops.vop_write(ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr vattr; struct timespec ts; mtx_lock(&np->n_mtx); if (np->n_flag & (NACC | NUPD)) { vfs_timestamp(&ts); if (np->n_flag & NACC) np->n_atim = ts; if (np->n_flag & NUPD) np->n_mtim = ts; np->n_flag |= NCHG; if (vrefcnt(vp) == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; mtx_unlock(&np->n_mtx); (void)VOP_SETATTR(vp, &vattr, ap->a_cred); goto out; } } mtx_unlock(&np->n_mtx); out: return (fifo_specops.vop_close(ap)); } /* * Just call ncl_writebp() with the force argument set to 1. * * NOTE: B_DONE may or may not be set in a_bp on call. */ static int nfs_bwrite(struct buf *bp) { return (ncl_writebp(bp, 1, curthread)); } struct buf_ops buf_ops_newnfs = { .bop_name = "buf_ops_nfs", .bop_write = nfs_bwrite, .bop_strategy = bufstrategy, .bop_sync = bufsync, .bop_bdflush = bufbdflush, }; static int nfs_getacl(struct vop_getacl_args *ap) { int error; if (ap->a_type != ACL_TYPE_NFS4) return (EOPNOTSUPP); error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, NULL); if (error > NFSERR_STALE) { (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); error = EPERM; } return (error); } static int nfs_setacl(struct vop_setacl_args *ap) { int error; if (ap->a_type != ACL_TYPE_NFS4) return (EOPNOTSUPP); error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, NULL); if (error > NFSERR_STALE) { (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); error = EPERM; } return (error); } static int nfs_set_text(struct vop_set_text_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np; /* * If the text file has been mmap'd, flush any dirty pages to the * buffer cache and then... * Make sure all writes are pushed to the NFS server. If this is not * done, the modify time of the file can change while the text * file is being executed. This will cause the process that is * executing the text file to be terminated. */ if (vp->v_object != NULL) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } /* Now, flush the buffer cache. */ ncl_flush(vp, MNT_WAIT, curthread, 0, 0); /* And, finally, make sure that n_mtime is up to date. */ np = VTONFS(vp); mtx_lock(&np->n_mtx); np->n_mtime = np->n_vattr.na_mtime; mtx_unlock(&np->n_mtx); vp->v_vflag |= VV_TEXT; return (0); } /* * Return POSIX pathconf information applicable to nfs filesystems. */ static int nfs_pathconf(struct vop_pathconf_args *ap) { struct nfsv3_pathconf pc; struct nfsvattr nfsva; struct vnode *vp = ap->a_vp; struct thread *td = curthread; int attrflag, error; if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX || ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED || ap->a_name == _PC_NO_TRUNC)) || (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) { /* * Since only the above 4 a_names are returned by the NFSv3 * Pathconf RPC, there is no point in doing it for others. * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can * be used for _PC_NFS4_ACL as well. */ error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva, &attrflag, NULL); if (attrflag != 0) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (error != 0) return (error); } else { /* * For NFSv2 (or NFSv3 when not one of the above 4 a_names), * just fake them. */ pc.pc_linkmax = LINK_MAX; pc.pc_namemax = NFS_MAXNAMLEN; pc.pc_notrunc = 1; pc.pc_chownrestricted = 1; pc.pc_caseinsensitive = 0; pc.pc_casepreserving = 1; error = 0; } switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = pc.pc_linkmax; break; case _PC_NAME_MAX: *ap->a_retval = pc.pc_namemax; break; case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; break; case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = pc.pc_chownrestricted; break; case _PC_NO_TRUNC: *ap->a_retval = pc.pc_notrunc; break; case _PC_ACL_EXTENDED: *ap->a_retval = 0; break; case _PC_ACL_NFS4: if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 && NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) *ap->a_retval = 1; else *ap->a_retval = 0; break; case _PC_ACL_PATH_MAX: if (NFS_ISV4(vp)) *ap->a_retval = ACL_MAX_ENTRIES; else *ap->a_retval = 3; break; case _PC_MAC_PRESENT: *ap->a_retval = 0; break; case _PC_ASYNC_IO: /* _PC_ASYNC_IO should have been handled by upper layers. */ KASSERT(0, ("_PC_ASYNC_IO should not get here")); error = EINVAL; break; case _PC_PRIO_IO: *ap->a_retval = 0; break; case _PC_SYNC_IO: *ap->a_retval = 0; break; case _PC_ALLOC_SIZE_MIN: *ap->a_retval = vp->v_mount->mnt_stat.f_bsize; break; case _PC_FILESIZEBITS: if (NFS_ISV34(vp)) *ap->a_retval = 64; else *ap->a_retval = 32; break; case _PC_REC_INCR_XFER_SIZE: *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_MAX_XFER_SIZE: *ap->a_retval = -1; /* means ``unlimited'' */ break; case _PC_REC_MIN_XFER_SIZE: *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_XFER_ALIGN: *ap->a_retval = PAGE_SIZE; break; case _PC_SYMLINK_MAX: *ap->a_retval = NFS_MAXPATHLEN; break; default: error = EINVAL; break; } return (error); } Index: head/sys/fs/nfsserver/nfs_nfsdport.c =================================================================== --- head/sys/fs/nfsserver/nfs_nfsdport.c (revision 318735) +++ head/sys/fs/nfsserver/nfs_nfsdport.c (revision 318736) @@ -1,3424 +1,3441 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include /* * Functions that perform the vfs operations required by the routines in * nfsd_serv.c. It is hoped that this change will make the server more * portable. */ #include #include #include #include #include FEATURE(nfsd, "NFSv4 server"); extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; extern int nfsrv_useacl; extern int newnfs_numnfsd; extern struct mount nfsv4root_mnt; extern struct nfsrv_stablefirst nfsrv_stablefirst; extern void (*nfsd_call_servertimer)(void); extern SVCPOOL *nfsrvd_pool; extern struct nfsv4lock nfsd_suspend_lock; extern struct nfsclienthashhead *nfsclienthash; extern struct nfslockhashhead *nfslockhash; extern struct nfssessionhash *nfssessionhash; extern int nfsrv_sessionhashsize; extern struct nfsstatsv1 nfsstatsv1; struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; NFSDLOCKMUTEX; struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; struct mtx nfsrc_udpmtx; struct mtx nfs_v4root_mutex; struct nfsrvfh nfs_rootfh, nfs_pubfh; int nfs_pubfhset = 0, nfs_rootfhset = 0; struct proc *nfsd_master_proc = NULL; int nfsd_debuglevel = 0; static pid_t nfsd_master_pid = (pid_t)-1; static char nfsd_master_comm[MAXCOMLEN + 1]; static struct timeval nfsd_master_start; static uint32_t nfsv4_sysid = 0; static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, struct ucred *); int nfsrv_enable_crossmntpt = 1; static int nfs_commit_blks; static int nfs_commit_miss; extern int nfsrv_issuedelegs; extern int nfsrv_dolocallocks; extern int nfsd_enable_stringtouid; SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "NFS server"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, ""); SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, ""); SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 0, "Debug level for NFS server"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW, &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names"); #define MAX_REORDERED_RPC 16 #define NUM_HEURISTIC 1031 #define NHUSE_INIT 64 #define NHUSE_INC 16 #define NHUSE_MAX 2048 static struct nfsheur { struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ off_t nh_nextoff; /* next offset for sequential detection */ int nh_use; /* use count for selection */ int nh_seqcount; /* heuristic */ } nfsheur[NUM_HEURISTIC]; /* * Heuristic to detect sequential operation. */ static struct nfsheur * nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) { struct nfsheur *nh; int hi, try; /* Locate best candidate. */ try = 32; hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; nh = &nfsheur[hi]; while (try--) { if (nfsheur[hi].nh_vp == vp) { nh = &nfsheur[hi]; break; } if (nfsheur[hi].nh_use > 0) --nfsheur[hi].nh_use; hi = (hi + 1) % NUM_HEURISTIC; if (nfsheur[hi].nh_use < nh->nh_use) nh = &nfsheur[hi]; } /* Initialize hint if this is a new file. */ if (nh->nh_vp != vp) { nh->nh_vp = vp; nh->nh_nextoff = uio->uio_offset; nh->nh_use = NHUSE_INIT; if (uio->uio_offset == 0) nh->nh_seqcount = 4; else nh->nh_seqcount = 1; } /* Calculate heuristic. */ if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || uio->uio_offset == nh->nh_nextoff) { /* See comments in vfs_vnops.c:sequential_heuristic(). */ nh->nh_seqcount += howmany(uio->uio_resid, 16384); if (nh->nh_seqcount > IO_SEQMAX) nh->nh_seqcount = IO_SEQMAX; } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { /* Probably a reordered RPC, leave seqcount alone. */ } else if (nh->nh_seqcount > 1) { nh->nh_seqcount /= 2; } else { nh->nh_seqcount = 0; } nh->nh_use += NHUSE_INC; if (nh->nh_use > NHUSE_MAX) nh->nh_use = NHUSE_MAX; return (nh); } /* * Get attributes into nfsvattr structure. */ int nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p, int vpislocked) { int error, lockedit = 0; if (vpislocked == 0) { /* * When vpislocked == 0, the vnode is either exclusively * locked by this thread or not locked by this thread. * As such, shared lock it, if not exclusively locked. */ if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { lockedit = 1; NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); } } error = VOP_GETATTR(vp, &nvap->na_vattr, cred); if (lockedit != 0) NFSVOPUNLOCK(vp, 0); NFSEXITCODE(error); return (error); } /* * Get a file handle for a vnode. */ int nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) { int error; NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VOP_VPTOFH(vp, &fhp->fh_fid); NFSEXITCODE(error); return (error); } /* * Perform access checking for vnodes obtained from file handles that would * refer to files already opened by a Unix client. You cannot just use * vn_writechk() and VOP_ACCESSX() for two reasons. * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write * case. * 2 - The owner is to be given access irrespective of mode bits for some * operations, so that processes that chmod after opening a file don't * break. */ int nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, u_int32_t *supportedtypep) { struct vattr vattr; int error = 0, getret = 0; if (vpislocked == 0) { if (NFSVOPLOCK(vp, LK_SHARED) != 0) { error = EPERM; goto out; } } if (accmode & VWRITE) { /* Just vn_writechk() changed to check rdonly */ /* * Disallow write attempts on read-only file systems; * unless the file is a socket or a block or character * device resident on the file system. */ if (NFSVNO_EXRDONLY(exp) || (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: error = EROFS; default: break; } } /* * If there's shared text associated with * the inode, try to free it up once. If * we fail, we can't allow writing. */ if (VOP_IS_TEXT(vp) && error == 0) error = ETXTBSY; } if (error != 0) { if (vpislocked == 0) NFSVOPUNLOCK(vp, 0); goto out; } /* * Should the override still be applied when ACLs are enabled? */ error = VOP_ACCESSX(vp, accmode, cred, p); if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { /* * Try again with VEXPLICIT_DENY, to see if the test for * deletion is supported. */ error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); if (error == 0) { if (vp->v_type == VDIR) { accmode &= ~(VDELETE | VDELETE_CHILD); accmode |= VWRITE; error = VOP_ACCESSX(vp, accmode, cred, p); } else if (supportedtypep != NULL) { *supportedtypep &= ~NFSACCESS_DELETE; } } } /* * Allow certain operations for the owner (reads and writes * on files that are already open). */ if (override != NFSACCCHK_NOOVERRIDE && (error == EPERM || error == EACCES)) { if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) error = 0; else if (override & NFSACCCHK_ALLOWOWNER) { getret = VOP_GETATTR(vp, &vattr, cred); if (getret == 0 && cred->cr_uid == vattr.va_uid) error = 0; } } if (vpislocked == 0) NFSVOPUNLOCK(vp, 0); out: NFSEXITCODE(error); return (error); } /* * Set attribute(s) vnop. */ int nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error; error = VOP_SETATTR(vp, &nvap->na_vattr, cred); NFSEXITCODE(error); return (error); } /* * Set up nameidata for a lookup() call and do it. */ int nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp, struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p, struct vnode **retdirp) { struct componentname *cnp = &ndp->ni_cnd; int i; struct iovec aiov; struct uio auio; int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen; int error = 0, crossmnt; char *cp; *retdirp = NULL; cnp->cn_nameptr = cnp->cn_pnbuf; ndp->ni_lcf = 0; /* * Extract and set starting directory. */ if (dp->v_type != VDIR) { if (islocked) vput(dp); else vrele(dp); nfsvno_relpathbuf(ndp); error = ENOTDIR; goto out1; } if (islocked) NFSVOPUNLOCK(dp, 0); VREF(dp); *retdirp = dp; if (NFSVNO_EXRDONLY(exp)) cnp->cn_flags |= RDONLY; ndp->ni_segflg = UIO_SYSSPACE; crossmnt = 1; if (nd->nd_flag & ND_PUBLOOKUP) { ndp->ni_loopcnt = 0; if (cnp->cn_pnbuf[0] == '/') { vrele(dp); /* * Check for degenerate pathnames here, since lookup() * panics on them. */ for (i = 1; i < ndp->ni_pathlen; i++) if (cnp->cn_pnbuf[i] != '/') break; if (i == ndp->ni_pathlen) { error = NFSERR_ACCES; goto out; } dp = rootvnode; VREF(dp); } } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) || (nd->nd_flag & ND_NFSV4) == 0) { /* * Only cross mount points for NFSv4 when doing a * mount while traversing the file system above * the mount point, unless nfsrv_enable_crossmntpt is set. */ cnp->cn_flags |= NOCROSSMOUNT; crossmnt = 0; } /* * Initialize for scan, set ni_startdir and bump ref on dp again * because lookup() will dereference ni_startdir. */ cnp->cn_thread = p; ndp->ni_startdir = dp; ndp->ni_rootdir = rootvnode; ndp->ni_topdir = NULL; if (!lockleaf) cnp->cn_flags |= LOCKLEAF; for (;;) { cnp->cn_nameptr = cnp->cn_pnbuf; /* * Call lookup() to do the real work. If an error occurs, * ndp->ni_vp and ni_dvp are left uninitialized or NULL and * we do not have to dereference anything before returning. * In either case ni_startdir will be dereferenced and NULLed * out. */ error = lookup(ndp); if (error) break; /* * Check for encountering a symbolic link. Trivial * termination occurs if no symlink encountered. */ if ((cnp->cn_flags & ISSYMLINK) == 0) { if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) nfsvno_relpathbuf(ndp); if (ndp->ni_vp && !lockleaf) NFSVOPUNLOCK(ndp->ni_vp, 0); break; } /* * Validate symlink */ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) NFSVOPUNLOCK(ndp->ni_dvp, 0); if (!(nd->nd_flag & ND_PUBLOOKUP)) { error = EINVAL; goto badlink2; } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; goto badlink2; } if (ndp->ni_pathlen > 1) cp = uma_zalloc(namei_zone, M_WAITOK); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = NULL; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { badlink1: if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); badlink2: vrele(ndp->ni_dvp); vput(ndp->ni_vp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { error = ENOENT; goto badlink1; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { error = ENAMETOOLONG; goto badlink1; } /* * Adjust or replace path */ if (ndp->ni_pathlen > 1) { NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen); uma_zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; /* * Cleanup refs for next loop and check if root directory * should replace current directory. Normally ni_dvp * becomes the new base directory and is cleaned up when * we loop. Explicitly null pointers after invalidation * to clarify operation. */ vput(ndp->ni_vp); ndp->ni_vp = NULL; if (cnp->cn_pnbuf[0] == '/') { vrele(ndp->ni_dvp); ndp->ni_dvp = ndp->ni_rootdir; VREF(ndp->ni_dvp); } ndp->ni_startdir = ndp->ni_dvp; ndp->ni_dvp = NULL; } if (!lockleaf) cnp->cn_flags &= ~LOCKLEAF; out: if (error) { nfsvno_relpathbuf(ndp); ndp->ni_vp = NULL; ndp->ni_dvp = NULL; ndp->ni_startdir = NULL; } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { ndp->ni_dvp = NULL; } out1: NFSEXITCODE2(error, nd); return (error); } /* * Set up a pathname buffer and return a pointer to it and, optionally * set a hash pointer. */ void nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp) { struct componentname *cnp = &ndp->ni_cnd; cnp->cn_flags |= (NOMACCHECK | HASBUF); cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); if (hashpp != NULL) *hashpp = NULL; *bufpp = cnp->cn_pnbuf; } /* * Release the above path buffer, if not released by nfsvno_namei(). */ void nfsvno_relpathbuf(struct nameidata *ndp) { if ((ndp->ni_cnd.cn_flags & HASBUF) == 0) panic("nfsrelpath"); uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); ndp->ni_cnd.cn_flags &= ~HASBUF; } /* * Readlink vnode op into an mbuf list. */ int nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) { struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; struct iovec *ivp = iv; struct uio io, *uiop = &io; struct mbuf *mp, *mp2 = NULL, *mp3 = NULL; int i, len, tlen, error = 0; len = 0; i = 0; while (len < NFS_MAXPATHLEN) { NFSMGET(mp); MCLGET(mp, M_WAITOK); mp->m_len = M_SIZE(mp); if (len == 0) { mp3 = mp2 = mp; } else { mp2->m_next = mp; mp2 = mp; } if ((len + mp->m_len) > NFS_MAXPATHLEN) { mp->m_len = NFS_MAXPATHLEN - len; len = NFS_MAXPATHLEN; } else { len += mp->m_len; } ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; i++; ivp++; } uiop->uio_iov = iv; uiop->uio_iovcnt = i; uiop->uio_offset = 0; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; error = VOP_READLINK(vp, uiop, cred); if (error) { m_freem(mp3); *lenp = 0; goto out; } if (uiop->uio_resid > 0) { len -= uiop->uio_resid; tlen = NFSM_RNDUP(len); nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len); } *lenp = len; *mpp = mp3; *mpendp = mp; out: NFSEXITCODE(error); return (error); } /* * Read vnode op call into mbuf list. */ int nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) { struct mbuf *m; int i; struct iovec *iv; struct iovec *iv2; int error = 0, len, left, siz, tlen, ioflag = 0; struct mbuf *m2 = NULL, *m3; struct uio io, *uiop = &io; struct nfsheur *nh; len = left = NFSM_RNDUP(cnt); m3 = NULL; /* * Generate the mbuf list with the uio_iov ref. to it. */ i = 0; while (left > 0) { NFSMGET(m); MCLGET(m, M_WAITOK); m->m_len = 0; siz = min(M_TRAILINGSPACE(m), left); left -= siz; i++; if (m3) m2->m_next = m; else m3 = m; m2 = m; } MALLOC(iv, struct iovec *, i * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = iv2 = iv; m = m3; left = len; i = 0; while (left > 0) { if (m == NULL) panic("nfsvno_read iov"); siz = min(M_TRAILINGSPACE(m), left); if (siz > 0) { iv->iov_base = mtod(m, caddr_t) + m->m_len; iv->iov_len = siz; m->m_len += siz; left -= siz; iv++; i++; } m = m->m_next; } uiop->uio_iovcnt = i; uiop->uio_offset = off; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; nh = nfsrv_sequential_heuristic(uiop, vp); ioflag |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? */ nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); FREE((caddr_t)iv2, M_TEMP); if (error) { m_freem(m3); *mpp = NULL; goto out; } nh->nh_nextoff = uiop->uio_offset; tlen = len - uiop->uio_resid; cnt = cnt < tlen ? cnt : tlen; tlen = NFSM_RNDUP(cnt); if (tlen == 0) { m_freem(m3); m3 = NULL; } else if (len != tlen || tlen != cnt) nfsrv_adj(m3, len - tlen, tlen - cnt); *mpp = m3; *mpendp = m2; out: NFSEXITCODE(error); return (error); } /* * Write vnode op from an mbuf list. */ int nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable, struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) { struct iovec *ivp; int i, len; struct iovec *iv; int ioflags, error; struct uio io, *uiop = &io; struct nfsheur *nh; MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = iv = ivp; uiop->uio_iovcnt = cnt; i = mtod(mp, caddr_t) + mp->m_len - cp; len = retlen; while (len > 0) { if (mp == NULL) panic("nfsvno_write"); if (i > 0) { i = min(i, len); ivp->iov_base = cp; ivp->iov_len = i; ivp++; len -= i; } mp = mp->m_next; if (mp) { i = mp->m_len; cp = mtod(mp, caddr_t); } } if (stable == NFSWRITE_UNSTABLE) ioflags = IO_NODELOCKED; else ioflags = (IO_SYNC | IO_NODELOCKED); uiop->uio_resid = retlen; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; NFSUIOPROC(uiop, p); uiop->uio_offset = off; nh = nfsrv_sequential_heuristic(uiop, vp); ioflags |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? */ nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; error = VOP_WRITE(vp, uiop, ioflags, cred); if (error == 0) nh->nh_nextoff = uiop->uio_offset; FREE((caddr_t)iv, M_TEMP); NFSEXITCODE(error); return (error); } /* * Common code for creating a regular file (plus special files for V2). */ int nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp, int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp) { u_quad_t tempsize; int error; error = nd->nd_repstat; if (!error && ndp->ni_vp == NULL) { if (nvap->na_type == VREG || nvap->na_type == VSOCK) { vrele(ndp->ni_startdir); error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); if (!error) { if (*exclusive_flagp) { *exclusive_flagp = 0; NFSVNO_ATTRINIT(nvap); nvap->na_atime.tv_sec = cverf[0]; nvap->na_atime.tv_nsec = cverf[1]; error = VOP_SETATTR(ndp->ni_vp, &nvap->na_vattr, nd->nd_cred); if (error != 0) { vput(ndp->ni_vp); ndp->ni_vp = NULL; error = NFSERR_NOTSUPP; } } } /* * NFS V2 Only. nfsrvd_mknod() does this for V3. * (This implies, just get out on an error.) */ } else if (nvap->na_type == VCHR || nvap->na_type == VBLK || nvap->na_type == VFIFO) { if (nvap->na_type == VCHR && rdev == 0xffffffff) nvap->na_type = VFIFO; if (nvap->na_type != VFIFO && (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV, 0))) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); goto out; } nvap->na_rdev = rdev; error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); vrele(ndp->ni_startdir); if (error) goto out; } else { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); error = ENXIO; goto out; } *vpp = ndp->ni_vp; } else { /* * Handle cases where error is already set and/or * the file exists. * 1 - clean up the lookup * 2 - iff !error and na_size set, truncate it */ vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); *vpp = ndp->ni_vp; if (ndp->ni_dvp == *vpp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (!error && nvap->na_size != VNOVAL) { error = nfsvno_accchk(*vpp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (!error) { tempsize = nvap->na_size; NFSVNO_ATTRINIT(nvap); nvap->na_size = tempsize; error = VOP_SETATTR(*vpp, &nvap->na_vattr, nd->nd_cred); } } if (error) vput(*vpp); } out: NFSEXITCODE(error); return (error); } /* * Do a mknod vnode op. */ int nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p) { int error = 0; enum vtype vtyp; vtyp = nvap->na_type; /* * Iff doesn't exist, create it. */ if (ndp->ni_vp) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); vrele(ndp->ni_vp); error = EEXIST; goto out; } if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); error = NFSERR_BADTYPE; goto out; } if (vtyp == VSOCK) { vrele(ndp->ni_startdir); error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); } else { if (nvap->na_type != VFIFO && (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); goto out; } error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); vrele(ndp->ni_startdir); /* * Since VOP_MKNOD returns the ni_vp, I can't * see any reason to do the lookup. */ } out: NFSEXITCODE(error); return (error); } /* * Mkdir vnode op. */ int nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error = 0; if (ndp->ni_vp != NULL) { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vrele(ndp->ni_vp); nfsvno_relpathbuf(ndp); error = EEXIST; goto out; } error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); out: NFSEXITCODE(error); return (error); } /* * symlink vnode op. */ int nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error = 0; if (ndp->ni_vp) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vrele(ndp->ni_vp); error = EEXIST; goto out; } error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr, pathcp); vput(ndp->ni_dvp); vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); /* * Although FreeBSD still had the lookup code in * it for 7/current, there doesn't seem to be any * point, since VOP_SYMLINK() returns the ni_vp. * Just vput it for v2. */ if (!not_v2 && !error) vput(ndp->ni_vp); out: NFSEXITCODE(error); return (error); } /* * Parse symbolic link arguments. * This function has an ugly side effect. It will MALLOC() an area for * the symlink and set iov_base to point to it, only if it succeeds. * So, if it returns with uiop->uio_iov->iov_base != NULL, that must * be FREE'd later. */ int nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, struct thread *p, char **pathcpp, int *lenp) { u_int32_t *tl; char *pathcp = NULL; int error = 0, len; struct nfsv2_sattr *sp; *pathcpp = NULL; *lenp = 0; if ((nd->nd_flag & ND_NFSV3) && (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len > NFS_MAXPATHLEN || len <= 0) { error = EBADRPC; goto nfsmout; } MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, pathcp, len); if (error) goto nfsmout; if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); } *pathcpp = pathcp; *lenp = len; NFSEXITCODE2(0, nd); return (0); nfsmout: if (pathcp) free(pathcp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Remove a non-directory object. */ int nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *vp; int error = 0; vp = ndp->ni_vp; if (vp->v_type == VDIR) error = NFSERR_ISDIR; else if (is_v4) error = nfsrv_checkremove(vp, 1, p); if (!error) error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vput(vp); if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Remove a directory. */ int nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *vp; int error = 0; vp = ndp->ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (ndp->ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_vflag & VV_ROOT) error = EBUSY; out: if (!error) error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vput(vp); if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Rename vnode op. */ int nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p) { struct vnode *fvp, *tvp, *tdvp; int error = 0; fvp = fromndp->ni_vp; if (ndstat) { vrele(fromndp->ni_dvp); vrele(fvp); error = ndstat; goto out1; } tdvp = tondp->ni_dvp; tvp = tondp->ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST; goto out; } if (tvp->v_type == VDIR && tvp->v_mountedhere) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } /* * A rename to '.' or '..' results in a prematurely * unlocked vnode on FreeBSD5, so I'm just going to fail that * here. */ if ((tondp->ni_cnd.cn_namelen == 1 && tondp->ni_cnd.cn_nameptr[0] == '.') || (tondp->ni_cnd.cn_namelen == 2 && tondp->ni_cnd.cn_nameptr[0] == '.' && tondp->ni_cnd.cn_nameptr[1] == '.')) { error = EINVAL; goto out; } } if (fvp->v_type == VDIR && fvp->v_mountedhere) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } if (fvp->v_mount != tdvp->v_mount) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } if (fvp == tdvp) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL; goto out; } if (fvp == tvp) { /* * If source and destination are the same, there is nothing to * do. Set error to -1 to indicate this. */ error = -1; goto out; } if (ndflag & ND_NFSV4) { if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { error = nfsrv_checkremove(fvp, 0, p); NFSVOPUNLOCK(fvp, 0); } else error = EPERM; if (tvp && !error) error = nfsrv_checkremove(tvp, 1, p); } else { /* * For NFSv2 and NFSv3, try to get rid of the delegation, so * that the NFSv4 client won't be confused by the rename. * Since nfsd_recalldelegation() can only be called on an * unlocked vnode at this point and fvp is the file that will * still exist after the rename, just do fvp. */ nfsd_recalldelegation(fvp, p); } out: if (!error) { error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, &tondp->ni_cnd); } else { if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fromndp->ni_dvp); vrele(fvp); if (error == -1) error = 0; } vrele(tondp->ni_startdir); nfsvno_relpathbuf(tondp); out1: vrele(fromndp->ni_startdir); nfsvno_relpathbuf(fromndp); NFSEXITCODE(error); return (error); } /* * Link vnode op. */ int nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *xp; int error = 0; xp = ndp->ni_vp; if (xp != NULL) { error = EEXIST; } else { xp = ndp->ni_dvp; if (vp->v_mount != xp->v_mount) error = EXDEV; } if (!error) { NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) == 0) error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); else error = EPERM; if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); NFSVOPUNLOCK(vp, 0); } else { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (ndp->ni_vp) vrele(ndp->ni_vp); } nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Do the fsync() appropriate for the commit. */ int nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, struct thread *td) { int error = 0; /* * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of * file is done. At this time VOP_FSYNC does not accept offset and * byte count parameters so call VOP_FSYNC the whole file for now. * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. * File systems that do not use the buffer cache (as indicated * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC(). */ if (cnt == 0 || cnt > MAX_COMMIT_COUNT || (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) { /* * Give up and do the whole thing */ if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); } else { /* * Locate and synchronously write any buffers that fall * into the requested range. Note: we are assuming that * f_iosize is a power of 2. */ int iosize = vp->v_mount->mnt_stat.f_iosize; int iomask = iosize - 1; struct bufobj *bo; daddr_t lblkno; /* * Align to iosize boundary, super-align to page boundary. */ if (off & iomask) { cnt += off & iomask; off &= ~(u_quad_t)iomask; } if (off & PAGE_MASK) { cnt += off & PAGE_MASK; off &= ~(u_quad_t)PAGE_MASK; } lblkno = off / iosize; if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, off, off + cnt, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } bo = &vp->v_bufobj; BO_LOCK(bo); while (cnt > 0) { struct buf *bp; /* * If we have a buffer and it is marked B_DELWRI we * have to lock and write it. Otherwise the prior * write is assumed to have already been committed. * * gbincore() can return invalid buffers now so we * have to check that bit as well (though B_DELWRI * should not be set if B_INVAL is set there could be * a race here since we haven't locked the buffer). */ if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); continue; /* retry */ } if ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI) { bremfree(bp); bp->b_flags &= ~B_ASYNC; bwrite(bp); ++nfs_commit_miss; } else BUF_UNLOCK(bp); BO_LOCK(bo); } ++nfs_commit_blks; if (cnt < iosize) break; cnt -= iosize; ++lblkno; } BO_UNLOCK(bo); } NFSEXITCODE(error); return (error); } /* * Statfs vnode op. */ int nfsvno_statfs(struct vnode *vp, struct statfs *sf) { int error; error = VFS_STATFS(vp->v_mount, sf); if (error == 0) { /* * Since NFS handles these values as unsigned on the * wire, there is no way to represent negative values, * so set them to 0. Without this, they will appear * to be very large positive values for clients like * Solaris10. */ if (sf->f_bavail < 0) sf->f_bavail = 0; if (sf->f_ffree < 0) sf->f_ffree = 0; } NFSEXITCODE(error); return (error); } /* * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but * must handle nfsrv_opencheck() calls after any other access checks. */ void nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, struct nfsexstuff *exp, struct vnode **vpp) { struct vnode *vp = NULL; u_quad_t tempsize; struct nfsexstuff nes; if (ndp->ni_vp == NULL) nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, NULL, nd, p, nd->nd_repstat); if (!nd->nd_repstat) { if (ndp->ni_vp == NULL) { vrele(ndp->ni_startdir); nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); if (!nd->nd_repstat) { if (*exclusive_flagp) { *exclusive_flagp = 0; NFSVNO_ATTRINIT(nvap); nvap->na_atime.tv_sec = cverf[0]; nvap->na_atime.tv_nsec = cverf[1]; nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, &nvap->na_vattr, cred); if (nd->nd_repstat != 0) { vput(ndp->ni_vp); ndp->ni_vp = NULL; nd->nd_repstat = NFSERR_NOTSUPP; } else NFSSETBIT_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS); } else { nfsrv_fixattr(nd, ndp->ni_vp, nvap, aclp, p, attrbitp, exp); } } vp = ndp->ni_vp; } else { if (ndp->ni_startdir) vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vp = ndp->ni_vp; if (create == NFSV4OPEN_CREATE) { if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); } if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { if (ndp->ni_cnd.cn_flags & RDONLY) NFSVNO_SETEXRDONLY(&nes); else NFSVNO_EXINIT(&nes); nd->nd_repstat = nfsvno_accchk(vp, VWRITE, cred, &nes, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, vp, nd, p, nd->nd_repstat); if (!nd->nd_repstat) { tempsize = nvap->na_size; NFSVNO_ATTRINIT(nvap); nvap->na_size = tempsize; nd->nd_repstat = VOP_SETATTR(vp, &nvap->na_vattr, cred); } } else if (vp->v_type == VREG) { nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, vp, nd, p, nd->nd_repstat); } } } else { if (ndp->ni_cnd.cn_flags & HASBUF) nfsvno_relpathbuf(ndp); if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) { vrele(ndp->ni_startdir); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (ndp->ni_vp) vput(ndp->ni_vp); } } *vpp = vp; NFSEXITCODE2(0, nd); } /* * Updates the file rev and sets the mtime and ctime * to the current clock time, returning the va_filerev and va_Xtime * values. * Return ESTALE to indicate the vnode is VI_DOOMED. */ int nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p) { struct vattr va; VATTR_NULL(&va); vfs_timestamp(&va.va_mtime); if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) != 0) return (ESTALE); } (void) VOP_SETATTR(vp, &va, cred); (void) nfsvno_getattr(vp, nvap, cred, p, 1); return (0); } /* * Glue routine to nfsv4_fillattr(). */ int nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, int isdgram, int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) { int error; error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, mounted_on_fileno); NFSEXITCODE2(0, nd); return (error); } /* Since the Readdir vnode ops vary, put the entire functions in here. */ /* * nfs readdir service * - mallocs what it thinks is enough to read * count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR * - calls VOP_READDIR() * - loops around building the reply * if the output generated exceeds count break out of loop * The NFSM_CLGET macro is used here so that the reply will be packed * tightly in mbuf clusters. * - it trims out records with d_fileno == 0 * this doesn't matter for Unix clients, but they might confuse clients * for other os'. * - it trims out records with d_type == DT_WHT * these cannot be seen through NFS (unless we extend the protocol) * The alternate call nfsrvd_readdirplus() does lookups as well. * PS: The NFS protocol spec. does not clarify what the "count" byte * argument is a count of.. just name strings and file id's or the * entire reply rpc or ... * I tried just file name and id sizes and it confused the Sun client, * so I am using the full rpc size now. The "paranoia.." comment refers * to including the status longwords that are not a part of the dir. * "entry" structures, but are in the rpc. */ int nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram, struct vnode *vp, struct thread *p, struct nfsexstuff *exp) { struct dirent *dp; u_int32_t *tl; int dirlen; char *cpos, *cend, *rbuf; struct nfsvattr at; int nlen, error = 0, getret = 1; int siz, cnt, fullsiz, eofflag, ncookies; u_int64_t off, toff, verf; u_long *cookies = NULL, *cookiep; struct uio io; struct iovec iv; int is_ufs; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); off = fxdr_unsigned(u_quad_t, *tl++); } else { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; verf = fxdr_hyper(tl); tl += 2; } toff = off; cnt = fxdr_unsigned(int, *tl); if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) cnt = NFS_SRVMAXDATA(nd); siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); fullsiz = siz; if (nd->nd_flag & ND_NFSV3) { nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); #if 0 /* * va_filerev is not sufficient as a cookie verifier, * since it is not supposed to change when entries are * removed/added unless that offset cookies returned to * the client are no longer valid. */ if (!nd->nd_repstat && toff && verf != at.na_filerev) nd->nd_repstat = NFSERR_BAD_COOKIE; #endif } if (!nd->nd_repstat && vp->v_type != VDIR) nd->nd_repstat = NFSERR_NOTDIR; if (nd->nd_repstat == 0 && cnt == 0) { if (nd->nd_flag & ND_NFSV2) /* NFSv2 does not have NFSERR_TOOSMALL */ nd->nd_repstat = EPERM; else nd->nd_repstat = NFSERR_TOOSMALL; } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); again: eofflag = 0; if (cookies) { free((caddr_t)cookies, M_TEMP); cookies = NULL; } iv.iov_base = rbuf; iv.iov_len = siz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = siz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_td = NULL; nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, &cookies); off = (u_int64_t)io.uio_offset; if (io.uio_resid) siz -= io.uio_resid; if (!cookies && !nd->nd_repstat) nd->nd_repstat = NFSERR_PERM; if (nd->nd_flag & ND_NFSV3) { getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); if (!nd->nd_repstat) nd->nd_repstat = getret; } /* * Handles the failed cases. nd->nd_repstat == 0 past here. */ if (nd->nd_repstat) { vput(vp); free((caddr_t)rbuf, M_TEMP); if (cookies) free((caddr_t)cookies, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vput(vp); if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); } else { nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); tl += 2; } *tl++ = newnfs_false; *tl = newnfs_true; FREE((caddr_t)rbuf, M_TEMP); FREE((caddr_t)cookies, M_TEMP); goto out; } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || dp->d_type == DT_WHT || (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { siz = fullsiz; toff = off; goto again; } vput(vp); /* * dirlen is the size of the reply, including all XDR and must * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate * if the XDR should be included in "count", but to be safe, we do. * (Include the two booleans at the end of the reply in dirlen now.) */ if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; } else { dirlen = 2 * NFSX_UNSIGNED; } /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { nlen = dp->d_namlen; if (dp->d_fileno != 0 && dp->d_type != DT_WHT && nlen <= NFS_MAXNAMLEN) { if (nd->nd_flag & ND_NFSV3) dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); else dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); if (dirlen > cnt) { eofflag = 0; break; } /* * Build the directory record xdr from * the dirent entry. */ if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; } else { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; } *tl = txdr_unsigned(dp->d_fileno); (void) nfsm_strtom(nd, dp->d_name, nlen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; } else NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(*cookiep); } cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos < cend) eofflag = 0; NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_false; if (eofflag) *tl = newnfs_true; else *tl = newnfs_false; FREE((caddr_t)rbuf, M_TEMP); FREE((caddr_t)cookies, M_TEMP); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * Readdirplus for V3 and Readdir for V4. */ int nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, struct vnode *vp, struct thread *p, struct nfsexstuff *exp) { struct dirent *dp; u_int32_t *tl; int dirlen; char *cpos, *cend, *rbuf; struct vnode *nvp; fhandle_t nfh; struct nfsvattr nva, at, *nvap = &nva; struct mbuf *mb0, *mb1; struct nfsreferral *refp; int nlen, r, error = 0, getret = 1, usevget = 1; int siz, cnt, fullsiz, eofflag, ncookies, entrycnt; caddr_t bpos0, bpos1; u_int64_t off, toff, verf; u_long *cookies = NULL, *cookiep; nfsattrbit_t attrbits, rderrbits, savbits; struct uio io; struct iovec iv; struct componentname cn; int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls; struct mount *mp, *new_mp; uint64_t mounted_on_fileno; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); off = fxdr_hyper(tl); toff = off; tl += 2; verf = fxdr_hyper(tl); tl += 2; siz = fxdr_unsigned(int, *tl++); cnt = fxdr_unsigned(int, *tl); /* * Use the server's maximum data transfer size as the upper bound * on reply datalen. */ if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) cnt = NFS_SRVMAXDATA(nd); /* * siz is a "hint" of how much directory information (name, fileid, * cookie) should be in the reply. At least one client "hints" 0, * so I set it to cnt for that case. I also round it up to the * next multiple of DIRBLKSIZ. */ if (siz <= 0) siz = cnt; siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); if (nd->nd_flag & ND_NFSV4) { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; NFSSET_ATTRBIT(&savbits, &attrbits); NFSCLRNOTFILLABLE_ATTRBIT(&attrbits); NFSZERO_ATTRBIT(&rderrbits); NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR); } else { NFSZERO_ATTRBIT(&attrbits); } fullsiz = siz; nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); if (!nd->nd_repstat) { if (off && verf != at.na_filerev) { /* * va_filerev is not sufficient as a cookie verifier, * since it is not supposed to change when entries are * removed/added unless that offset cookies returned to * the client are no longer valid. */ #if 0 if (nd->nd_flag & ND_NFSV4) { nd->nd_repstat = NFSERR_NOTSAME; } else { nd->nd_repstat = NFSERR_BAD_COOKIE; } #endif } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) { nd->nd_repstat = NFSERR_BAD_COOKIE; } } if (!nd->nd_repstat && vp->v_type != VDIR) nd->nd_repstat = NFSERR_NOTDIR; if (!nd->nd_repstat && cnt == 0) nd->nd_repstat = NFSERR_TOOSMALL; if (!nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0; MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); again: eofflag = 0; if (cookies) { free((caddr_t)cookies, M_TEMP); cookies = NULL; } iv.iov_base = rbuf; iv.iov_len = siz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = siz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_td = NULL; nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, &cookies); off = (u_int64_t)io.uio_offset; if (io.uio_resid) siz -= io.uio_resid; getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); if (!cookies && !nd->nd_repstat) nd->nd_repstat = NFSERR_PERM; if (!nd->nd_repstat) nd->nd_repstat = getret; if (nd->nd_repstat) { vput(vp); if (cookies) free((caddr_t)cookies, M_TEMP); free((caddr_t)rbuf, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); tl += 2; *tl++ = newnfs_false; *tl = newnfs_true; free((caddr_t)cookies, M_TEMP); free((caddr_t)rbuf, M_TEMP); goto out; } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || dp->d_type == DT_WHT || (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) || ((nd->nd_flag & ND_NFSV4) && ((dp->d_namlen == 1 && dp->d_name[0] == '.') || (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { siz = fullsiz; toff = off; goto again; } /* * Busy the file system so that the mount point won't go away * and, as such, VFS_VGET() can be used safely. */ mp = vp->v_mount; vfs_ref(mp); NFSVOPUNLOCK(vp, 0); nd->nd_repstat = vfs_busy(mp, 0); vfs_rel(mp); if (nd->nd_repstat != 0) { vrele(vp); free(cookies, M_TEMP); free(rbuf, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* - * For now ZFS requires VOP_LOOKUP as a workaround. Until ino_t is changed - * to 64 bit type a ZFS filesystem with over 1 billion files in it - * will suffer from 64bit -> 32bit truncation. + * Check to see if entries in this directory can be safely acquired + * via VFS_VGET() or if a switch to VOP_LOOKUP() is required. + * ZFS snapshot directories need VOP_LOOKUP(), so that any + * automount of the snapshot directory that is required will + * be done. + * This needs to be done here for NFSv4, since NFSv4 never does + * a VFS_VGET() for "." or "..". */ - if (is_zfs == 1) - usevget = 0; + if (is_zfs == 1) { + r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp); + if (r == EOPNOTSUPP) { + usevget = 0; + cn.cn_nameiop = LOOKUP; + cn.cn_lkflags = LK_SHARED | LK_RETRY; + cn.cn_cred = nd->nd_cred; + cn.cn_thread = p; + } else if (r == 0) + vput(nvp); + } - cn.cn_nameiop = LOOKUP; - cn.cn_lkflags = LK_SHARED | LK_RETRY; - cn.cn_cred = nd->nd_cred; - cn.cn_thread = p; - /* * Save this position, in case there is an error before one entry * is created. */ mb0 = nd->nd_mb; bpos0 = nd->nd_bpos; /* * Fill in the first part of the reply. * dirlen is the reply length in bytes and cannot exceed cnt. * (Include the two booleans at the end of the reply in dirlen now, * so we recognize when we have exceeded cnt.) */ if (nd->nd_flag & ND_NFSV3) { dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; nfsrv_postopattr(nd, getret, &at); } else { dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED; } NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); txdr_hyper(at.na_filerev, tl); /* * Save this position, in case there is an empty reply needed. */ mb1 = nd->nd_mb; bpos1 = nd->nd_bpos; /* Loop through the records and build reply */ entrycnt = 0; while (cpos < cend && ncookies > 0 && dirlen < cnt) { nlen = dp->d_namlen; if (dp->d_fileno != 0 && dp->d_type != DT_WHT && nlen <= NFS_MAXNAMLEN && ((nd->nd_flag & ND_NFSV3) || nlen > 2 || (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.')) || (nlen == 1 && dp->d_name[0] != '.'))) { /* * Save the current position in the reply, in case * this entry exceeds cnt. */ mb1 = nd->nd_mb; bpos1 = nd->nd_bpos; /* * For readdir_and_lookup get the vnode using * the file number. */ nvp = NULL; refp = NULL; r = 0; at_root = 0; needs_unbusy = 0; new_mp = mp; mounted_on_fileno = (uint64_t)dp->d_fileno; if ((nd->nd_flag & ND_NFSV3) || NFSNONZERO_ATTRBIT(&savbits)) { if (nd->nd_flag & ND_NFSV4) refp = nfsv4root_getreferral(NULL, vp, dp->d_fileno); if (refp == NULL) { if (usevget) r = VFS_VGET(mp, dp->d_fileno, LK_SHARED, &nvp); else r = EOPNOTSUPP; if (r == EOPNOTSUPP) { - usevget = 0; + if (usevget) { + usevget = 0; + cn.cn_nameiop = LOOKUP; + cn.cn_lkflags = + LK_SHARED | + LK_RETRY; + cn.cn_cred = + nd->nd_cred; + cn.cn_thread = p; + } cn.cn_nameptr = dp->d_name; cn.cn_namelen = nlen; cn.cn_flags = ISLASTCN | NOFOLLOW | LOCKLEAF; if (nlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.') cn.cn_flags |= ISDOTDOT; if (NFSVOPLOCK(vp, LK_SHARED) != 0) { nd->nd_repstat = EPERM; break; } if ((vp->v_vflag & VV_ROOT) != 0 && (cn.cn_flags & ISDOTDOT) != 0) { vref(vp); nvp = vp; r = 0; } else { r = VOP_LOOKUP(vp, &nvp, &cn); if (vp != nvp) NFSVOPUNLOCK(vp, 0); } } /* * For NFSv4, check to see if nvp is * a mount point and get the mount * point vnode, as required. */ if (r == 0 && nfsrv_enable_crossmntpt != 0 && (nd->nd_flag & ND_NFSV4) != 0 && nvp->v_type == VDIR && nvp->v_mountedhere != NULL) { new_mp = nvp->v_mountedhere; r = vfs_busy(new_mp, 0); vput(nvp); nvp = NULL; if (r == 0) { r = VFS_ROOT(new_mp, LK_SHARED, &nvp); needs_unbusy = 1; if (r == 0) at_root = 1; } } } if (!r) { if (refp == NULL && ((nd->nd_flag & ND_NFSV3) || NFSNONZERO_ATTRBIT(&attrbits))) { r = nfsvno_getfh(nvp, &nfh, p); if (!r) r = nfsvno_getattr(nvp, nvap, nd->nd_cred, p, 1); if (r == 0 && is_zfs == 1 && nfsrv_enable_crossmntpt != 0 && (nd->nd_flag & ND_NFSV4) != 0 && nvp->v_type == VDIR && vp->v_mount != nvp->v_mount) { /* * For a ZFS snapshot, there is a * pseudo mount that does not set * v_mountedhere, so it needs to * be detected via a different * mount structure. */ at_root = 1; if (new_mp == mp) new_mp = nvp->v_mount; } } } else { nvp = NULL; } if (r) { if (!NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR)) { if (nvp != NULL) vput(nvp); if (needs_unbusy != 0) vfs_unbusy(new_mp); nd->nd_repstat = r; break; } } } /* * Build the directory record xdr */ if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; *tl = txdr_unsigned(dp->d_fileno); dirlen += nfsm_strtom(nd, dp->d_name, nlen); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = txdr_unsigned(*cookiep); nfsrv_postopattr(nd, 0, nvap); dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1); dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR); if (nvp != NULL) vput(nvp); } else { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; *tl = txdr_unsigned(*cookiep); dirlen += nfsm_strtom(nd, dp->d_name, nlen); if (nvp != NULL) { supports_nfsv4acls = nfs_supportsnfsv4acls(nvp); NFSVOPUNLOCK(nvp, 0); } else supports_nfsv4acls = 0; if (refp != NULL) { dirlen += nfsrv_putreferralattr(nd, &savbits, refp, 0, &nd->nd_repstat); if (nd->nd_repstat) { if (nvp != NULL) vrele(nvp); if (needs_unbusy != 0) vfs_unbusy(new_mp); break; } } else if (r) { dirlen += nfsvno_fillattr(nd, new_mp, nvp, nvap, &nfh, r, &rderrbits, nd->nd_cred, p, isdgram, 0, supports_nfsv4acls, at_root, mounted_on_fileno); } else { dirlen += nfsvno_fillattr(nd, new_mp, nvp, nvap, &nfh, r, &attrbits, nd->nd_cred, p, isdgram, 0, supports_nfsv4acls, at_root, mounted_on_fileno); } if (nvp != NULL) vrele(nvp); dirlen += (3 * NFSX_UNSIGNED); } if (needs_unbusy != 0) vfs_unbusy(new_mp); if (dirlen <= cnt) entrycnt++; } cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } vrele(vp); vfs_unbusy(mp); /* * If dirlen > cnt, we must strip off the last entry. If that * results in an empty reply, report NFSERR_TOOSMALL. */ if (dirlen > cnt || nd->nd_repstat) { if (!nd->nd_repstat && entrycnt == 0) nd->nd_repstat = NFSERR_TOOSMALL; if (nd->nd_repstat) { newnfs_trimtrailing(nd, mb0, bpos0); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); } else newnfs_trimtrailing(nd, mb1, bpos1); eofflag = 0; } else if (cpos < cend) eofflag = 0; if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_false; if (eofflag) *tl = newnfs_true; else *tl = newnfs_false; } FREE((caddr_t)cookies, M_TEMP); FREE((caddr_t)rbuf, M_TEMP); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * Get the settable attributes out of the mbuf list. * (Return 0 or EBADRPC) */ int nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) { u_int32_t *tl; struct nfsv2_sattr *sp; int error = 0, toclient = 0; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); /* * Some old clients didn't fill in the high order 16bits. * --> check the low order 2 bytes for 0xffff */ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) nvap->na_mode = nfstov_mode(sp->sa_mode); if (sp->sa_uid != newnfs_xdrneg1) nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); if (sp->sa_gid != newnfs_xdrneg1) nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); if (sp->sa_size != newnfs_xdrneg1) nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { #ifdef notyet fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); #else nvap->na_atime.tv_sec = fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); nvap->na_atime.tv_nsec = 0; #endif } if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); break; case ND_NFSV3: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_mode = nfstov_mode(*tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_uid = fxdr_unsigned(uid_t, *tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_gid = fxdr_unsigned(gid_t, *tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); nvap->na_size = fxdr_hyper(tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); switch (fxdr_unsigned(int, *tl)) { case NFSV3SATTRTIME_TOCLIENT: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &nvap->na_atime); toclient = 1; break; case NFSV3SATTRTIME_TOSERVER: vfs_timestamp(&nvap->na_atime); nvap->na_vaflags |= VA_UTIMES_NULL; break; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); switch (fxdr_unsigned(int, *tl)) { case NFSV3SATTRTIME_TOCLIENT: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &nvap->na_mtime); nvap->na_vaflags &= ~VA_UTIMES_NULL; break; case NFSV3SATTRTIME_TOSERVER: vfs_timestamp(&nvap->na_mtime); if (!toclient) nvap->na_vaflags |= VA_UTIMES_NULL; break; } break; case ND_NFSV4: error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Handle the setable attributes for V4. * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. */ int nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) { u_int32_t *tl; int attrsum = 0; int i, j; int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; int toclient = 0; u_char *cp, namestr[NFSV4_SMALLSTR + 1]; uid_t uid; gid_t gid; error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsize = fxdr_unsigned(int, *tl); /* * Loop around getting the setable attributes. If an unsupported * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. */ if (retnotsup) { nd->nd_repstat = NFSERR_ATTRNOTSUPP; bitpos = NFSATTRBIT_MAX; } else { bitpos = 0; } for (; bitpos < NFSATTRBIT_MAX; bitpos++) { if (attrsum > attrsize) { error = NFSERR_BADXDR; goto nfsmout; } if (NFSISSET_ATTRBIT(attrbitp, bitpos)) switch (bitpos) { case NFSATTRBIT_SIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (vp != NULL && vp->v_type != VREG) { error = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_INVAL; goto nfsmout; } nvap->na_size = fxdr_hyper(tl); attrsum += NFSX_HYPER; break; case NFSATTRBIT_ACL: error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize, p); if (error) goto nfsmout; if (aceerr && !nd->nd_repstat) nd->nd_repstat = aceerr; attrsum += aclsize; break; case NFSATTRBIT_ARCHIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_HIDDEN: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MIMETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); break; case NFSATTRBIT_MODE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_mode = nfstov_mode(*tl); attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (!nd->nd_repstat) { nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid, p); if (!nd->nd_repstat) nvap->na_uid = uid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); break; case NFSATTRBIT_OWNERGROUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (!nd->nd_repstat) { nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid, p); if (!nd->nd_repstat) nvap->na_gid = gid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); break; case NFSATTRBIT_SYSTEM: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_TIMEACCESSSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &nvap->na_atime); toclient = 1; attrsum += NFSX_V4TIME; } else { vfs_timestamp(&nvap->na_atime); nvap->na_vaflags |= VA_UTIMES_NULL; } break; case NFSATTRBIT_TIMEBACKUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMECREATE: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &nvap->na_mtime); nvap->na_vaflags &= ~VA_UTIMES_NULL; attrsum += NFSX_V4TIME; } else { vfs_timestamp(&nvap->na_mtime); if (!toclient) nvap->na_vaflags |= VA_UTIMES_NULL; } break; default: nd->nd_repstat = NFSERR_ATTRNOTSUPP; /* * set bitpos so we drop out of the loop. */ bitpos = NFSATTRBIT_MAX; break; } } /* * some clients pad the attrlist, so we need to skip over the * padding. */ if (attrsum > attrsize) { error = NFSERR_BADXDR; } else { attrsize = NFSM_RNDUP(attrsize); if (attrsum < attrsize) error = nfsm_advance(nd, attrsize - attrsum, -1); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Check/setup export credentials. */ int nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, struct ucred *credanon) { int error = 0; /* * Check/setup credentials. */ if (nd->nd_flag & ND_GSS) exp->nes_exflag &= ~MNT_EXPORTANON; /* * Check to see if the operation is allowed for this security flavor. * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. * Also, allow Secinfo, so that it can acquire the correct flavor(s). */ if (nfsvno_testexp(nd, exp) && nd->nd_procnum != NFSV4OP_SECINFO && nd->nd_procnum != NFSPROC_FSINFO) { if (nd->nd_flag & ND_NFSV4) error = NFSERR_WRONGSEC; else error = (NFSERR_AUTHERR | AUTH_TOOWEAK); goto out; } /* * Check to see if the file system is exported V4 only. */ if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { error = NFSERR_PROGNOTV4; goto out; } /* * Now, map the user credentials. * (Note that ND_AUTHNONE will only be set for an NFSv3 * Fsinfo RPC. If set for anything else, this code might need * to change.) */ if (NFSVNO_EXPORTED(exp)) { if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) || NFSVNO_EXPORTANON(exp) || (nd->nd_flag & ND_AUTHNONE) != 0) { nd->nd_cred->cr_uid = credanon->cr_uid; nd->nd_cred->cr_gid = credanon->cr_gid; crsetgroups(nd->nd_cred, credanon->cr_ngroups, credanon->cr_groups); } else if ((nd->nd_flag & ND_GSS) == 0) { /* * If using AUTH_SYS, call nfsrv_getgrpscred() to see * if there is a replacement credential with a group * list set up by "nfsuserd -manage-gids". * If there is no replacement, nfsrv_getgrpscred() * simply returns its argument. */ nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred); } } out: NFSEXITCODE2(error, nd); return (error); } /* * Check exports. */ int nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, struct ucred **credp) { int i, error, *secflavors; error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, &secflavors); if (error) { if (nfs_rootfhset) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; } } else { /* Copy the security flavors. */ for (i = 0; i < exp->nes_numsecflavor; i++) exp->nes_secflavors[i] = secflavors[i]; } NFSEXITCODE(error); return (error); } /* * Get a vnode for a file handle and export stuff. */ int nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, int lktype, struct vnode **vpp, struct nfsexstuff *exp, struct ucred **credp) { int i, error, *secflavors; *credp = NULL; exp->nes_numsecflavor = 0; error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp); if (error != 0) /* Make sure the server replies ESTALE to the client. */ error = ESTALE; if (nam && !error) { error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, &secflavors); if (error) { if (nfs_rootfhset) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; } else { vput(*vpp); } } else { /* Copy the security flavors. */ for (i = 0; i < exp->nes_numsecflavor; i++) exp->nes_secflavors[i] = secflavors[i]; } } NFSEXITCODE(error); return (error); } /* * nfsd_fhtovp() - convert a fh to a vnode ptr * - look up fsid in mount list (if not found ret error) * - get vp and export rights by calling nfsvno_fhtovp() * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon * for AUTH_SYS * - if mpp != NULL, return the mount point so that it can * be used for vn_finished_write() by the caller */ void nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype, struct vnode **vpp, struct nfsexstuff *exp, struct mount **mpp, int startwrite, struct thread *p) { struct mount *mp; struct ucred *credanon; fhandle_t *fhp; fhp = (fhandle_t *)nfp->nfsrvfh_data; /* * Check for the special case of the nfsv4root_fh. */ mp = vfs_busyfs(&fhp->fh_fsid); if (mpp != NULL) *mpp = mp; if (mp == NULL) { *vpp = NULL; nd->nd_repstat = ESTALE; goto out; } if (startwrite) { vn_start_write(NULL, mpp, V_WAIT); if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp))) lktype = LK_EXCLUSIVE; } nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp, &credanon); vfs_unbusy(mp); /* * For NFSv4 without a pseudo root fs, unexported file handles * can be returned, so that Lookup works everywhere. */ if (!nd->nd_repstat && exp->nes_exflag == 0 && !(nd->nd_flag & ND_NFSV4)) { vput(*vpp); nd->nd_repstat = EACCES; } /* * Personally, I've never seen any point in requiring a * reserved port#, since only in the rare case where the * clients are all boxes with secure system privileges, * does it provide any enhanced security, but... some people * believe it to be useful and keep putting this code back in. * (There is also some "security checker" out there that * complains if the nfs server doesn't enforce this.) * However, note the following: * RFC3530 (NFSv4) specifies that a reserved port# not be * required. * RFC2623 recommends that, if a reserved port# is checked for, * that there be a way to turn that off--> ifdef'd. */ #ifdef NFS_REQRSVPORT if (!nd->nd_repstat) { struct sockaddr_in *saddr; struct sockaddr_in6 *saddr6; saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *); if (!(nd->nd_flag & ND_NFSV4) && ((saddr->sin_family == AF_INET && ntohs(saddr->sin_port) >= IPPORT_RESERVED) || (saddr6->sin6_family == AF_INET6 && ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) { vput(*vpp); nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); } } #endif /* NFS_REQRSVPORT */ /* * Check/setup credentials. */ if (!nd->nd_repstat) { nd->nd_saveduid = nd->nd_cred->cr_uid; nd->nd_repstat = nfsd_excred(nd, exp, credanon); if (nd->nd_repstat) vput(*vpp); } if (credanon != NULL) crfree(credanon); if (nd->nd_repstat) { if (startwrite) vn_finished_write(mp); *vpp = NULL; if (mpp != NULL) *mpp = NULL; } out: NFSEXITCODE2(0, nd); } /* * glue for fp. */ static int fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) { struct filedesc *fdp; struct file *fp; int error = 0; fdp = p->td_proc->p_fd; if (fd < 0 || fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { error = EBADF; goto out; } *fpp = fp; out: NFSEXITCODE(error); return (error); } /* * Called from nfssvc() to update the exports list. Just call * vfs_export(). This has to be done, since the v4 root fake fs isn't * in the mount list. */ int nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) { struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; int error = 0; struct nameidata nd; fhandle_t fh; error = vfs_export(&nfsv4root_mnt, &nfsexargp->export); if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) nfs_rootfhset = 0; else if (error == 0) { if (nfsexargp->fspec == NULL) { error = EPERM; goto out; } /* * If fspec != NULL, this is the v4root path. */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, nfsexargp->fspec, p); if ((error = namei(&nd)) != 0) goto out; error = nfsvno_getfh(nd.ni_vp, &fh, p); vrele(nd.ni_vp); if (!error) { nfs_rootfh.nfsrvfh_len = NFSX_MYFH; NFSBCOPY((caddr_t)&fh, nfs_rootfh.nfsrvfh_data, sizeof (fhandle_t)); nfs_rootfhset = 1; } } out: NFSEXITCODE(error); return (error); } /* * This function needs to test to see if the system is near its limit * for memory allocation via malloc() or mget() and return True iff * either of these resources are near their limit. * XXX (For now, this is just a stub.) */ int nfsrv_testmalloclimit = 0; int nfsrv_mallocmget_limit(void) { static int printmesg = 0; static int testval = 1; if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { if ((printmesg++ % 100) == 0) printf("nfsd: malloc/mget near limit\n"); return (1); } return (0); } /* * BSD specific initialization of a mount point. */ void nfsd_mntinit(void) { static int inited = 0; if (inited) return; inited = 1; nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED); TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist); TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist); nfsv4root_mnt.mnt_export = NULL; TAILQ_INIT(&nfsv4root_opt); TAILQ_INIT(&nfsv4root_newopt); nfsv4root_mnt.mnt_opt = &nfsv4root_opt; nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt; nfsv4root_mnt.mnt_nvnodelistsize = 0; nfsv4root_mnt.mnt_activevnodelistsize = 0; } /* * Get a vnode for a file handle, without checking exports, etc. */ struct vnode * nfsvno_getvp(fhandle_t *fhp) { struct mount *mp; struct vnode *vp; int error; mp = vfs_busyfs(&fhp->fh_fsid); if (mp == NULL) return (NULL); error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); vfs_unbusy(mp); if (error) return (NULL); return (vp); } /* * Do a local VOP_ADVLOCK(). */ int nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, u_int64_t end, struct thread *td) { int error = 0; struct flock fl; u_int64_t tlen; if (nfsrv_dolocallocks == 0) goto out; ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); fl.l_whence = SEEK_SET; fl.l_type = ftype; fl.l_start = (off_t)first; if (end == NFS64BITSSET) { fl.l_len = 0; } else { tlen = end - first; fl.l_len = (off_t)tlen; } /* * For FreeBSD8, the l_pid and l_sysid must be set to the same * values for all calls, so that all locks will be held by the * nfsd server. (The nfsd server handles conflicts between the * various clients.) * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 * bytes, so it can't be put in l_sysid. */ if (nfsv4_sysid == 0) nfsv4_sysid = nlm_acquire_next_sysid(); fl.l_pid = (pid_t)0; fl.l_sysid = (int)nfsv4_sysid; if (ftype == F_UNLCK) error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, (F_POSIX | F_REMOTE)); else error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, (F_POSIX | F_REMOTE)); out: NFSEXITCODE(error); return (error); } /* * Check the nfsv4 root exports. */ int nfsvno_v4rootexport(struct nfsrv_descript *nd) { struct ucred *credanon; int exflags, error = 0, numsecflavor, *secflavors, i; error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags, &credanon, &numsecflavor, &secflavors); if (error) { error = NFSERR_PROGUNAVAIL; goto out; } if (credanon != NULL) crfree(credanon); for (i = 0; i < numsecflavor; i++) { if (secflavors[i] == AUTH_SYS) nd->nd_flag |= ND_EXAUTHSYS; else if (secflavors[i] == RPCSEC_GSS_KRB5) nd->nd_flag |= ND_EXGSS; else if (secflavors[i] == RPCSEC_GSS_KRB5I) nd->nd_flag |= ND_EXGSSINTEGRITY; else if (secflavors[i] == RPCSEC_GSS_KRB5P) nd->nd_flag |= ND_EXGSSPRIVACY; } out: NFSEXITCODE(error); return (error); } /* * Nfs server pseudo system call for the nfsd's */ /* * MPSAFE */ static int nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) { struct file *fp; struct nfsd_addsock_args sockarg; struct nfsd_nfsd_args nfsdarg; cap_rights_t rights; int error; if (uap->flag & NFSSVC_NFSDADDSOCK) { error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); if (error) goto out; /* * Since we don't know what rights might be required, * pretend that we need them all. It is better to be too * careful than too reckless. */ error = fget(td, sockarg.sock, cap_rights_init(&rights, CAP_SOCK_SERVER), &fp); if (error != 0) goto out; if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); error = EPERM; goto out; } error = nfsrvd_addsock(fp); fdrop(fp, td); } else if (uap->flag & NFSSVC_NFSDNFSD) { if (uap->argp == NULL) { error = EINVAL; goto out; } error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof (nfsdarg)); if (error) goto out; error = nfsrvd_nfsd(td, &nfsdarg); } else { error = nfssvc_srvcall(td, uap, td->td_ucred); } out: NFSEXITCODE(error); return (error); } static int nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) { struct nfsex_args export; struct file *fp = NULL; int stablefd, len; struct nfsd_clid adminrevoke; struct nfsd_dumplist dumplist; struct nfsd_dumpclients *dumpclients; struct nfsd_dumplocklist dumplocklist; struct nfsd_dumplocks *dumplocks; struct nameidata nd; vnode_t vp; int error = EINVAL, igotlock; struct proc *procp; static int suspend_nfsd = 0; if (uap->flag & NFSSVC_PUBLICFH) { NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); error = copyin(uap->argp, &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); if (!error) nfs_pubfhset = 1; } else if (uap->flag & NFSSVC_V4ROOTEXPORT) { error = copyin(uap->argp,(caddr_t)&export, sizeof (struct nfsex_args)); if (!error) error = nfsrv_v4rootexport(&export, cred, p); } else if (uap->flag & NFSSVC_NOPUBLICFH) { nfs_pubfhset = 0; error = 0; } else if (uap->flag & NFSSVC_STABLERESTART) { error = copyin(uap->argp, (caddr_t)&stablefd, sizeof (int)); if (!error) error = fp_getfvp(p, stablefd, &fp, &vp); if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) error = EBADF; if (!error && newnfs_numnfsd != 0) error = EPERM; if (!error) { nfsrv_stablefirst.nsf_fp = fp; nfsrv_setupstable(p); } } else if (uap->flag & NFSSVC_ADMINREVOKE) { error = copyin(uap->argp, (caddr_t)&adminrevoke, sizeof (struct nfsd_clid)); if (!error) error = nfsrv_adminrevoke(&adminrevoke, p); } else if (uap->flag & NFSSVC_DUMPCLIENTS) { error = copyin(uap->argp, (caddr_t)&dumplist, sizeof (struct nfsd_dumplist)); if (!error && (dumplist.ndl_size < 1 || dumplist.ndl_size > NFSRV_MAXDUMPLIST)) error = EPERM; if (!error) { len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size; dumpclients = (struct nfsd_dumpclients *)malloc(len, M_TEMP, M_WAITOK); nfsrv_dumpclients(dumpclients, dumplist.ndl_size); error = copyout(dumpclients, CAST_USER_ADDR_T(dumplist.ndl_list), len); free((caddr_t)dumpclients, M_TEMP); } } else if (uap->flag & NFSSVC_DUMPLOCKS) { error = copyin(uap->argp, (caddr_t)&dumplocklist, sizeof (struct nfsd_dumplocklist)); if (!error && (dumplocklist.ndllck_size < 1 || dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST)) error = EPERM; if (!error) error = nfsrv_lookupfilename(&nd, dumplocklist.ndllck_fname, p); if (!error) { len = sizeof (struct nfsd_dumplocks) * dumplocklist.ndllck_size; dumplocks = (struct nfsd_dumplocks *)malloc(len, M_TEMP, M_WAITOK); nfsrv_dumplocks(nd.ni_vp, dumplocks, dumplocklist.ndllck_size, p); vput(nd.ni_vp); error = copyout(dumplocks, CAST_USER_ADDR_T(dumplocklist.ndllck_list), len); free((caddr_t)dumplocks, M_TEMP); } } else if (uap->flag & NFSSVC_BACKUPSTABLE) { procp = p->td_proc; PROC_LOCK(procp); nfsd_master_pid = procp->p_pid; bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); nfsd_master_start = procp->p_stats->p_start; nfsd_master_proc = procp; PROC_UNLOCK(procp); } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { NFSLOCKV4ROOTMUTEX(); if (suspend_nfsd == 0) { /* Lock out all nfsd threads */ do { igotlock = nfsv4_lock(&nfsd_suspend_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0 && suspend_nfsd == 0); suspend_nfsd = 1; } NFSUNLOCKV4ROOTMUTEX(); error = 0; } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { NFSLOCKV4ROOTMUTEX(); if (suspend_nfsd != 0) { nfsv4_unlock(&nfsd_suspend_lock, 0); suspend_nfsd = 0; } NFSUNLOCKV4ROOTMUTEX(); error = 0; } NFSEXITCODE(error); return (error); } /* * Check exports. * Returns 0 if ok, 1 otherwise. */ int nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) { int i; /* * This seems odd, but allow the case where the security flavor * list is empty. This happens when NFSv4 is traversing non-exported * file systems. Exported file systems should always have a non-empty * security flavor list. */ if (exp->nes_numsecflavor == 0) return (0); for (i = 0; i < exp->nes_numsecflavor; i++) { /* * The tests for privacy and integrity must be first, * since ND_GSS is set for everything but AUTH_SYS. */ if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P && (nd->nd_flag & ND_GSSPRIVACY)) return (0); if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I && (nd->nd_flag & ND_GSSINTEGRITY)) return (0); if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 && (nd->nd_flag & ND_GSS)) return (0); if (exp->nes_secflavors[i] == AUTH_SYS && (nd->nd_flag & ND_GSS) == 0) return (0); } return (1); } /* * Calculate a hash value for the fid in a file handle. */ uint32_t nfsrv_hashfh(fhandle_t *fhp) { uint32_t hashval; hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); return (hashval); } /* * Calculate a hash value for the sessionid. */ uint32_t nfsrv_hashsessionid(uint8_t *sessionid) { uint32_t hashval; hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0); return (hashval); } /* * Signal the userland master nfsd to backup the stable restart file. */ void nfsrv_backupstable(void) { struct proc *procp; if (nfsd_master_proc != NULL) { procp = pfind(nfsd_master_pid); /* Try to make sure it is the correct process. */ if (procp == nfsd_master_proc && procp->p_stats->p_start.tv_sec == nfsd_master_start.tv_sec && procp->p_stats->p_start.tv_usec == nfsd_master_start.tv_usec && strcmp(procp->p_comm, nfsd_master_comm) == 0) kern_psignal(procp, SIGUSR2); else nfsd_master_proc = NULL; if (procp != NULL) PROC_UNLOCK(procp); } } extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfsd_modevent(module_t mod, int type, void *data) { int error = 0, i; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) goto out; newnfs_portinit(); for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL, MTX_DEF); mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL, MTX_DEF); } mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF); lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0); nfsrvd_initcache(); nfsd_init(); NFSD_LOCK(); nfsrvd_init(0); NFSD_UNLOCK(); nfsd_mntinit(); #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; #endif nfsd_call_servertimer = nfsrv_servertimer; nfsd_call_nfsd = nfssvc_nfsd; loaded = 1; break; case MOD_UNLOAD: if (newnfs_numnfsd != 0) { error = EBUSY; break; } #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = NULL; vn_deleg_ops.vndeleg_disable = NULL; #endif nfsd_call_servertimer = NULL; nfsd_call_nfsd = NULL; /* Clean out all NFSv4 state. */ nfsrv_throwawayallstate(curthread); /* Clean the NFS server reply cache */ nfsrvd_cleancache(); /* Free up the krpc server pool. */ if (nfsrvd_pool != NULL) svcpool_destroy(nfsrvd_pool); /* and get rid of the locks */ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_destroy(&nfsrchash_table[i].mtx); mtx_destroy(&nfsrcahash_table[i].mtx); } mtx_destroy(&nfsrc_udpmtx); mtx_destroy(&nfs_v4root_mutex); mtx_destroy(&nfsv4root_mnt.mnt_mtx); for (i = 0; i < nfsrv_sessionhashsize; i++) mtx_destroy(&nfssessionhash[i].mtx); lockdestroy(&nfsv4root_mnt.mnt_explock); free(nfsclienthash, M_NFSDCLIENT); free(nfslockhash, M_NFSDLOCKFILE); free(nfssessionhash, M_NFSDSESSION); loaded = 0; break; default: error = EOPNOTSUPP; break; } out: NFSEXITCODE(error); return (error); } static moduledata_t nfsd_mod = { "nfsd", nfsd_modevent, NULL, }; DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfsd, 1); MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); MODULE_DEPEND(nfsd, nfslock, 1, 1, 1); MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); MODULE_DEPEND(nfsd, krpc, 1, 1, 1); MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); Index: head/sys/kern/capabilities.conf =================================================================== --- head/sys/kern/capabilities.conf (revision 318735) +++ head/sys/kern/capabilities.conf (revision 318736) @@ -1,738 +1,743 @@ ## ## Copyright (c) 2008-2010 Robert N. M. Watson ## All rights reserved. ## ## This software was developed at the University of Cambridge Computer ## Laboratory with support from a grant from Google, Inc. ## ## Redistribution and use in source and binary forms, with or without ## modification, are permitted provided that the following conditions ## are met: ## 1. Redistributions of source code must retain the above copyright ## notice, this list of conditions and the following disclaimer. ## 2. Redistributions in binary form must reproduce the above copyright ## notice, this list of conditions and the following disclaimer in the ## documentation and/or other materials provided with the distribution. ## ## THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ## ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ## ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ## FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ## LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ## OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ## SUCH DAMAGE. ## ## List of system calls enabled in capability mode, one name per line. ## ## Notes: ## - sys_exit(2), abort2(2) and close(2) are very important. ## - Sorted alphabetically, please keep it that way. ## ## $FreeBSD$ ## ## ## Allow ACL and MAC label operations by file descriptor, subject to ## capability rights. Allow MAC label operations on the current process but ## we will need to scope __mac_get_pid(2). ## __acl_aclcheck_fd __acl_delete_fd __acl_get_fd __acl_set_fd __mac_get_fd #__mac_get_pid __mac_get_proc __mac_set_fd __mac_set_proc ## ## Allow sysctl(2) as we scope internal to the call; this is a global ## namespace, but there are several critical sysctls required for almost ## anything to run, such as hw.pagesize. For now that policy lives in the ## kernel for performance and simplicity, but perhaps it could move to a ## proxying daemon in userspace. ## __sysctl ## ## Allow umtx operations as these are scoped by address space. ## ## XXRW: Need to check this very carefully. ## _umtx_op ## ## Allow process termination using abort2(2). ## abort2 ## ## Allow accept(2) since it doesn't manipulate namespaces directly, rather ## relies on existing bindings on a socket, subject to capability rights. ## accept accept4 ## ## Allow AIO operations by file descriptor, subject to capability rights. ## aio_cancel aio_error aio_fsync aio_read aio_return aio_suspend aio_waitcomplete aio_write ## ## audit(2) is a global operation, submitting to the global trail, but it is ## controlled by privilege, and it might be useful to be able to submit ## records from sandboxes. For now, disallow, but we may want to think about ## providing some sort of proxy service for this. ## #audit ## ## Allow bindat(2). ## bindat ## ## Allow capability mode and capability system calls. ## cap_enter cap_fcntls_get cap_fcntls_limit cap_getmode cap_ioctls_get cap_ioctls_limit __cap_rights_get cap_rights_limit ## ## Allow read-only clock operations. ## clock_getres clock_gettime ## ## Always allow file descriptor close(2). ## close closefrom ## ## Allow connectat(2). ## connectat ## ## cpuset(2) and related calls require scoping by process, but should ## eventually be allowed, at least in the current process case. ## #cpuset #cpuset_getaffinity #cpuset_getid #cpuset_setaffinity #cpuset_setid ## ## Always allow dup(2) and dup2(2) manipulation of the file descriptor table. ## dup dup2 ## ## Allow extended attribute operations by file descriptor, subject to ## capability rights. ## extattr_delete_fd extattr_get_fd extattr_list_fd extattr_set_fd ## ## Allow changing file flags, mode, and owner by file descriptor, subject to ## capability rights. ## fchflags fchmod fchown ## ## For now, allow fcntl(2), subject to capability rights, but this probably ## needs additional scoping. ## fcntl ## ## Allow fexecve(2), subject to capability rights. We perform some scoping, ## such as disallowing privilege escalation. ## fexecve ## ## Allow flock(2), subject to capability rights. ## flock ## ## Allow fork(2), even though it returns pids -- some applications seem to ## prefer this interface. ## fork ## ## Allow fpathconf(2), subject to capability rights. ## fpathconf ## ## Allow various file descriptor-based I/O operations, subject to capability ## rights. ## +freebsd11_fstat +freebsd11_fstatat +freebsd11_getdirentries +freebsd11_fstatfs +freebsd11_mknodat freebsd6_ftruncate freebsd6_lseek freebsd6_mmap freebsd6_pread freebsd6_pwrite ## ## Allow querying file and file system state with fstat(2) and fstatfs(2), ## subject to capability rights. ## fstat fstatfs ## ## Allow further file descriptor-based I/O operations, subject to capability ## rights. ## fsync ftruncate ## ## Allow futimens(2) and futimes(2), subject to capability rights. ## futimens futimes ## ## Allow querying process audit state, subject to normal access control. ## getaudit getaudit_addr getauid ## ## Allow thread context management with getcontext(2). ## getcontext ## ## Allow directory I/O on a file descriptor, subject to capability rights. ## Originally we had separate capabilities for directory-specific read ## operations, but on BSD we allow reading the raw directory data, so we just ## rely on CAP_READ now. ## getdents getdirentries ## ## Allow querying certain trivial global state. ## getdomainname ## ## Allow querying certain per-process resource limit state. ## getdtablesize ## ## Allow querying current process credential state. ## getegid geteuid ## ## Allow querying certain trivial global state. ## gethostid gethostname ## ## Allow querying per-process timer. ## getitimer ## ## Allow querying current process credential state. ## getgid getgroups getlogin ## ## Allow querying certain trivial global state. ## getpagesize getpeername ## ## Allow querying certain per-process scheduling, resource limit, and ## credential state. ## ## XXXRW: getpgid(2) needs scoping. It's not clear if it's worth scoping ## getppid(2). getpriority(2) needs scoping. getrusage(2) needs scoping. ## getsid(2) needs scoping. ## getpgid getpgrp getpid getppid getpriority getresgid getresuid getrlimit getrusage getsid ## ## Allow querying socket state, subject to capability rights. ## ## XXXRW: getsockopt(2) may need more attention. ## getsockname getsockopt ## ## Allow querying the global clock. ## gettimeofday ## ## Allow querying current process credential state. ## getuid ## ## Allow ioctl(2), which hopefully will be limited by applications only to ## required commands with cap_ioctls_limit(2) syscall. ## ioctl ## ## Allow querying current process credential state. ## issetugid ## ## Allow kevent(2), as we will authorize based on capability rights on the ## target descriptor. ## kevent ## ## Allow kill(2), as we allow the process to send signals only to himself. ## kill ## ## Allow message queue operations on file descriptors, subject to capability ## rights. ## kmq_notify kmq_setattr kmq_timedreceive kmq_timedsend ## ## Allow kqueue(2), we will control use. ## kqueue ## ## Allow managing per-process timers. ## ktimer_create ktimer_delete ktimer_getoverrun ktimer_gettime ktimer_settime ## ## We can't allow ktrace(2) because it relies on a global namespace, but we ## might want to introduce an fktrace(2) of some sort. ## #ktrace ## ## Allow AIO operations by file descriptor, subject to capability rights. ## lio_listio ## ## Allow listen(2), subject to capability rights. ## ## XXXRW: One might argue this manipulates a global namespace. ## listen ## ## Allow I/O-related file descriptors, subject to capability rights. ## lseek ## ## Allow simple VM operations on the current process. ## madvise mincore minherit mlock mlockall ## ## Allow memory mapping a file descriptor, and updating protections, subject ## to capability rights. ## mmap mprotect ## ## Allow simple VM operations on the current process. ## msync munlock munlockall munmap ## ## Allow the current process to sleep. ## nanosleep ## ## Allow querying the global clock. ## ntp_gettime ## ## Allow AIO operations by file descriptor, subject to capability rights. ## oaio_read oaio_write ## ## Allow simple VM operations on the current process. ## obreak ## ## Allow AIO operations by file descriptor, subject to capability rights. ## olio_listio ## ## Operations relative to directory capabilities. ## chflagsat faccessat fchmodat fchownat fstatat futimesat linkat mkdirat mkfifoat mknodat openat readlinkat renameat symlinkat unlinkat utimensat ## ## Process descriptor-related system calls are allowed. ## pdfork pdgetpid pdkill #pdwait4 # not yet implemented ## ## Allow pipe(2). ## pipe pipe2 ## ## Allow poll(2), which will be scoped by capability rights. ## XXXRW: We don't yet do that scoping. ## poll ## ## Allow I/O-related file descriptors, subject to capability rights. ## pread preadv ## ## Allow access to profiling state on the current process. ## profil ## ## Disallow ptrace(2) for now, but we do need debugging facilities in ## capability mode, so we will want to revisit this, possibly by scoping its ## operation. ## #ptrace ## ## Allow I/O-related file descriptors, subject to capability rights. ## pwrite pwritev read readv recv recvfrom recvmsg ## ## Allow real-time scheduling primitives to be used. ## ## XXXRW: These require scoping. ## rtprio rtprio_thread ## ## Allow simple VM operations on the current process. ## sbrk ## ## Allow querying trivial global scheduler state. ## sched_get_priority_max sched_get_priority_min ## ## Allow various thread/process scheduler operations. ## ## XXXRW: Some of these require further scoping. ## sched_getparam sched_getscheduler sched_rr_get_interval sched_setparam sched_setscheduler sched_yield ## ## Allow I/O-related file descriptors, subject to capability rights. ## sctp_generic_recvmsg sctp_generic_sendmsg sctp_generic_sendmsg_iov sctp_peeloff ## ## Allow pselect(2) and select(2), which will be scoped by capability rights. ## ## XXXRW: But is it? ## pselect select ## ## Allow I/O-related file descriptors, subject to capability rights. Use of ## explicit addresses here is restricted by the system calls themselves. ## send sendfile sendmsg sendto ## ## Allow setting per-process audit state, which is controlled separately by ## privileges. ## setaudit setaudit_addr setauid ## ## Allow setting thread context. ## setcontext ## ## Allow setting current process credential state, which is controlled ## separately by privilege. ## setegid seteuid setgid ## ## Allow use of the process interval timer. ## setitimer ## ## Allow setpriority(2). ## ## XXXRW: Requires scoping. ## setpriority ## ## Allow setting current process credential state, which is controlled ## separately by privilege. ## setregid setresgid setresuid setreuid ## ## Allow setting process resource limits with setrlimit(2). ## setrlimit ## ## Allow creating a new session with setsid(2). ## setsid ## ## Allow setting socket options with setsockopt(2), subject to capability ## rights. ## ## XXXRW: Might require scoping. ## setsockopt ## ## Allow setting current process credential state, which is controlled ## separately by privilege. ## setuid ## ## shm_open(2) is scoped so as to allow only access to new anonymous objects. ## shm_open ## ## Allow I/O-related file descriptors, subject to capability rights. ## shutdown ## ## Allow signal control on current process. ## sigaction sigaltstack sigblock sigpending sigprocmask sigqueue sigreturn sigsetmask sigstack sigsuspend sigtimedwait sigvec sigwaitinfo sigwait ## ## Allow creating new socket pairs with socket(2) and socketpair(2). ## socket socketpair ## ## Allow simple VM operations on the current process. ## ## XXXRW: Kernel doesn't implement this, so drop? ## sstk ## ## Do allow sync(2) for now, but possibly shouldn't. ## sync ## ## Always allow process termination with sys_exit(2). ## sys_exit ## ## sysarch(2) does rather diverse things, but is required on at least i386 ## in order to configure per-thread data. As such, it's scoped on each ## architecture. ## sysarch ## ## Allow thread operations operating only on current process. ## thr_create thr_exit thr_kill ## ## Disallow thr_kill2(2), as it may operate beyond the current process. ## ## XXXRW: Requires scoping. ## #thr_kill2 ## ## Allow thread operations operating only on current process. ## thr_new thr_self thr_set_name thr_suspend thr_wake ## ## Allow manipulation of the current process umask with umask(2). ## umask ## ## Allow submitting of process trace entries with utrace(2). ## utrace ## ## Allow generating UUIDs with uuidgen(2). ## uuidgen ## ## Allow I/O-related file descriptors, subject to capability rights. ## write writev ## ## Allow processes to yield(2). ## yield Index: head/sys/kern/kern_acct.c =================================================================== --- head/sys/kern/kern_acct.c (revision 318735) +++ head/sys/kern/kern_acct.c (revision 318736) @@ -1,652 +1,657 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * Copyright (c) 2005 Robert N. M. Watson * All rights reserved. * * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Copyright (c) 1994 Christopher G. Demetriou * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_acct.c 8.1 (Berkeley) 6/14/93 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +_Static_assert(sizeof(struct acctv3) - offsetof(struct acctv3, ac_trailer) == + sizeof(struct acctv2) - offsetof(struct acctv2, ac_trailer), "trailer"); +_Static_assert(sizeof(struct acctv3) - offsetof(struct acctv3, ac_len2) == + sizeof(struct acctv2) - offsetof(struct acctv2, ac_len2), "len2"); + /* * The routines implemented in this file are described in: * Leffler, et al.: The Design and Implementation of the 4.3BSD * UNIX Operating System (Addison Welley, 1989) * on pages 62-63. * On May 2007 the historic 3 bits base 8 exponent, 13 bit fraction * compt_t representation described in the above reference was replaced * with that of IEEE-754 floats. * * Arguably, to simplify accounting operations, this mechanism should * be replaced by one in which an accounting log file (similar to /dev/klog) * is read by a user process, etc. However, that has its own problems. */ /* Floating point definitions from . */ #define FLT_MANT_DIG 24 /* p */ #define FLT_MAX_EXP 128 /* emax */ /* * Internal accounting functions. * The former's operation is described in Leffler, et al., and the latter * was provided by UCB with the 4.4BSD-Lite release */ static uint32_t encode_timeval(struct timeval); static uint32_t encode_long(long); static void acctwatch(void); static void acct_thread(void *); static int acct_disable(struct thread *, int); /* * Accounting vnode pointer, saved vnode pointer, and flags for each. * acct_sx protects against changes to the active vnode and credentials * while accounting records are being committed to disk. */ static int acct_configured; static int acct_suspended; static struct vnode *acct_vp; static struct ucred *acct_cred; static struct plimit *acct_limit; static int acct_flags; static struct sx acct_sx; SX_SYSINIT(acct, &acct_sx, "acct_sx"); /* * State of the accounting kthread. */ static int acct_state; #define ACCT_RUNNING 1 /* Accounting kthread is running. */ #define ACCT_EXITREQ 2 /* Accounting kthread should exit. */ /* * Values associated with enabling and disabling accounting */ static int acctsuspend = 2; /* stop accounting when < 2% free space left */ SYSCTL_INT(_kern, OID_AUTO, acct_suspend, CTLFLAG_RW, &acctsuspend, 0, "percentage of free disk space below which accounting stops"); static int acctresume = 4; /* resume when free space risen to > 4% */ SYSCTL_INT(_kern, OID_AUTO, acct_resume, CTLFLAG_RW, &acctresume, 0, "percentage of free disk space above which accounting resumes"); static int acctchkfreq = 15; /* frequency (in seconds) to check space */ static int sysctl_acct_chkfreq(SYSCTL_HANDLER_ARGS) { int error, value; /* Write out the old value. */ error = SYSCTL_OUT(req, &acctchkfreq, sizeof(int)); if (error || req->newptr == NULL) return (error); /* Read in and verify the new value. */ error = SYSCTL_IN(req, &value, sizeof(int)); if (error) return (error); if (value <= 0) return (EINVAL); acctchkfreq = value; return (0); } SYSCTL_PROC(_kern, OID_AUTO, acct_chkfreq, CTLTYPE_INT|CTLFLAG_RW, &acctchkfreq, 0, sysctl_acct_chkfreq, "I", "frequency for checking the free space"); SYSCTL_INT(_kern, OID_AUTO, acct_configured, CTLFLAG_RD, &acct_configured, 0, "Accounting configured or not"); SYSCTL_INT(_kern, OID_AUTO, acct_suspended, CTLFLAG_RD, &acct_suspended, 0, "Accounting suspended or not"); /* * Accounting system call. Written based on the specification and previous * implementation done by Mark Tinguely. */ int sys_acct(struct thread *td, struct acct_args *uap) { struct nameidata nd; int error, flags, i, replacing; error = priv_check(td, PRIV_ACCT); if (error) return (error); /* * If accounting is to be started to a file, open that file for * appending and make sure it's a 'normal'. */ if (uap->path != NULL) { NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, UIO_USERSPACE, uap->path, td); flags = FWRITE | O_APPEND; error = vn_open(&nd, &flags, 0, NULL); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); #ifdef MAC error = mac_system_check_acct(td->td_ucred, nd.ni_vp); if (error) { VOP_UNLOCK(nd.ni_vp, 0); vn_close(nd.ni_vp, flags, td->td_ucred, td); return (error); } #endif VOP_UNLOCK(nd.ni_vp, 0); if (nd.ni_vp->v_type != VREG) { vn_close(nd.ni_vp, flags, td->td_ucred, td); return (EACCES); } #ifdef MAC } else { error = mac_system_check_acct(td->td_ucred, NULL); if (error) return (error); #endif } /* * Disallow concurrent access to the accounting vnode while we swap * it out, in order to prevent access after close. */ sx_xlock(&acct_sx); /* * Don't log spurious disable/enable messages if we are * switching from one accounting file to another due to log * rotation. */ replacing = (acct_vp != NULL && uap->path != NULL); /* * If accounting was previously enabled, kill the old space-watcher, * close the file, and (if no new file was specified, leave). Reset * the suspended state regardless of whether accounting remains * enabled. */ acct_suspended = 0; if (acct_vp != NULL) error = acct_disable(td, !replacing); if (uap->path == NULL) { if (acct_state & ACCT_RUNNING) { acct_state |= ACCT_EXITREQ; wakeup(&acct_state); } sx_xunlock(&acct_sx); return (error); } /* * Create our own plimit object without limits. It will be assigned * to exiting processes. */ acct_limit = lim_alloc(); for (i = 0; i < RLIM_NLIMITS; i++) acct_limit->pl_rlimit[i].rlim_cur = acct_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY; /* * Save the new accounting file vnode, and schedule the new * free space watcher. */ acct_vp = nd.ni_vp; acct_cred = crhold(td->td_ucred); acct_flags = flags; if (acct_state & ACCT_RUNNING) acct_state &= ~ACCT_EXITREQ; else { /* * Try to start up an accounting kthread. We may start more * than one, but if so the extras will commit suicide as * soon as they start up. */ error = kproc_create(acct_thread, NULL, NULL, 0, 0, "accounting"); if (error) { (void) acct_disable(td, 0); sx_xunlock(&acct_sx); log(LOG_NOTICE, "Unable to start accounting thread\n"); return (error); } } acct_configured = 1; sx_xunlock(&acct_sx); if (!replacing) log(LOG_NOTICE, "Accounting enabled\n"); return (error); } /* * Disable currently in-progress accounting by closing the vnode, dropping * our reference to the credential, and clearing the vnode's flags. */ static int acct_disable(struct thread *td, int logging) { int error; sx_assert(&acct_sx, SX_XLOCKED); error = vn_close(acct_vp, acct_flags, acct_cred, td); crfree(acct_cred); lim_free(acct_limit); acct_configured = 0; acct_vp = NULL; acct_cred = NULL; acct_flags = 0; if (logging) log(LOG_NOTICE, "Accounting disabled\n"); return (error); } /* * Write out process accounting information, on process exit. * Data to be written out is specified in Leffler, et al. * and are enumerated below. (They're also noted in the system * "acct.h" header file.) */ int acct_process(struct thread *td) { - struct acctv2 acct; + struct acctv3 acct; struct timeval ut, st, tmp; struct plimit *oldlim; struct proc *p; struct rusage ru; int t, ret; /* * Lockless check of accounting condition before doing the hard * work. */ if (acct_vp == NULL || acct_suspended) return (0); sx_slock(&acct_sx); /* * If accounting isn't enabled, don't bother. Have to check again * once we own the lock in case we raced with disabling of accounting * by another thread. */ if (acct_vp == NULL || acct_suspended) { sx_sunlock(&acct_sx); return (0); } p = td->td_proc; /* * Get process accounting information. */ sx_slock(&proctree_lock); PROC_LOCK(p); /* (1) The terminal from which the process was started */ if ((p->p_flag & P_CONTROLT) && p->p_pgrp->pg_session->s_ttyp) acct.ac_tty = tty_udev(p->p_pgrp->pg_session->s_ttyp); else acct.ac_tty = NODEV; sx_sunlock(&proctree_lock); /* (2) The name of the command that ran */ bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm); /* (3) The amount of user and system time that was used */ rufetchcalc(p, &ru, &ut, &st); acct.ac_utime = encode_timeval(ut); acct.ac_stime = encode_timeval(st); /* (4) The elapsed time the command ran (and its starting time) */ getboottime(&tmp); timevaladd(&tmp, &p->p_stats->p_start); acct.ac_btime = tmp.tv_sec; microuptime(&tmp); timevalsub(&tmp, &p->p_stats->p_start); acct.ac_etime = encode_timeval(tmp); /* (5) The average amount of memory used */ tmp = ut; timevaladd(&tmp, &st); /* Convert tmp (i.e. u + s) into hz units to match ru_i*. */ t = tmp.tv_sec * hz + tmp.tv_usec / tick; if (t) acct.ac_mem = encode_long((ru.ru_ixrss + ru.ru_idrss + + ru.ru_isrss) / t); else acct.ac_mem = 0; /* (6) The number of disk I/O operations done */ acct.ac_io = encode_long(ru.ru_inblock + ru.ru_oublock); /* (7) The UID and GID of the process */ acct.ac_uid = p->p_ucred->cr_ruid; acct.ac_gid = p->p_ucred->cr_rgid; /* (8) The boolean flags that tell how the process terminated, etc. */ acct.ac_flagx = p->p_acflag; /* Setup ancillary structure fields. */ acct.ac_flagx |= ANVER; acct.ac_zero = 0; - acct.ac_version = 2; + acct.ac_version = 3; acct.ac_len = acct.ac_len2 = sizeof(acct); /* * Eliminate rlimits (file size limit in particular). */ oldlim = p->p_limit; p->p_limit = lim_hold(acct_limit); PROC_UNLOCK(p); lim_free(oldlim); /* * Write the accounting information to the file. */ ret = vn_rdwr(UIO_WRITE, acct_vp, (caddr_t)&acct, sizeof (acct), (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, acct_cred, NOCRED, NULL, td); sx_sunlock(&acct_sx); return (ret); } /* FLOAT_CONVERSION_START (Regression testing; don't remove this line.) */ /* Convert timevals and longs into IEEE-754 bit patterns. */ /* Mantissa mask (MSB is implied, so subtract 1). */ #define MANT_MASK ((1 << (FLT_MANT_DIG - 1)) - 1) /* * We calculate integer values to a precision of approximately * 28 bits. * This is high-enough precision to fill the 24 float bits * and low-enough to avoid overflowing the 32 int bits. */ #define CALC_BITS 28 /* log_2(1000000). */ #define LOG2_1M 20 /* * Convert the elements of a timeval into a 32-bit word holding * the bits of a IEEE-754 float. * The float value represents the timeval's value in microsecond units. */ static uint32_t encode_timeval(struct timeval tv) { int log2_s; int val, exp; /* Unnormalized value and exponent */ int norm_exp; /* Normalized exponent */ int shift; /* * First calculate value and exponent to about CALC_BITS precision. * Note that the following conditionals have been ordered so that * the most common cases appear first. */ if (tv.tv_sec == 0) { if (tv.tv_usec == 0) return (0); exp = 0; val = tv.tv_usec; } else { /* * Calculate the value to a precision of approximately * CALC_BITS. */ log2_s = fls(tv.tv_sec) - 1; if (log2_s + LOG2_1M < CALC_BITS) { exp = 0; val = 1000000 * tv.tv_sec + tv.tv_usec; } else { exp = log2_s + LOG2_1M - CALC_BITS; val = (unsigned int)(((uint64_t)1000000 * tv.tv_sec + tv.tv_usec) >> exp); } } /* Now normalize and pack the value into an IEEE-754 float. */ norm_exp = fls(val) - 1; shift = FLT_MANT_DIG - norm_exp - 1; #ifdef ACCT_DEBUG printf("val=%d exp=%d shift=%d log2(val)=%d\n", val, exp, shift, norm_exp); printf("exp=%x mant=%x\n", FLT_MAX_EXP - 1 + exp + norm_exp, ((shift > 0 ? (val << shift) : (val >> -shift)) & MANT_MASK)); #endif return (((FLT_MAX_EXP - 1 + exp + norm_exp) << (FLT_MANT_DIG - 1)) | ((shift > 0 ? val << shift : val >> -shift) & MANT_MASK)); } /* * Convert a non-negative long value into the bit pattern of * an IEEE-754 float value. */ static uint32_t encode_long(long val) { int norm_exp; /* Normalized exponent */ int shift; if (val == 0) return (0); if (val < 0) { log(LOG_NOTICE, "encode_long: negative value %ld in accounting record\n", val); val = LONG_MAX; } norm_exp = fls(val) - 1; shift = FLT_MANT_DIG - norm_exp - 1; #ifdef ACCT_DEBUG printf("val=%d shift=%d log2(val)=%d\n", val, shift, norm_exp); printf("exp=%x mant=%x\n", FLT_MAX_EXP - 1 + exp + norm_exp, ((shift > 0 ? (val << shift) : (val >> -shift)) & MANT_MASK)); #endif return (((FLT_MAX_EXP - 1 + norm_exp) << (FLT_MANT_DIG - 1)) | ((shift > 0 ? val << shift : val >> -shift) & MANT_MASK)); } /* FLOAT_CONVERSION_END (Regression testing; don't remove this line.) */ /* * Periodically check the filesystem to see if accounting * should be turned on or off. Beware the case where the vnode * has been vgone()'d out from underneath us, e.g. when the file * system containing the accounting file has been forcibly unmounted. */ /* ARGSUSED */ static void acctwatch(void) { struct statfs *sp; sx_assert(&acct_sx, SX_XLOCKED); /* * If accounting was disabled before our kthread was scheduled, * then acct_vp might be NULL. If so, just ask our kthread to * exit and return. */ if (acct_vp == NULL) { acct_state |= ACCT_EXITREQ; return; } /* * If our vnode is no longer valid, tear it down and signal the * accounting thread to die. */ if (acct_vp->v_type == VBAD) { (void) acct_disable(NULL, 1); acct_state |= ACCT_EXITREQ; return; } /* * Stopping here is better than continuing, maybe it will be VBAD * next time around. */ sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); if (VFS_STATFS(acct_vp->v_mount, sp) < 0) { free(sp, M_STATFS); return; } if (acct_suspended) { if (sp->f_bavail > (int64_t)(acctresume * sp->f_blocks / 100)) { acct_suspended = 0; log(LOG_NOTICE, "Accounting resumed\n"); } } else { if (sp->f_bavail <= (int64_t)(acctsuspend * sp->f_blocks / 100)) { acct_suspended = 1; log(LOG_NOTICE, "Accounting suspended\n"); } } free(sp, M_STATFS); } /* * The main loop for the dedicated kernel thread that periodically calls * acctwatch(). */ static void acct_thread(void *dummy) { u_char pri; /* This is a low-priority kernel thread. */ pri = PRI_MAX_KERN; thread_lock(curthread); sched_prio(curthread, pri); thread_unlock(curthread); /* If another accounting kthread is already running, just die. */ sx_xlock(&acct_sx); if (acct_state & ACCT_RUNNING) { sx_xunlock(&acct_sx); kproc_exit(0); } acct_state |= ACCT_RUNNING; /* Loop until we are asked to exit. */ while (!(acct_state & ACCT_EXITREQ)) { /* Perform our periodic checks. */ acctwatch(); /* * We check this flag again before sleeping since the * acctwatch() might have shut down accounting and asked us * to exit. */ if (!(acct_state & ACCT_EXITREQ)) { sx_sleep(&acct_state, &acct_sx, 0, "-", acctchkfreq * hz); } } /* * Acknowledge the exit request and shutdown. We clear both the * exit request and running flags. */ acct_state = 0; sx_xunlock(&acct_sx); kproc_exit(0); } Index: head/sys/kern/kern_descrip.c =================================================================== --- head/sys/kern/kern_descrip.c (revision 318735) +++ head/sys/kern/kern_descrip.c (revision 318736) @@ -1,4129 +1,4164 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_compat.h" #include "opt_ddb.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", "file desc to leader structures"); static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); MALLOC_DEFINE(M_FILECAPS, "filecaps", "descriptor capabilities"); MALLOC_DECLARE(M_FADVISE); static __read_mostly uma_zone_t file_zone; static __read_mostly uma_zone_t filedesc0_zone; static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders); static int fd_first_free(struct filedesc *fdp, int low, int size); static int fd_last_used(struct filedesc *fdp, int size); static void fdgrowtable(struct filedesc *fdp, int nfd); static void fdgrowtable_exp(struct filedesc *fdp, int nfd); static void fdunused(struct filedesc *fdp, int fd); static void fdused(struct filedesc *fdp, int fd); static int getmaxfd(struct thread *td); /* * Each process has: * * - An array of open file descriptors (fd_ofiles) * - An array of file flags (fd_ofileflags) * - A bitmap recording which descriptors are in use (fd_map) * * A process starts out with NDFILE descriptors. The value of NDFILE has * been selected based the historical limit of 20 open files, and an * assumption that the majority of processes, especially short-lived * processes like shells, will never need more. * * If this initial allocation is exhausted, a larger descriptor table and * map are allocated dynamically, and the pointers in the process's struct * filedesc are updated to point to those. This is repeated every time * the process runs out of file descriptors (provided it hasn't hit its * resource limit). * * Since threads may hold references to individual descriptor table * entries, the tables are never freed. Instead, they are placed on a * linked list and freed only when the struct filedesc is released. */ #define NDFILE 20 #define NDSLOTSIZE sizeof(NDSLOTTYPE) #define NDENTRIES (NDSLOTSIZE * __CHAR_BIT) #define NDSLOT(x) ((x) / NDENTRIES) #define NDBIT(x) ((NDSLOTTYPE)1 << ((x) % NDENTRIES)) #define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES) /* * SLIST entry used to keep track of ofiles which must be reclaimed when * the process exits. */ struct freetable { struct fdescenttbl *ft_table; SLIST_ENTRY(freetable) ft_next; }; /* * Initial allocation: a filedesc structure + the head of SLIST used to * keep track of old ofiles + enough space for NDFILE descriptors. */ struct fdescenttbl0 { int fdt_nfiles; struct filedescent fdt_ofiles[NDFILE]; }; struct filedesc0 { struct filedesc fd_fd; SLIST_HEAD(, freetable) fd_free; struct fdescenttbl0 fd_dfiles; NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; }; /* * Descriptor management. */ volatile int __exclusive_cache_line openfiles; /* actual number of open files */ struct mtx sigio_lock; /* mtx to protect pointers to sigio */ void __read_mostly (*mq_fdclose)(struct thread *td, int fd, struct file *fp); /* * If low >= size, just return low. Otherwise find the first zero bit in the * given bitmap, starting at low and not exceeding size - 1. Return size if * not found. */ static int fd_first_free(struct filedesc *fdp, int low, int size) { NDSLOTTYPE *map = fdp->fd_map; NDSLOTTYPE mask; int off, maxoff; if (low >= size) return (low); off = NDSLOT(low); if (low % NDENTRIES) { mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES))); if ((mask &= ~map[off]) != 0UL) return (off * NDENTRIES + ffsl(mask) - 1); ++off; } for (maxoff = NDSLOTS(size); off < maxoff; ++off) if (map[off] != ~0UL) return (off * NDENTRIES + ffsl(~map[off]) - 1); return (size); } /* * Find the highest non-zero bit in the given bitmap, starting at 0 and * not exceeding size - 1. Return -1 if not found. */ static int fd_last_used(struct filedesc *fdp, int size) { NDSLOTTYPE *map = fdp->fd_map; NDSLOTTYPE mask; int off, minoff; off = NDSLOT(size); if (size % NDENTRIES) { mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES)); if ((mask &= map[off]) != 0) return (off * NDENTRIES + flsl(mask) - 1); --off; } for (minoff = NDSLOT(0); off >= minoff; --off) if (map[off] != 0) return (off * NDENTRIES + flsl(map[off]) - 1); return (-1); } static int fdisused(struct filedesc *fdp, int fd) { KASSERT(fd >= 0 && fd < fdp->fd_nfiles, ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles)); return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0); } /* * Mark a file descriptor as used. */ static void fdused_init(struct filedesc *fdp, int fd) { KASSERT(!fdisused(fdp, fd), ("fd=%d is already used", fd)); fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd); } static void fdused(struct filedesc *fdp, int fd) { FILEDESC_XLOCK_ASSERT(fdp); fdused_init(fdp, fd); if (fd > fdp->fd_lastfile) fdp->fd_lastfile = fd; if (fd == fdp->fd_freefile) fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles); } /* * Mark a file descriptor as unused. */ static void fdunused(struct filedesc *fdp, int fd) { FILEDESC_XLOCK_ASSERT(fdp); KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd)); KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, ("fd=%d is still in use", fd)); fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); if (fd < fdp->fd_freefile) fdp->fd_freefile = fd; if (fd == fdp->fd_lastfile) fdp->fd_lastfile = fd_last_used(fdp, fd); } /* * Free a file descriptor. * * Avoid some work if fdp is about to be destroyed. */ static inline void fdefree_last(struct filedescent *fde) { filecaps_free(&fde->fde_caps); } static inline void fdfree(struct filedesc *fdp, int fd) { struct filedescent *fde; fde = &fdp->fd_ofiles[fd]; #ifdef CAPABILITIES seq_write_begin(&fde->fde_seq); #endif fdefree_last(fde); fde->fde_file = NULL; fdunused(fdp, fd); #ifdef CAPABILITIES seq_write_end(&fde->fde_seq); #endif } void pwd_ensure_dirs(void) { struct filedesc *fdp; fdp = curproc->p_fd; FILEDESC_XLOCK(fdp); if (fdp->fd_cdir == NULL) { fdp->fd_cdir = rootvnode; vrefact(rootvnode); } if (fdp->fd_rdir == NULL) { fdp->fd_rdir = rootvnode; vrefact(rootvnode); } FILEDESC_XUNLOCK(fdp); } /* * System calls on descriptors. */ #ifndef _SYS_SYSPROTO_H_ struct getdtablesize_args { int dummy; }; #endif /* ARGSUSED */ int sys_getdtablesize(struct thread *td, struct getdtablesize_args *uap) { #ifdef RACCT uint64_t lim; #endif td->td_retval[0] = min((int)lim_cur(td, RLIMIT_NOFILE), maxfilesperproc); #ifdef RACCT PROC_LOCK(td->td_proc); lim = racct_get_limit(td->td_proc, RACCT_NOFILE); PROC_UNLOCK(td->td_proc); if (lim < td->td_retval[0]) td->td_retval[0] = lim; #endif return (0); } /* * Duplicate a file descriptor to a particular value. * * Note: keep in mind that a potential race condition exists when closing * descriptors from a shared descriptor table (via rfork). */ #ifndef _SYS_SYSPROTO_H_ struct dup2_args { u_int from; u_int to; }; #endif /* ARGSUSED */ int sys_dup2(struct thread *td, struct dup2_args *uap) { return (kern_dup(td, FDDUP_FIXED, 0, (int)uap->from, (int)uap->to)); } /* * Duplicate a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct dup_args { u_int fd; }; #endif /* ARGSUSED */ int sys_dup(struct thread *td, struct dup_args *uap) { return (kern_dup(td, FDDUP_NORMAL, 0, (int)uap->fd, 0)); } /* * The file control system call. */ #ifndef _SYS_SYSPROTO_H_ struct fcntl_args { int fd; int cmd; long arg; }; #endif /* ARGSUSED */ int sys_fcntl(struct thread *td, struct fcntl_args *uap) { return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, uap->arg)); } int kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg) { struct flock fl; struct __oflock ofl; intptr_t arg1; int error, newcmd; error = 0; newcmd = cmd; switch (cmd) { case F_OGETLK: case F_OSETLK: case F_OSETLKW: /* * Convert old flock structure to new. */ error = copyin((void *)(intptr_t)arg, &ofl, sizeof(ofl)); fl.l_start = ofl.l_start; fl.l_len = ofl.l_len; fl.l_pid = ofl.l_pid; fl.l_type = ofl.l_type; fl.l_whence = ofl.l_whence; fl.l_sysid = 0; switch (cmd) { case F_OGETLK: newcmd = F_GETLK; break; case F_OSETLK: newcmd = F_SETLK; break; case F_OSETLKW: newcmd = F_SETLKW; break; } arg1 = (intptr_t)&fl; break; case F_GETLK: case F_SETLK: case F_SETLKW: case F_SETLK_REMOTE: error = copyin((void *)(intptr_t)arg, &fl, sizeof(fl)); arg1 = (intptr_t)&fl; break; default: arg1 = arg; break; } if (error) return (error); error = kern_fcntl(td, fd, newcmd, arg1); if (error) return (error); if (cmd == F_OGETLK) { ofl.l_start = fl.l_start; ofl.l_len = fl.l_len; ofl.l_pid = fl.l_pid; ofl.l_type = fl.l_type; ofl.l_whence = fl.l_whence; error = copyout(&ofl, (void *)(intptr_t)arg, sizeof(ofl)); } else if (cmd == F_GETLK) { error = copyout(&fl, (void *)(intptr_t)arg, sizeof(fl)); } return (error); } int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) { struct filedesc *fdp; struct flock *flp; struct file *fp, *fp2; struct filedescent *fde; struct proc *p; struct vnode *vp; cap_rights_t rights; int error, flg, tmp; uint64_t bsize; off_t foffset; error = 0; flg = F_POSIX; p = td->td_proc; fdp = p->p_fd; AUDIT_ARG_FD(cmd); AUDIT_ARG_CMD(cmd); switch (cmd) { case F_DUPFD: tmp = arg; error = kern_dup(td, FDDUP_FCNTL, 0, fd, tmp); break; case F_DUPFD_CLOEXEC: tmp = arg; error = kern_dup(td, FDDUP_FCNTL, FDDUP_FLAG_CLOEXEC, fd, tmp); break; case F_DUP2FD: tmp = arg; error = kern_dup(td, FDDUP_FIXED, 0, fd, tmp); break; case F_DUP2FD_CLOEXEC: tmp = arg; error = kern_dup(td, FDDUP_FIXED, FDDUP_FLAG_CLOEXEC, fd, tmp); break; case F_GETFD: error = EBADF; FILEDESC_SLOCK(fdp); fde = fdeget_locked(fdp, fd); if (fde != NULL) { td->td_retval[0] = (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0; error = 0; } FILEDESC_SUNLOCK(fdp); break; case F_SETFD: error = EBADF; FILEDESC_XLOCK(fdp); fde = fdeget_locked(fdp, fd); if (fde != NULL) { fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) | (arg & FD_CLOEXEC ? UF_EXCLOSE : 0); error = 0; } FILEDESC_XUNLOCK(fdp); break; case F_GETFL: error = fget_fcntl(td, fd, cap_rights_init(&rights, CAP_FCNTL), F_GETFL, &fp); if (error != 0) break; td->td_retval[0] = OFLAGS(fp->f_flag); fdrop(fp, td); break; case F_SETFL: error = fget_fcntl(td, fd, cap_rights_init(&rights, CAP_FCNTL), F_SETFL, &fp); if (error != 0) break; do { tmp = flg = fp->f_flag; tmp &= ~FCNTLFLAGS; tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; } while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0); tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); if (error != 0) { fdrop(fp, td); break; } tmp = fp->f_flag & FASYNC; error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td); if (error == 0) { fdrop(fp, td); break; } atomic_clear_int(&fp->f_flag, FNONBLOCK); tmp = 0; (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); fdrop(fp, td); break; case F_GETOWN: error = fget_fcntl(td, fd, cap_rights_init(&rights, CAP_FCNTL), F_GETOWN, &fp); if (error != 0) break; error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); if (error == 0) td->td_retval[0] = tmp; fdrop(fp, td); break; case F_SETOWN: error = fget_fcntl(td, fd, cap_rights_init(&rights, CAP_FCNTL), F_SETOWN, &fp); if (error != 0) break; tmp = arg; error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); fdrop(fp, td); break; case F_SETLK_REMOTE: error = priv_check(td, PRIV_NFS_LOCKD); if (error) return (error); flg = F_REMOTE; goto do_setlk; case F_SETLKW: flg |= F_WAIT; /* FALLTHROUGH F_SETLK */ case F_SETLK: do_setlk: cap_rights_init(&rights, CAP_FLOCK); error = fget_unlocked(fdp, fd, &rights, &fp, NULL); if (error != 0) break; if (fp->f_type != DTYPE_VNODE) { error = EBADF; fdrop(fp, td); break; } flp = (struct flock *)arg; if (flp->l_whence == SEEK_CUR) { foffset = foffset_get(fp); if (foffset < 0 || (flp->l_start > 0 && foffset > OFF_MAX - flp->l_start)) { error = EOVERFLOW; fdrop(fp, td); break; } flp->l_start += foffset; } vp = fp->f_vnode; switch (flp->l_type) { case F_RDLCK: if ((fp->f_flag & FREAD) == 0) { error = EBADF; break; } PROC_LOCK(p->p_leader); p->p_leader->p_flag |= P_ADVLOCK; PROC_UNLOCK(p->p_leader); error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, flp, flg); break; case F_WRLCK: if ((fp->f_flag & FWRITE) == 0) { error = EBADF; break; } PROC_LOCK(p->p_leader); p->p_leader->p_flag |= P_ADVLOCK; PROC_UNLOCK(p->p_leader); error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, flp, flg); break; case F_UNLCK: error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, flp, flg); break; case F_UNLCKSYS: /* * Temporary api for testing remote lock * infrastructure. */ if (flg != F_REMOTE) { error = EINVAL; break; } error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCKSYS, flp, flg); break; default: error = EINVAL; break; } if (error != 0 || flp->l_type == F_UNLCK || flp->l_type == F_UNLCKSYS) { fdrop(fp, td); break; } /* * Check for a race with close. * * The vnode is now advisory locked (or unlocked, but this case * is not really important) as the caller requested. * We had to drop the filedesc lock, so we need to recheck if * the descriptor is still valid, because if it was closed * in the meantime we need to remove advisory lock from the * vnode - close on any descriptor leading to an advisory * locked vnode, removes that lock. * We will return 0 on purpose in that case, as the result of * successful advisory lock might have been externally visible * already. This is fine - effectively we pretend to the caller * that the closing thread was a bit slower and that the * advisory lock succeeded before the close. */ error = fget_unlocked(fdp, fd, &rights, &fp2, NULL); if (error != 0) { fdrop(fp, td); break; } if (fp != fp2) { flp->l_whence = SEEK_SET; flp->l_start = 0; flp->l_len = 0; flp->l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, flp, F_POSIX); } fdrop(fp, td); fdrop(fp2, td); break; case F_GETLK: error = fget_unlocked(fdp, fd, cap_rights_init(&rights, CAP_FLOCK), &fp, NULL); if (error != 0) break; if (fp->f_type != DTYPE_VNODE) { error = EBADF; fdrop(fp, td); break; } flp = (struct flock *)arg; if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && flp->l_type != F_UNLCK) { error = EINVAL; fdrop(fp, td); break; } if (flp->l_whence == SEEK_CUR) { foffset = foffset_get(fp); if ((flp->l_start > 0 && foffset > OFF_MAX - flp->l_start) || (flp->l_start < 0 && foffset < OFF_MIN - flp->l_start)) { error = EOVERFLOW; fdrop(fp, td); break; } flp->l_start += foffset; } vp = fp->f_vnode; error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, F_POSIX); fdrop(fp, td); break; case F_RDAHEAD: arg = arg ? 128 * 1024: 0; /* FALLTHROUGH */ case F_READAHEAD: error = fget_unlocked(fdp, fd, cap_rights_init(&rights), &fp, NULL); if (error != 0) break; if (fp->f_type != DTYPE_VNODE) { fdrop(fp, td); error = EBADF; break; } vp = fp->f_vnode; /* * Exclusive lock synchronizes against f_seqcount reads and * writes in sequential_heuristic(). */ error = vn_lock(vp, LK_EXCLUSIVE); if (error != 0) { fdrop(fp, td); break; } if (arg >= 0) { bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize; fp->f_seqcount = (arg + bsize - 1) / bsize; atomic_set_int(&fp->f_flag, FRDAHEAD); } else { atomic_clear_int(&fp->f_flag, FRDAHEAD); } VOP_UNLOCK(vp, 0); fdrop(fp, td); break; default: error = EINVAL; break; } return (error); } static int getmaxfd(struct thread *td) { return (min((int)lim_cur(td, RLIMIT_NOFILE), maxfilesperproc)); } /* * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD). */ int kern_dup(struct thread *td, u_int mode, int flags, int old, int new) { struct filedesc *fdp; struct filedescent *oldfde, *newfde; struct proc *p; struct file *delfp; int error, maxfd; p = td->td_proc; fdp = p->p_fd; MPASS((flags & ~(FDDUP_FLAG_CLOEXEC)) == 0); MPASS(mode < FDDUP_LASTMODE); AUDIT_ARG_FD(old); /* XXXRW: if (flags & FDDUP_FIXED) AUDIT_ARG_FD2(new); */ /* * Verify we have a valid descriptor to dup from and possibly to * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should * return EINVAL when the new descriptor is out of bounds. */ if (old < 0) return (EBADF); if (new < 0) return (mode == FDDUP_FCNTL ? EINVAL : EBADF); maxfd = getmaxfd(td); if (new >= maxfd) return (mode == FDDUP_FCNTL ? EINVAL : EBADF); error = EBADF; FILEDESC_XLOCK(fdp); if (fget_locked(fdp, old) == NULL) goto unlock; if ((mode == FDDUP_FIXED || mode == FDDUP_MUSTREPLACE) && old == new) { td->td_retval[0] = new; if (flags & FDDUP_FLAG_CLOEXEC) fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE; error = 0; goto unlock; } /* * If the caller specified a file descriptor, make sure the file * table is large enough to hold it, and grab it. Otherwise, just * allocate a new descriptor the usual way. */ switch (mode) { case FDDUP_NORMAL: case FDDUP_FCNTL: if ((error = fdalloc(td, new, &new)) != 0) goto unlock; break; case FDDUP_MUSTREPLACE: /* Target file descriptor must exist. */ if (fget_locked(fdp, new) == NULL) goto unlock; break; case FDDUP_FIXED: if (new >= fdp->fd_nfiles) { /* * The resource limits are here instead of e.g. * fdalloc(), because the file descriptor table may be * shared between processes, so we can't really use * racct_add()/racct_sub(). Instead of counting the * number of actually allocated descriptors, just put * the limit on the size of the file descriptor table. */ #ifdef RACCT if (racct_enable) { PROC_LOCK(p); error = racct_set(p, RACCT_NOFILE, new + 1); PROC_UNLOCK(p); if (error != 0) { error = EMFILE; goto unlock; } } #endif fdgrowtable_exp(fdp, new + 1); } if (!fdisused(fdp, new)) fdused(fdp, new); break; default: KASSERT(0, ("%s unsupported mode %d", __func__, mode)); } KASSERT(old != new, ("new fd is same as old")); oldfde = &fdp->fd_ofiles[old]; fhold(oldfde->fde_file); newfde = &fdp->fd_ofiles[new]; delfp = newfde->fde_file; /* * Duplicate the source descriptor. */ #ifdef CAPABILITIES seq_write_begin(&newfde->fde_seq); #endif filecaps_free(&newfde->fde_caps); memcpy(newfde, oldfde, fde_change_size); filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps, true); if ((flags & FDDUP_FLAG_CLOEXEC) != 0) newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE; else newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE; #ifdef CAPABILITIES seq_write_end(&newfde->fde_seq); #endif td->td_retval[0] = new; error = 0; if (delfp != NULL) { (void) closefp(fdp, new, delfp, td, 1); FILEDESC_UNLOCK_ASSERT(fdp); } else { unlock: FILEDESC_XUNLOCK(fdp); } return (error); } /* * If sigio is on the list associated with a process or process group, * disable signalling from the device, remove sigio from the list and * free sigio. */ void funsetown(struct sigio **sigiop) { struct sigio *sigio; if (*sigiop == NULL) return; SIGIO_LOCK(); sigio = *sigiop; if (sigio == NULL) { SIGIO_UNLOCK(); return; } *(sigio->sio_myref) = NULL; if ((sigio)->sio_pgid < 0) { struct pgrp *pg = (sigio)->sio_pgrp; PGRP_LOCK(pg); SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, sigio, sio_pgsigio); PGRP_UNLOCK(pg); } else { struct proc *p = (sigio)->sio_proc; PROC_LOCK(p); SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, sigio, sio_pgsigio); PROC_UNLOCK(p); } SIGIO_UNLOCK(); crfree(sigio->sio_ucred); free(sigio, M_SIGIO); } /* * Free a list of sigio structures. * We only need to lock the SIGIO_LOCK because we have made ourselves * inaccessible to callers of fsetown and therefore do not need to lock * the proc or pgrp struct for the list manipulation. */ void funsetownlst(struct sigiolst *sigiolst) { struct proc *p; struct pgrp *pg; struct sigio *sigio; sigio = SLIST_FIRST(sigiolst); if (sigio == NULL) return; p = NULL; pg = NULL; /* * Every entry of the list should belong * to a single proc or pgrp. */ if (sigio->sio_pgid < 0) { pg = sigio->sio_pgrp; PGRP_LOCK_ASSERT(pg, MA_NOTOWNED); } else /* if (sigio->sio_pgid > 0) */ { p = sigio->sio_proc; PROC_LOCK_ASSERT(p, MA_NOTOWNED); } SIGIO_LOCK(); while ((sigio = SLIST_FIRST(sigiolst)) != NULL) { *(sigio->sio_myref) = NULL; if (pg != NULL) { KASSERT(sigio->sio_pgid < 0, ("Proc sigio in pgrp sigio list")); KASSERT(sigio->sio_pgrp == pg, ("Bogus pgrp in sigio list")); PGRP_LOCK(pg); SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, sio_pgsigio); PGRP_UNLOCK(pg); } else /* if (p != NULL) */ { KASSERT(sigio->sio_pgid > 0, ("Pgrp sigio in proc sigio list")); KASSERT(sigio->sio_proc == p, ("Bogus proc in sigio list")); PROC_LOCK(p); SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, sio_pgsigio); PROC_UNLOCK(p); } SIGIO_UNLOCK(); crfree(sigio->sio_ucred); free(sigio, M_SIGIO); SIGIO_LOCK(); } SIGIO_UNLOCK(); } /* * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). * * After permission checking, add a sigio structure to the sigio list for * the process or process group. */ int fsetown(pid_t pgid, struct sigio **sigiop) { struct proc *proc; struct pgrp *pgrp; struct sigio *sigio; int ret; if (pgid == 0) { funsetown(sigiop); return (0); } ret = 0; /* Allocate and fill in the new sigio out of locks. */ sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK); sigio->sio_pgid = pgid; sigio->sio_ucred = crhold(curthread->td_ucred); sigio->sio_myref = sigiop; sx_slock(&proctree_lock); if (pgid > 0) { proc = pfind(pgid); if (proc == NULL) { ret = ESRCH; goto fail; } /* * Policy - Don't allow a process to FSETOWN a process * in another session. * * Remove this test to allow maximum flexibility or * restrict FSETOWN to the current process or process * group for maximum safety. */ PROC_UNLOCK(proc); if (proc->p_session != curthread->td_proc->p_session) { ret = EPERM; goto fail; } pgrp = NULL; } else /* if (pgid < 0) */ { pgrp = pgfind(-pgid); if (pgrp == NULL) { ret = ESRCH; goto fail; } PGRP_UNLOCK(pgrp); /* * Policy - Don't allow a process to FSETOWN a process * in another session. * * Remove this test to allow maximum flexibility or * restrict FSETOWN to the current process or process * group for maximum safety. */ if (pgrp->pg_session != curthread->td_proc->p_session) { ret = EPERM; goto fail; } proc = NULL; } funsetown(sigiop); if (pgid > 0) { PROC_LOCK(proc); /* * Since funsetownlst() is called without the proctree * locked, we need to check for P_WEXIT. * XXX: is ESRCH correct? */ if ((proc->p_flag & P_WEXIT) != 0) { PROC_UNLOCK(proc); ret = ESRCH; goto fail; } SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); sigio->sio_proc = proc; PROC_UNLOCK(proc); } else { PGRP_LOCK(pgrp); SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); sigio->sio_pgrp = pgrp; PGRP_UNLOCK(pgrp); } sx_sunlock(&proctree_lock); SIGIO_LOCK(); *sigiop = sigio; SIGIO_UNLOCK(); return (0); fail: sx_sunlock(&proctree_lock); crfree(sigio->sio_ucred); free(sigio, M_SIGIO); return (ret); } /* * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). */ pid_t fgetown(sigiop) struct sigio **sigiop; { pid_t pgid; SIGIO_LOCK(); pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0; SIGIO_UNLOCK(); return (pgid); } /* * Function drops the filedesc lock on return. */ static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders) { int error; FILEDESC_XLOCK_ASSERT(fdp); if (holdleaders) { if (td->td_proc->p_fdtol != NULL) { /* * Ask fdfree() to sleep to ensure that all relevant * process leaders can be traversed in closef(). */ fdp->fd_holdleaderscount++; } else { holdleaders = 0; } } /* * We now hold the fp reference that used to be owned by the * descriptor array. We have to unlock the FILEDESC *AFTER* * knote_fdclose to prevent a race of the fd getting opened, a knote * added, and deleteing a knote for the new fd. */ knote_fdclose(td, fd); /* * We need to notify mqueue if the object is of type mqueue. */ if (fp->f_type == DTYPE_MQUEUE) mq_fdclose(td, fd, fp); FILEDESC_XUNLOCK(fdp); error = closef(fp, td); if (holdleaders) { FILEDESC_XLOCK(fdp); fdp->fd_holdleaderscount--; if (fdp->fd_holdleaderscount == 0 && fdp->fd_holdleaderswakeup != 0) { fdp->fd_holdleaderswakeup = 0; wakeup(&fdp->fd_holdleaderscount); } FILEDESC_XUNLOCK(fdp); } return (error); } /* * Close a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct close_args { int fd; }; #endif /* ARGSUSED */ int sys_close(struct thread *td, struct close_args *uap) { return (kern_close(td, uap->fd)); } int kern_close(struct thread *td, int fd) { struct filedesc *fdp; struct file *fp; fdp = td->td_proc->p_fd; AUDIT_SYSCLOSE(td, fd); FILEDESC_XLOCK(fdp); if ((fp = fget_locked(fdp, fd)) == NULL) { FILEDESC_XUNLOCK(fdp); return (EBADF); } fdfree(fdp, fd); /* closefp() drops the FILEDESC lock for us. */ return (closefp(fdp, fd, fp, td, 1)); } /* * Close open file descriptors. */ #ifndef _SYS_SYSPROTO_H_ struct closefrom_args { int lowfd; }; #endif /* ARGSUSED */ int sys_closefrom(struct thread *td, struct closefrom_args *uap) { struct filedesc *fdp; int fd; fdp = td->td_proc->p_fd; AUDIT_ARG_FD(uap->lowfd); /* * Treat negative starting file descriptor values identical to * closefrom(0) which closes all files. */ if (uap->lowfd < 0) uap->lowfd = 0; FILEDESC_SLOCK(fdp); for (fd = uap->lowfd; fd <= fdp->fd_lastfile; fd++) { if (fdp->fd_ofiles[fd].fde_file != NULL) { FILEDESC_SUNLOCK(fdp); (void)kern_close(td, fd); FILEDESC_SLOCK(fdp); } } FILEDESC_SUNLOCK(fdp); return (0); } #if defined(COMPAT_43) /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ofstat_args { int fd; struct ostat *sb; }; #endif /* ARGSUSED */ int ofstat(struct thread *td, struct ofstat_args *uap) { struct ostat oub; struct stat ub; int error; error = kern_fstat(td, uap->fd, &ub); if (error == 0) { cvtstat(&ub, &oub); error = copyout(&oub, uap->sb, sizeof(oub)); } return (error); } #endif /* COMPAT_43 */ +#if defined(COMPAT_FREEBSD11) +int +freebsd11_fstat(struct thread *td, struct freebsd11_fstat_args *uap) +{ + struct stat sb; + struct freebsd11_stat osb; + int error; + + error = kern_fstat(td, uap->fd, &sb); + if (error != 0) + return (error); + freebsd11_cvtstat(&sb, &osb); + error = copyout(&osb, uap->sb, sizeof(osb)); + return (error); +} +#endif /* COMPAT_FREEBSD11 */ + /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fstat_args { int fd; struct stat *sb; }; #endif /* ARGSUSED */ int sys_fstat(struct thread *td, struct fstat_args *uap) { struct stat ub; int error; error = kern_fstat(td, uap->fd, &ub); if (error == 0) error = copyout(&ub, uap->sb, sizeof(ub)); return (error); } int kern_fstat(struct thread *td, int fd, struct stat *sbp) { struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(fd); error = fget(td, fd, cap_rights_init(&rights, CAP_FSTAT), &fp); if (error != 0) return (error); AUDIT_ARG_FILE(td->td_proc, fp); error = fo_stat(fp, sbp, td->td_ucred, td); fdrop(fp, td); +#ifdef __STAT_TIME_T_EXT + if (error == 0) { + sbp->st_atim_ext = 0; + sbp->st_mtim_ext = 0; + sbp->st_ctim_ext = 0; + sbp->st_btim_ext = 0; + } +#endif #ifdef KTRACE if (error == 0 && KTRPOINT(td, KTR_STRUCT)) ktrstat(sbp); #endif return (error); } +#if defined(COMPAT_FREEBSD11) /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ -struct nfstat_args { +struct freebsd11_nfstat_args { int fd; struct nstat *sb; }; #endif /* ARGSUSED */ int -sys_nfstat(struct thread *td, struct nfstat_args *uap) +freebsd11_nfstat(struct thread *td, struct freebsd11_nfstat_args *uap) { struct nstat nub; struct stat ub; int error; error = kern_fstat(td, uap->fd, &ub); if (error == 0) { - cvtnstat(&ub, &nub); + freebsd11_cvtnstat(&ub, &nub); error = copyout(&nub, uap->sb, sizeof(nub)); } return (error); } +#endif /* COMPAT_FREEBSD11 */ /* * Return pathconf information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fpathconf_args { int fd; int name; }; #endif /* ARGSUSED */ int sys_fpathconf(struct thread *td, struct fpathconf_args *uap) { struct file *fp; struct vnode *vp; cap_rights_t rights; int error; error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FPATHCONF), &fp); if (error != 0) return (error); if (uap->name == _PC_ASYNC_IO) { td->td_retval[0] = _POSIX_ASYNCHRONOUS_IO; goto out; } vp = fp->f_vnode; if (vp != NULL) { vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_PATHCONF(vp, uap->name, td->td_retval); VOP_UNLOCK(vp, 0); } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { if (uap->name != _PC_PIPE_BUF) { error = EINVAL; } else { td->td_retval[0] = PIPE_BUF; error = 0; } } else { error = EOPNOTSUPP; } out: fdrop(fp, td); return (error); } /* * Initialize filecaps structure. */ void filecaps_init(struct filecaps *fcaps) { bzero(fcaps, sizeof(*fcaps)); fcaps->fc_nioctls = -1; } /* * Copy filecaps structure allocating memory for ioctls array if needed. * * The last parameter indicates whether the fdtable is locked. If it is not and * ioctls are encountered, copying fails and the caller must lock the table. * * Note that if the table was not locked, the caller has to check the relevant * sequence counter to determine whether the operation was successful. */ int filecaps_copy(const struct filecaps *src, struct filecaps *dst, bool locked) { size_t size; *dst = *src; if (src->fc_ioctls == NULL) return (0); if (!locked) return (1); KASSERT(src->fc_nioctls > 0, ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls)); size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls; dst->fc_ioctls = malloc(size, M_FILECAPS, M_WAITOK); bcopy(src->fc_ioctls, dst->fc_ioctls, size); return (0); } /* * Move filecaps structure to the new place and clear the old place. */ void filecaps_move(struct filecaps *src, struct filecaps *dst) { *dst = *src; bzero(src, sizeof(*src)); } /* * Fill the given filecaps structure with full rights. */ static void filecaps_fill(struct filecaps *fcaps) { CAP_ALL(&fcaps->fc_rights); fcaps->fc_ioctls = NULL; fcaps->fc_nioctls = -1; fcaps->fc_fcntls = CAP_FCNTL_ALL; } /* * Free memory allocated within filecaps structure. */ void filecaps_free(struct filecaps *fcaps) { free(fcaps->fc_ioctls, M_FILECAPS); bzero(fcaps, sizeof(*fcaps)); } /* * Validate the given filecaps structure. */ static void filecaps_validate(const struct filecaps *fcaps, const char *func) { KASSERT(cap_rights_is_valid(&fcaps->fc_rights), ("%s: invalid rights", func)); KASSERT((fcaps->fc_fcntls & ~CAP_FCNTL_ALL) == 0, ("%s: invalid fcntls", func)); KASSERT(fcaps->fc_fcntls == 0 || cap_rights_is_set(&fcaps->fc_rights, CAP_FCNTL), ("%s: fcntls without CAP_FCNTL", func)); KASSERT(fcaps->fc_ioctls != NULL ? fcaps->fc_nioctls > 0 : (fcaps->fc_nioctls == -1 || fcaps->fc_nioctls == 0), ("%s: invalid ioctls", func)); KASSERT(fcaps->fc_nioctls == 0 || cap_rights_is_set(&fcaps->fc_rights, CAP_IOCTL), ("%s: ioctls without CAP_IOCTL", func)); } static void fdgrowtable_exp(struct filedesc *fdp, int nfd) { int nfd1; FILEDESC_XLOCK_ASSERT(fdp); nfd1 = fdp->fd_nfiles * 2; if (nfd1 < nfd) nfd1 = nfd; fdgrowtable(fdp, nfd1); } /* * Grow the file table to accommodate (at least) nfd descriptors. */ static void fdgrowtable(struct filedesc *fdp, int nfd) { struct filedesc0 *fdp0; struct freetable *ft; struct fdescenttbl *ntable; struct fdescenttbl *otable; int nnfiles, onfiles; NDSLOTTYPE *nmap, *omap; /* * If lastfile is -1 this struct filedesc was just allocated and we are * growing it to accommodate for the one we are going to copy from. There * is no need to have a lock on this one as it's not visible to anyone. */ if (fdp->fd_lastfile != -1) FILEDESC_XLOCK_ASSERT(fdp); KASSERT(fdp->fd_nfiles > 0, ("zero-length file table")); /* save old values */ onfiles = fdp->fd_nfiles; otable = fdp->fd_files; omap = fdp->fd_map; /* compute the size of the new table */ nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */ if (nnfiles <= onfiles) /* the table is already large enough */ return; /* * Allocate a new table. We need enough space for the number of * entries, file entries themselves and the struct freetable we will use * when we decommission the table and place it on the freelist. * We place the struct freetable in the middle so we don't have * to worry about padding. */ ntable = malloc(offsetof(struct fdescenttbl, fdt_ofiles) + nnfiles * sizeof(ntable->fdt_ofiles[0]) + sizeof(struct freetable), M_FILEDESC, M_ZERO | M_WAITOK); /* copy the old data */ ntable->fdt_nfiles = nnfiles; memcpy(ntable->fdt_ofiles, otable->fdt_ofiles, onfiles * sizeof(ntable->fdt_ofiles[0])); /* * Allocate a new map only if the old is not large enough. It will * grow at a slower rate than the table as it can map more * entries than the table can hold. */ if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) { nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC, M_ZERO | M_WAITOK); /* copy over the old data and update the pointer */ memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap)); fdp->fd_map = nmap; } /* * Make sure that ntable is correctly initialized before we replace * fd_files poiner. Otherwise fget_unlocked() may see inconsistent * data. */ atomic_store_rel_ptr((volatile void *)&fdp->fd_files, (uintptr_t)ntable); /* * Do not free the old file table, as some threads may still * reference entries within it. Instead, place it on a freelist * which will be processed when the struct filedesc is released. * * Note that if onfiles == NDFILE, we're dealing with the original * static allocation contained within (struct filedesc0 *)fdp, * which must not be freed. */ if (onfiles > NDFILE) { ft = (struct freetable *)&otable->fdt_ofiles[onfiles]; fdp0 = (struct filedesc0 *)fdp; ft->ft_table = otable; SLIST_INSERT_HEAD(&fdp0->fd_free, ft, ft_next); } /* * The map does not have the same possibility of threads still * holding references to it. So always free it as long as it * does not reference the original static allocation. */ if (NDSLOTS(onfiles) > NDSLOTS(NDFILE)) free(omap, M_FILEDESC); } /* * Allocate a file descriptor for the process. */ int fdalloc(struct thread *td, int minfd, int *result) { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; int fd, maxfd, allocfd; #ifdef RACCT int error; #endif FILEDESC_XLOCK_ASSERT(fdp); if (fdp->fd_freefile > minfd) minfd = fdp->fd_freefile; maxfd = getmaxfd(td); /* * Search the bitmap for a free descriptor starting at minfd. * If none is found, grow the file table. */ fd = fd_first_free(fdp, minfd, fdp->fd_nfiles); if (fd >= maxfd) return (EMFILE); if (fd >= fdp->fd_nfiles) { allocfd = min(fd * 2, maxfd); #ifdef RACCT if (racct_enable) { PROC_LOCK(p); error = racct_set(p, RACCT_NOFILE, allocfd); PROC_UNLOCK(p); if (error != 0) return (EMFILE); } #endif /* * fd is already equal to first free descriptor >= minfd, so * we only need to grow the table and we are done. */ fdgrowtable_exp(fdp, allocfd); } /* * Perform some sanity checks, then mark the file descriptor as * used and return it to the caller. */ KASSERT(fd >= 0 && fd < min(maxfd, fdp->fd_nfiles), ("invalid descriptor %d", fd)); KASSERT(!fdisused(fdp, fd), ("fd_first_free() returned non-free descriptor")); KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, ("file descriptor isn't free")); fdused(fdp, fd); *result = fd; return (0); } /* * Allocate n file descriptors for the process. */ int fdallocn(struct thread *td, int minfd, int *fds, int n) { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; int i; FILEDESC_XLOCK_ASSERT(fdp); for (i = 0; i < n; i++) if (fdalloc(td, 0, &fds[i]) != 0) break; if (i < n) { for (i--; i >= 0; i--) fdunused(fdp, fds[i]); return (EMFILE); } return (0); } /* * Create a new open file structure and allocate a file descriptor for the * process that refers to it. We add one reference to the file for the * descriptor table and one reference for resultfp. This is to prevent us * being preempted and the entry in the descriptor table closed after we * release the FILEDESC lock. */ int falloc_caps(struct thread *td, struct file **resultfp, int *resultfd, int flags, struct filecaps *fcaps) { struct file *fp; int error, fd; error = falloc_noinstall(td, &fp); if (error) return (error); /* no reference held on error */ error = finstall(td, fp, &fd, flags, fcaps); if (error) { fdrop(fp, td); /* one reference (fp only) */ return (error); } if (resultfp != NULL) *resultfp = fp; /* copy out result */ else fdrop(fp, td); /* release local reference */ if (resultfd != NULL) *resultfd = fd; return (0); } /* * Create a new open file structure without allocating a file descriptor. */ int falloc_noinstall(struct thread *td, struct file **resultfp) { struct file *fp; int maxuserfiles = maxfiles - (maxfiles / 20); int openfiles_new; static struct timeval lastfail; static int curfail; KASSERT(resultfp != NULL, ("%s: resultfp == NULL", __func__)); openfiles_new = atomic_fetchadd_int(&openfiles, 1) + 1; if ((openfiles_new >= maxuserfiles && priv_check(td, PRIV_MAXFILES) != 0) || openfiles_new >= maxfiles) { atomic_subtract_int(&openfiles, 1); if (ppsratecheck(&lastfail, &curfail, 1)) { printf("kern.maxfiles limit exceeded by uid %i, (%s) " "please see tuning(7).\n", td->td_ucred->cr_ruid, td->td_proc->p_comm); } return (ENFILE); } fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO); refcount_init(&fp->f_count, 1); fp->f_cred = crhold(td->td_ucred); fp->f_ops = &badfileops; *resultfp = fp; return (0); } /* * Install a file in a file descriptor table. */ void _finstall(struct filedesc *fdp, struct file *fp, int fd, int flags, struct filecaps *fcaps) { struct filedescent *fde; MPASS(fp != NULL); if (fcaps != NULL) filecaps_validate(fcaps, __func__); FILEDESC_XLOCK_ASSERT(fdp); fde = &fdp->fd_ofiles[fd]; #ifdef CAPABILITIES seq_write_begin(&fde->fde_seq); #endif fde->fde_file = fp; fde->fde_flags = (flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0; if (fcaps != NULL) filecaps_move(fcaps, &fde->fde_caps); else filecaps_fill(&fde->fde_caps); #ifdef CAPABILITIES seq_write_end(&fde->fde_seq); #endif } int finstall(struct thread *td, struct file *fp, int *fd, int flags, struct filecaps *fcaps) { struct filedesc *fdp = td->td_proc->p_fd; int error; MPASS(fd != NULL); FILEDESC_XLOCK(fdp); if ((error = fdalloc(td, 0, fd))) { FILEDESC_XUNLOCK(fdp); return (error); } fhold(fp); _finstall(fdp, fp, *fd, flags, fcaps); FILEDESC_XUNLOCK(fdp); return (0); } /* * Build a new filedesc structure from another. * Copy the current, root, and jail root vnode references. * * If fdp is not NULL, return with it shared locked. */ struct filedesc * fdinit(struct filedesc *fdp, bool prepfiles) { struct filedesc0 *newfdp0; struct filedesc *newfdp; newfdp0 = uma_zalloc(filedesc0_zone, M_WAITOK | M_ZERO); newfdp = &newfdp0->fd_fd; /* Create the file descriptor table. */ FILEDESC_LOCK_INIT(newfdp); refcount_init(&newfdp->fd_refcnt, 1); refcount_init(&newfdp->fd_holdcnt, 1); newfdp->fd_cmask = CMASK; newfdp->fd_map = newfdp0->fd_dmap; newfdp->fd_lastfile = -1; newfdp->fd_files = (struct fdescenttbl *)&newfdp0->fd_dfiles; newfdp->fd_files->fdt_nfiles = NDFILE; if (fdp == NULL) return (newfdp); if (prepfiles && fdp->fd_lastfile >= newfdp->fd_nfiles) fdgrowtable(newfdp, fdp->fd_lastfile + 1); FILEDESC_SLOCK(fdp); newfdp->fd_cdir = fdp->fd_cdir; if (newfdp->fd_cdir) vrefact(newfdp->fd_cdir); newfdp->fd_rdir = fdp->fd_rdir; if (newfdp->fd_rdir) vrefact(newfdp->fd_rdir); newfdp->fd_jdir = fdp->fd_jdir; if (newfdp->fd_jdir) vrefact(newfdp->fd_jdir); if (!prepfiles) { FILEDESC_SUNLOCK(fdp); } else { while (fdp->fd_lastfile >= newfdp->fd_nfiles) { FILEDESC_SUNLOCK(fdp); fdgrowtable(newfdp, fdp->fd_lastfile + 1); FILEDESC_SLOCK(fdp); } } return (newfdp); } static struct filedesc * fdhold(struct proc *p) { struct filedesc *fdp; PROC_LOCK_ASSERT(p, MA_OWNED); fdp = p->p_fd; if (fdp != NULL) refcount_acquire(&fdp->fd_holdcnt); return (fdp); } static void fddrop(struct filedesc *fdp) { if (fdp->fd_holdcnt > 1) { if (refcount_release(&fdp->fd_holdcnt) == 0) return; } FILEDESC_LOCK_DESTROY(fdp); uma_zfree(filedesc0_zone, fdp); } /* * Share a filedesc structure. */ struct filedesc * fdshare(struct filedesc *fdp) { refcount_acquire(&fdp->fd_refcnt); return (fdp); } /* * Unshare a filedesc structure, if necessary by making a copy */ void fdunshare(struct thread *td) { struct filedesc *tmp; struct proc *p = td->td_proc; if (p->p_fd->fd_refcnt == 1) return; tmp = fdcopy(p->p_fd); fdescfree(td); p->p_fd = tmp; } void fdinstall_remapped(struct thread *td, struct filedesc *fdp) { fdescfree(td); td->td_proc->p_fd = fdp; } /* * Copy a filedesc structure. A NULL pointer in returns a NULL reference, * this is to ease callers, not catch errors. */ struct filedesc * fdcopy(struct filedesc *fdp) { struct filedesc *newfdp; struct filedescent *nfde, *ofde; int i; MPASS(fdp != NULL); newfdp = fdinit(fdp, true); /* copy all passable descriptors (i.e. not kqueue) */ newfdp->fd_freefile = -1; for (i = 0; i <= fdp->fd_lastfile; ++i) { ofde = &fdp->fd_ofiles[i]; if (ofde->fde_file == NULL || (ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0) { if (newfdp->fd_freefile == -1) newfdp->fd_freefile = i; continue; } nfde = &newfdp->fd_ofiles[i]; *nfde = *ofde; filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true); fhold(nfde->fde_file); fdused_init(newfdp, i); newfdp->fd_lastfile = i; } if (newfdp->fd_freefile == -1) newfdp->fd_freefile = i; newfdp->fd_cmask = fdp->fd_cmask; FILEDESC_SUNLOCK(fdp); return (newfdp); } /* * Copies a filedesc structure, while remapping all file descriptors * stored inside using a translation table. * * File descriptors are copied over to the new file descriptor table, * regardless of whether the close-on-exec flag is set. */ int fdcopy_remapped(struct filedesc *fdp, const int *fds, size_t nfds, struct filedesc **ret) { struct filedesc *newfdp; struct filedescent *nfde, *ofde; int error, i; MPASS(fdp != NULL); newfdp = fdinit(fdp, true); if (nfds > fdp->fd_lastfile + 1) { /* New table cannot be larger than the old one. */ error = E2BIG; goto bad; } /* Copy all passable descriptors (i.e. not kqueue). */ newfdp->fd_freefile = nfds; for (i = 0; i < nfds; ++i) { if (fds[i] < 0 || fds[i] > fdp->fd_lastfile) { /* File descriptor out of bounds. */ error = EBADF; goto bad; } ofde = &fdp->fd_ofiles[fds[i]]; if (ofde->fde_file == NULL) { /* Unused file descriptor. */ error = EBADF; goto bad; } if ((ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0) { /* File descriptor cannot be passed. */ error = EINVAL; goto bad; } nfde = &newfdp->fd_ofiles[i]; *nfde = *ofde; filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true); fhold(nfde->fde_file); fdused_init(newfdp, i); newfdp->fd_lastfile = i; } newfdp->fd_cmask = fdp->fd_cmask; FILEDESC_SUNLOCK(fdp); *ret = newfdp; return (0); bad: FILEDESC_SUNLOCK(fdp); fdescfree_remapped(newfdp); return (error); } /* * Clear POSIX style locks. This is only used when fdp looses a reference (i.e. * one of processes using it exits) and the table used to be shared. */ static void fdclearlocks(struct thread *td) { struct filedesc *fdp; struct filedesc_to_leader *fdtol; struct flock lf; struct file *fp; struct proc *p; struct vnode *vp; int i; p = td->td_proc; fdp = p->p_fd; fdtol = p->p_fdtol; MPASS(fdtol != NULL); FILEDESC_XLOCK(fdp); KASSERT(fdtol->fdl_refcount > 0, ("filedesc_to_refcount botch: fdl_refcount=%d", fdtol->fdl_refcount)); if (fdtol->fdl_refcount == 1 && (p->p_leader->p_flag & P_ADVLOCK) != 0) { for (i = 0; i <= fdp->fd_lastfile; i++) { fp = fdp->fd_ofiles[i].fde_file; if (fp == NULL || fp->f_type != DTYPE_VNODE) continue; fhold(fp); FILEDESC_XUNLOCK(fdp); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; vp = fp->f_vnode; (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &lf, F_POSIX); FILEDESC_XLOCK(fdp); fdrop(fp, td); } } retry: if (fdtol->fdl_refcount == 1) { if (fdp->fd_holdleaderscount > 0 && (p->p_leader->p_flag & P_ADVLOCK) != 0) { /* * close() or kern_dup() has cleared a reference * in a shared file descriptor table. */ fdp->fd_holdleaderswakeup = 1; sx_sleep(&fdp->fd_holdleaderscount, FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0); goto retry; } if (fdtol->fdl_holdcount > 0) { /* * Ensure that fdtol->fdl_leader remains * valid in closef(). */ fdtol->fdl_wakeup = 1; sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0); goto retry; } } fdtol->fdl_refcount--; if (fdtol->fdl_refcount == 0 && fdtol->fdl_holdcount == 0) { fdtol->fdl_next->fdl_prev = fdtol->fdl_prev; fdtol->fdl_prev->fdl_next = fdtol->fdl_next; } else fdtol = NULL; p->p_fdtol = NULL; FILEDESC_XUNLOCK(fdp); if (fdtol != NULL) free(fdtol, M_FILEDESC_TO_LEADER); } /* * Release a filedesc structure. */ static void fdescfree_fds(struct thread *td, struct filedesc *fdp, bool needclose) { struct filedesc0 *fdp0; struct freetable *ft, *tft; struct filedescent *fde; struct file *fp; int i; for (i = 0; i <= fdp->fd_lastfile; i++) { fde = &fdp->fd_ofiles[i]; fp = fde->fde_file; if (fp != NULL) { fdefree_last(fde); if (needclose) (void) closef(fp, td); else fdrop(fp, td); } } if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE)) free(fdp->fd_map, M_FILEDESC); if (fdp->fd_nfiles > NDFILE) free(fdp->fd_files, M_FILEDESC); fdp0 = (struct filedesc0 *)fdp; SLIST_FOREACH_SAFE(ft, &fdp0->fd_free, ft_next, tft) free(ft->ft_table, M_FILEDESC); fddrop(fdp); } void fdescfree(struct thread *td) { struct proc *p; struct filedesc *fdp; struct vnode *cdir, *jdir, *rdir; p = td->td_proc; fdp = p->p_fd; MPASS(fdp != NULL); #ifdef RACCT if (racct_enable) { PROC_LOCK(p); racct_set(p, RACCT_NOFILE, 0); PROC_UNLOCK(p); } #endif if (p->p_fdtol != NULL) fdclearlocks(td); PROC_LOCK(p); p->p_fd = NULL; PROC_UNLOCK(p); if (refcount_release(&fdp->fd_refcnt) == 0) return; FILEDESC_XLOCK(fdp); cdir = fdp->fd_cdir; fdp->fd_cdir = NULL; rdir = fdp->fd_rdir; fdp->fd_rdir = NULL; jdir = fdp->fd_jdir; fdp->fd_jdir = NULL; FILEDESC_XUNLOCK(fdp); if (cdir != NULL) vrele(cdir); if (rdir != NULL) vrele(rdir); if (jdir != NULL) vrele(jdir); fdescfree_fds(td, fdp, 1); } void fdescfree_remapped(struct filedesc *fdp) { if (fdp->fd_cdir != NULL) vrele(fdp->fd_cdir); if (fdp->fd_rdir != NULL) vrele(fdp->fd_rdir); if (fdp->fd_jdir != NULL) vrele(fdp->fd_jdir); fdescfree_fds(curthread, fdp, 0); } /* * For setugid programs, we don't want to people to use that setugidness * to generate error messages which write to a file which otherwise would * otherwise be off-limits to the process. We check for filesystems where * the vnode can change out from under us after execve (like [lin]procfs). * * Since fdsetugidsafety calls this only for fd 0, 1 and 2, this check is * sufficient. We also don't check for setugidness since we know we are. */ static bool is_unsafe(struct file *fp) { struct vnode *vp; if (fp->f_type != DTYPE_VNODE) return (false); vp = fp->f_vnode; return ((vp->v_vflag & VV_PROCDEP) != 0); } /* * Make this setguid thing safe, if at all possible. */ void fdsetugidsafety(struct thread *td) { struct filedesc *fdp; struct file *fp; int i; fdp = td->td_proc->p_fd; KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); MPASS(fdp->fd_nfiles >= 3); for (i = 0; i <= 2; i++) { fp = fdp->fd_ofiles[i].fde_file; if (fp != NULL && is_unsafe(fp)) { FILEDESC_XLOCK(fdp); knote_fdclose(td, i); /* * NULL-out descriptor prior to close to avoid * a race while close blocks. */ fdfree(fdp, i); FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); } } } /* * If a specific file object occupies a specific file descriptor, close the * file descriptor entry and drop a reference on the file object. This is a * convenience function to handle a subsequent error in a function that calls * falloc() that handles the race that another thread might have closed the * file descriptor out from under the thread creating the file object. */ void fdclose(struct thread *td, struct file *fp, int idx) { struct filedesc *fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); if (fdp->fd_ofiles[idx].fde_file == fp) { fdfree(fdp, idx); FILEDESC_XUNLOCK(fdp); fdrop(fp, td); } else FILEDESC_XUNLOCK(fdp); } /* * Close any files on exec? */ void fdcloseexec(struct thread *td) { struct filedesc *fdp; struct filedescent *fde; struct file *fp; int i; fdp = td->td_proc->p_fd; KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); for (i = 0; i <= fdp->fd_lastfile; i++) { fde = &fdp->fd_ofiles[i]; fp = fde->fde_file; if (fp != NULL && (fp->f_type == DTYPE_MQUEUE || (fde->fde_flags & UF_EXCLOSE))) { FILEDESC_XLOCK(fdp); fdfree(fdp, i); (void) closefp(fdp, i, fp, td, 0); FILEDESC_UNLOCK_ASSERT(fdp); } } } /* * It is unsafe for set[ug]id processes to be started with file * descriptors 0..2 closed, as these descriptors are given implicit * significance in the Standard C library. fdcheckstd() will create a * descriptor referencing /dev/null for each of stdin, stdout, and * stderr that is not already open. */ int fdcheckstd(struct thread *td) { struct filedesc *fdp; register_t save; int i, error, devnull; fdp = td->td_proc->p_fd; KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); MPASS(fdp->fd_nfiles >= 3); devnull = -1; for (i = 0; i <= 2; i++) { if (fdp->fd_ofiles[i].fde_file != NULL) continue; save = td->td_retval[0]; if (devnull != -1) { error = kern_dup(td, FDDUP_FIXED, 0, devnull, i); } else { error = kern_openat(td, AT_FDCWD, "/dev/null", UIO_SYSSPACE, O_RDWR, 0); if (error == 0) { devnull = td->td_retval[0]; KASSERT(devnull == i, ("we didn't get our fd")); } } td->td_retval[0] = save; if (error != 0) return (error); } return (0); } /* * Internal form of close. Decrement reference count on file structure. * Note: td may be NULL when closing a file that was being passed in a * message. * * XXXRW: Giant is not required for the caller, but often will be held; this * makes it moderately likely the Giant will be recursed in the VFS case. */ int closef(struct file *fp, struct thread *td) { struct vnode *vp; struct flock lf; struct filedesc_to_leader *fdtol; struct filedesc *fdp; /* * POSIX record locking dictates that any close releases ALL * locks owned by this process. This is handled by setting * a flag in the unlock to free ONLY locks obeying POSIX * semantics, and not to free BSD-style file locks. * If the descriptor was in a message, POSIX-style locks * aren't passed with the descriptor, and the thread pointer * will be NULL. Callers should be careful only to pass a * NULL thread pointer when there really is no owning * context that might have locks, or the locks will be * leaked. */ if (fp->f_type == DTYPE_VNODE && td != NULL) { vp = fp->f_vnode; if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, F_UNLCK, &lf, F_POSIX); } fdtol = td->td_proc->p_fdtol; if (fdtol != NULL) { /* * Handle special case where file descriptor table is * shared between multiple process leaders. */ fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); for (fdtol = fdtol->fdl_next; fdtol != td->td_proc->p_fdtol; fdtol = fdtol->fdl_next) { if ((fdtol->fdl_leader->p_flag & P_ADVLOCK) == 0) continue; fdtol->fdl_holdcount++; FILEDESC_XUNLOCK(fdp); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; vp = fp->f_vnode; (void) VOP_ADVLOCK(vp, (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf, F_POSIX); FILEDESC_XLOCK(fdp); fdtol->fdl_holdcount--; if (fdtol->fdl_holdcount == 0 && fdtol->fdl_wakeup != 0) { fdtol->fdl_wakeup = 0; wakeup(fdtol); } } FILEDESC_XUNLOCK(fdp); } } return (fdrop(fp, td)); } /* * Initialize the file pointer with the specified properties. * * The ops are set with release semantics to be certain that the flags, type, * and data are visible when ops is. This is to prevent ops methods from being * called with bad data. */ void finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops) { fp->f_data = data; fp->f_flag = flag; fp->f_type = type; atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops); } int fget_cap_locked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp, struct file **fpp, struct filecaps *havecapsp) { struct filedescent *fde; int error; FILEDESC_LOCK_ASSERT(fdp); fde = fdeget_locked(fdp, fd); if (fde == NULL) { error = EBADF; goto out; } #ifdef CAPABILITIES error = cap_check(cap_rights_fde(fde), needrightsp); if (error != 0) goto out; #endif if (havecapsp != NULL) filecaps_copy(&fde->fde_caps, havecapsp, true); *fpp = fde->fde_file; error = 0; out: return (error); } int fget_cap(struct thread *td, int fd, cap_rights_t *needrightsp, struct file **fpp, struct filecaps *havecapsp) { struct filedesc *fdp = td->td_proc->p_fd; int error; #ifndef CAPABILITIES error = fget_unlocked(fdp, fd, needrightsp, fpp, NULL); if (error == 0 && havecapsp != NULL) filecaps_fill(havecapsp); #else struct file *fp; seq_t seq; for (;;) { error = fget_unlocked(fdp, fd, needrightsp, &fp, &seq); if (error != 0) return (error); if (havecapsp != NULL) { if (!filecaps_copy(&fdp->fd_ofiles[fd].fde_caps, havecapsp, false)) { fdrop(fp, td); goto get_locked; } } if (!fd_modified(fdp, fd, seq)) break; fdrop(fp, td); } *fpp = fp; return (0); get_locked: FILEDESC_SLOCK(fdp); error = fget_cap_locked(fdp, fd, needrightsp, fpp, havecapsp); if (error == 0) fhold(*fpp); FILEDESC_SUNLOCK(fdp); #endif return (error); } int fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp, struct file **fpp, seq_t *seqp) { #ifdef CAPABILITIES struct filedescent *fde; #endif struct fdescenttbl *fdt; struct file *fp; u_int count; #ifdef CAPABILITIES seq_t seq; cap_rights_t haverights; int error; #endif fdt = fdp->fd_files; if ((u_int)fd >= fdt->fdt_nfiles) return (EBADF); /* * Fetch the descriptor locklessly. We avoid fdrop() races by * never raising a refcount above 0. To accomplish this we have * to use a cmpset loop rather than an atomic_add. The descriptor * must be re-verified once we acquire a reference to be certain * that the identity is still correct and we did not lose a race * due to preemption. */ for (;;) { #ifdef CAPABILITIES seq = seq_read(fd_seq(fdt, fd)); fde = &fdt->fdt_ofiles[fd]; haverights = *cap_rights_fde(fde); fp = fde->fde_file; if (!seq_consistent(fd_seq(fdt, fd), seq)) continue; #else fp = fdt->fdt_ofiles[fd].fde_file; #endif if (fp == NULL) return (EBADF); #ifdef CAPABILITIES error = cap_check(&haverights, needrightsp); if (error != 0) return (error); #endif count = fp->f_count; retry: if (count == 0) { /* * Force a reload. Other thread could reallocate the * table before this fd was closed, so it possible that * there is a stale fp pointer in cached version. */ fdt = *(struct fdescenttbl * volatile *)&(fdp->fd_files); continue; } /* * Use an acquire barrier to force re-reading of fdt so it is * refreshed for verification. */ if (atomic_fcmpset_acq_int(&fp->f_count, &count, count + 1) == 0) goto retry; fdt = fdp->fd_files; #ifdef CAPABILITIES if (seq_consistent_nomb(fd_seq(fdt, fd), seq)) #else if (fp == fdt->fdt_ofiles[fd].fde_file) #endif break; fdrop(fp, curthread); } *fpp = fp; if (seqp != NULL) { #ifdef CAPABILITIES *seqp = seq; #endif } return (0); } /* * Extract the file pointer associated with the specified descriptor for the * current user process. * * If the descriptor doesn't exist or doesn't match 'flags', EBADF is * returned. * * File's rights will be checked against the capability rights mask. * * If an error occurred the non-zero error is returned and *fpp is set to * NULL. Otherwise *fpp is held and set and zero is returned. Caller is * responsible for fdrop(). */ static __inline int _fget(struct thread *td, int fd, struct file **fpp, int flags, cap_rights_t *needrightsp, seq_t *seqp) { struct filedesc *fdp; struct file *fp; int error; *fpp = NULL; fdp = td->td_proc->p_fd; error = fget_unlocked(fdp, fd, needrightsp, &fp, seqp); if (error != 0) return (error); if (fp->f_ops == &badfileops) { fdrop(fp, td); return (EBADF); } /* * FREAD and FWRITE failure return EBADF as per POSIX. */ error = 0; switch (flags) { case FREAD: case FWRITE: if ((fp->f_flag & flags) == 0) error = EBADF; break; case FEXEC: if ((fp->f_flag & (FREAD | FEXEC)) == 0 || ((fp->f_flag & FWRITE) != 0)) error = EBADF; break; case 0: break; default: KASSERT(0, ("wrong flags")); } if (error != 0) { fdrop(fp, td); return (error); } *fpp = fp; return (0); } int fget(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) { return (_fget(td, fd, fpp, 0, rightsp, NULL)); } int fget_mmap(struct thread *td, int fd, cap_rights_t *rightsp, u_char *maxprotp, struct file **fpp) { int error; #ifndef CAPABILITIES error = _fget(td, fd, fpp, 0, rightsp, NULL); if (maxprotp != NULL) *maxprotp = VM_PROT_ALL; #else struct filedesc *fdp = td->td_proc->p_fd; seq_t seq; MPASS(cap_rights_is_set(rightsp, CAP_MMAP)); for (;;) { error = _fget(td, fd, fpp, 0, rightsp, &seq); if (error != 0) return (error); /* * If requested, convert capability rights to access flags. */ if (maxprotp != NULL) *maxprotp = cap_rights_to_vmprot(cap_rights(fdp, fd)); if (!fd_modified(fdp, fd, seq)) break; fdrop(*fpp, td); } #endif return (error); } int fget_read(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) { return (_fget(td, fd, fpp, FREAD, rightsp, NULL)); } int fget_write(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) { return (_fget(td, fd, fpp, FWRITE, rightsp, NULL)); } int fget_fcntl(struct thread *td, int fd, cap_rights_t *rightsp, int needfcntl, struct file **fpp) { struct filedesc *fdp = td->td_proc->p_fd; #ifndef CAPABILITIES return (fget_unlocked(fdp, fd, rightsp, fpp, NULL)); #else int error; seq_t seq; MPASS(cap_rights_is_set(rightsp, CAP_FCNTL)); for (;;) { error = fget_unlocked(fdp, fd, rightsp, fpp, &seq); if (error != 0) return (error); error = cap_fcntl_check(fdp, fd, needfcntl); if (!fd_modified(fdp, fd, seq)) break; fdrop(*fpp, td); } if (error != 0) { fdrop(*fpp, td); *fpp = NULL; } return (error); #endif } /* * Like fget() but loads the underlying vnode, or returns an error if the * descriptor does not represent a vnode. Note that pipes use vnodes but * never have VM objects. The returned vnode will be vref()'d. * * XXX: what about the unused flags ? */ static __inline int _fgetvp(struct thread *td, int fd, int flags, cap_rights_t *needrightsp, struct vnode **vpp) { struct file *fp; int error; *vpp = NULL; error = _fget(td, fd, &fp, flags, needrightsp, NULL); if (error != 0) return (error); if (fp->f_vnode == NULL) { error = EINVAL; } else { *vpp = fp->f_vnode; vrefact(*vpp); } fdrop(fp, td); return (error); } int fgetvp(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) { return (_fgetvp(td, fd, 0, rightsp, vpp)); } int fgetvp_rights(struct thread *td, int fd, cap_rights_t *needrightsp, struct filecaps *havecaps, struct vnode **vpp) { struct filedesc *fdp; struct filecaps caps; struct file *fp; int error; fdp = td->td_proc->p_fd; error = fget_cap_locked(fdp, fd, needrightsp, &fp, &caps); if (error != 0) return (error); if (fp->f_ops == &badfileops) { error = EBADF; goto out; } if (fp->f_vnode == NULL) { error = EINVAL; goto out; } *havecaps = caps; *vpp = fp->f_vnode; vrefact(*vpp); return (0); out: filecaps_free(&caps); return (error); } int fgetvp_read(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) { return (_fgetvp(td, fd, FREAD, rightsp, vpp)); } int fgetvp_exec(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) { return (_fgetvp(td, fd, FEXEC, rightsp, vpp)); } #ifdef notyet int fgetvp_write(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) { return (_fgetvp(td, fd, FWRITE, rightsp, vpp)); } #endif /* * Handle the last reference to a file being closed. */ int _fdrop(struct file *fp, struct thread *td) { int error; if (fp->f_count != 0) panic("fdrop: count %d", fp->f_count); error = fo_close(fp, td); atomic_subtract_int(&openfiles, 1); crfree(fp->f_cred); free(fp->f_advice, M_FADVISE); uma_zfree(file_zone, fp); return (error); } /* * Apply an advisory lock on a file descriptor. * * Just attempt to get a record lock of the requested type on the entire file * (l_whence = SEEK_SET, l_start = 0, l_len = 0). */ #ifndef _SYS_SYSPROTO_H_ struct flock_args { int fd; int how; }; #endif /* ARGSUSED */ int sys_flock(struct thread *td, struct flock_args *uap) { struct file *fp; struct vnode *vp; struct flock lf; cap_rights_t rights; int error; error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FLOCK), &fp); if (error != 0) return (error); if (fp->f_type != DTYPE_VNODE) { fdrop(fp, td); return (EOPNOTSUPP); } vp = fp->f_vnode; lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (uap->how & LOCK_UN) { lf.l_type = F_UNLCK; atomic_clear_int(&fp->f_flag, FHASLOCK); error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); goto done2; } if (uap->how & LOCK_EX) lf.l_type = F_WRLCK; else if (uap->how & LOCK_SH) lf.l_type = F_RDLCK; else { error = EBADF; goto done2; } atomic_set_int(&fp->f_flag, FHASLOCK); error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); done2: fdrop(fp, td); return (error); } /* * Duplicate the specified descriptor to a free descriptor. */ int dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp) { struct filedescent *newfde, *oldfde; struct file *fp; int error, indx; KASSERT(openerror == ENODEV || openerror == ENXIO, ("unexpected error %d in %s", openerror, __func__)); /* * If the to-be-dup'd fd number is greater than the allowed number * of file descriptors, or the fd to be dup'd has already been * closed, then reject. */ FILEDESC_XLOCK(fdp); if ((fp = fget_locked(fdp, dfd)) == NULL) { FILEDESC_XUNLOCK(fdp); return (EBADF); } error = fdalloc(td, 0, &indx); if (error != 0) { FILEDESC_XUNLOCK(fdp); return (error); } /* * There are two cases of interest here. * * For ENODEV simply dup (dfd) to file descriptor (indx) and return. * * For ENXIO steal away the file structure from (dfd) and store it in * (indx). (dfd) is effectively closed by this operation. */ switch (openerror) { case ENODEV: /* * Check that the mode the file is being opened for is a * subset of the mode of the existing descriptor. */ if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { fdunused(fdp, indx); FILEDESC_XUNLOCK(fdp); return (EACCES); } fhold(fp); newfde = &fdp->fd_ofiles[indx]; oldfde = &fdp->fd_ofiles[dfd]; #ifdef CAPABILITIES seq_write_begin(&newfde->fde_seq); #endif memcpy(newfde, oldfde, fde_change_size); filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps, true); #ifdef CAPABILITIES seq_write_end(&newfde->fde_seq); #endif break; case ENXIO: /* * Steal away the file pointer from dfd and stuff it into indx. */ newfde = &fdp->fd_ofiles[indx]; oldfde = &fdp->fd_ofiles[dfd]; #ifdef CAPABILITIES seq_write_begin(&newfde->fde_seq); #endif memcpy(newfde, oldfde, fde_change_size); oldfde->fde_file = NULL; fdunused(fdp, dfd); #ifdef CAPABILITIES seq_write_end(&newfde->fde_seq); #endif break; } FILEDESC_XUNLOCK(fdp); *indxp = indx; return (0); } /* * This sysctl determines if we will allow a process to chroot(2) if it * has a directory open: * 0: disallowed for all processes. * 1: allowed for processes that were not already chroot(2)'ed. * 2: allowed for all processes. */ static int chroot_allow_open_directories = 1; SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, &chroot_allow_open_directories, 0, "Allow a process to chroot(2) if it has a directory open"); /* * Helper function for raised chroot(2) security function: Refuse if * any filedescriptors are open directories. */ static int chroot_refuse_vdir_fds(struct filedesc *fdp) { struct vnode *vp; struct file *fp; int fd; FILEDESC_LOCK_ASSERT(fdp); for (fd = 0; fd <= fdp->fd_lastfile; fd++) { fp = fget_locked(fdp, fd); if (fp == NULL) continue; if (fp->f_type == DTYPE_VNODE) { vp = fp->f_vnode; if (vp->v_type == VDIR) return (EPERM); } } return (0); } /* * Common routine for kern_chroot() and jail_attach(). The caller is * responsible for invoking priv_check() and mac_vnode_check_chroot() to * authorize this operation. */ int pwd_chroot(struct thread *td, struct vnode *vp) { struct filedesc *fdp; struct vnode *oldvp; int error; fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); if (chroot_allow_open_directories == 0 || (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { error = chroot_refuse_vdir_fds(fdp); if (error != 0) { FILEDESC_XUNLOCK(fdp); return (error); } } oldvp = fdp->fd_rdir; vrefact(vp); fdp->fd_rdir = vp; if (fdp->fd_jdir == NULL) { vrefact(vp); fdp->fd_jdir = vp; } FILEDESC_XUNLOCK(fdp); vrele(oldvp); return (0); } void pwd_chdir(struct thread *td, struct vnode *vp) { struct filedesc *fdp; struct vnode *oldvp; fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); VNASSERT(vp->v_usecount > 0, vp, ("chdir to a vnode with zero usecount")); oldvp = fdp->fd_cdir; fdp->fd_cdir = vp; FILEDESC_XUNLOCK(fdp); vrele(oldvp); } /* * Scan all active processes and prisons to see if any of them have a current * or root directory of `olddp'. If so, replace them with the new mount point. */ void mountcheckdirs(struct vnode *olddp, struct vnode *newdp) { struct filedesc *fdp; struct prison *pr; struct proc *p; int nrele; if (vrefcnt(olddp) == 1) return; nrele = 0; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); fdp = fdhold(p); PROC_UNLOCK(p); if (fdp == NULL) continue; FILEDESC_XLOCK(fdp); if (fdp->fd_cdir == olddp) { vrefact(newdp); fdp->fd_cdir = newdp; nrele++; } if (fdp->fd_rdir == olddp) { vrefact(newdp); fdp->fd_rdir = newdp; nrele++; } if (fdp->fd_jdir == olddp) { vrefact(newdp); fdp->fd_jdir = newdp; nrele++; } FILEDESC_XUNLOCK(fdp); fddrop(fdp); } sx_sunlock(&allproc_lock); if (rootvnode == olddp) { vrefact(newdp); rootvnode = newdp; nrele++; } mtx_lock(&prison0.pr_mtx); if (prison0.pr_root == olddp) { vrefact(newdp); prison0.pr_root = newdp; nrele++; } mtx_unlock(&prison0.pr_mtx); sx_slock(&allprison_lock); TAILQ_FOREACH(pr, &allprison, pr_list) { mtx_lock(&pr->pr_mtx); if (pr->pr_root == olddp) { vrefact(newdp); pr->pr_root = newdp; nrele++; } mtx_unlock(&pr->pr_mtx); } sx_sunlock(&allprison_lock); while (nrele--) vrele(olddp); } struct filedesc_to_leader * filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader) { struct filedesc_to_leader *fdtol; fdtol = malloc(sizeof(struct filedesc_to_leader), M_FILEDESC_TO_LEADER, M_WAITOK); fdtol->fdl_refcount = 1; fdtol->fdl_holdcount = 0; fdtol->fdl_wakeup = 0; fdtol->fdl_leader = leader; if (old != NULL) { FILEDESC_XLOCK(fdp); fdtol->fdl_next = old->fdl_next; fdtol->fdl_prev = old; old->fdl_next = fdtol; fdtol->fdl_next->fdl_prev = fdtol; FILEDESC_XUNLOCK(fdp); } else { fdtol->fdl_next = fdtol; fdtol->fdl_prev = fdtol; } return (fdtol); } static int sysctl_kern_proc_nfds(SYSCTL_HANDLER_ARGS) { struct filedesc *fdp; int i, count, slots; if (*(int *)arg1 != 0) return (EINVAL); fdp = curproc->p_fd; count = 0; FILEDESC_SLOCK(fdp); slots = NDSLOTS(fdp->fd_lastfile + 1); for (i = 0; i < slots; i++) count += bitcountl(fdp->fd_map[i]); FILEDESC_SUNLOCK(fdp); return (SYSCTL_OUT(req, &count, sizeof(count))); } static SYSCTL_NODE(_kern_proc, KERN_PROC_NFDS, nfds, CTLFLAG_RD|CTLFLAG_CAPRD|CTLFLAG_MPSAFE, sysctl_kern_proc_nfds, "Number of open file descriptors"); /* * Get file structures globally. */ static int sysctl_kern_file(SYSCTL_HANDLER_ARGS) { struct xfile xf; struct filedesc *fdp; struct file *fp; struct proc *p; int error, n; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); if (req->oldptr == NULL) { n = 0; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } fdp = fdhold(p); PROC_UNLOCK(p); if (fdp == NULL) continue; /* overestimates sparse tables. */ if (fdp->fd_lastfile > 0) n += fdp->fd_lastfile; fddrop(fdp); } sx_sunlock(&allproc_lock); return (SYSCTL_OUT(req, 0, n * sizeof(xf))); } error = 0; bzero(&xf, sizeof(xf)); xf.xf_size = sizeof(xf); sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } if (p_cansee(req->td, p) != 0) { PROC_UNLOCK(p); continue; } xf.xf_pid = p->p_pid; xf.xf_uid = p->p_ucred->cr_uid; fdp = fdhold(p); PROC_UNLOCK(p); if (fdp == NULL) continue; FILEDESC_SLOCK(fdp); for (n = 0; fdp->fd_refcnt > 0 && n <= fdp->fd_lastfile; ++n) { if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; xf.xf_fd = n; xf.xf_file = fp; xf.xf_data = fp->f_data; xf.xf_vnode = fp->f_vnode; xf.xf_type = fp->f_type; xf.xf_count = fp->f_count; xf.xf_msgcount = 0; xf.xf_offset = foffset_get(fp); xf.xf_flag = fp->f_flag; error = SYSCTL_OUT(req, &xf, sizeof(xf)); if (error) break; } FILEDESC_SUNLOCK(fdp); fddrop(fdp); if (error) break; } sx_sunlock(&allproc_lock); return (error); } SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE, 0, 0, sysctl_kern_file, "S,xfile", "Entire file table"); #ifdef KINFO_FILE_SIZE CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE); #endif static int xlate_fflags(int fflags) { static const struct { int fflag; int kf_fflag; } fflags_table[] = { { FAPPEND, KF_FLAG_APPEND }, { FASYNC, KF_FLAG_ASYNC }, { FFSYNC, KF_FLAG_FSYNC }, { FHASLOCK, KF_FLAG_HASLOCK }, { FNONBLOCK, KF_FLAG_NONBLOCK }, { FREAD, KF_FLAG_READ }, { FWRITE, KF_FLAG_WRITE }, { O_CREAT, KF_FLAG_CREAT }, { O_DIRECT, KF_FLAG_DIRECT }, { O_EXCL, KF_FLAG_EXCL }, { O_EXEC, KF_FLAG_EXEC }, { O_EXLOCK, KF_FLAG_EXLOCK }, { O_NOFOLLOW, KF_FLAG_NOFOLLOW }, { O_SHLOCK, KF_FLAG_SHLOCK }, { O_TRUNC, KF_FLAG_TRUNC } }; unsigned int i; int kflags; kflags = 0; for (i = 0; i < nitems(fflags_table); i++) if (fflags & fflags_table[i].fflag) kflags |= fflags_table[i].kf_fflag; return (kflags); } /* Trim unused data from kf_path by truncating the structure size. */ static void pack_kinfo(struct kinfo_file *kif) { kif->kf_structsize = offsetof(struct kinfo_file, kf_path) + strlen(kif->kf_path) + 1; kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t)); } static void export_file_to_kinfo(struct file *fp, int fd, cap_rights_t *rightsp, struct kinfo_file *kif, struct filedesc *fdp, int flags) { int error; bzero(kif, sizeof(*kif)); /* Set a default type to allow for empty fill_kinfo() methods. */ kif->kf_type = KF_TYPE_UNKNOWN; kif->kf_flags = xlate_fflags(fp->f_flag); if (rightsp != NULL) kif->kf_cap_rights = *rightsp; else cap_rights_init(&kif->kf_cap_rights); kif->kf_fd = fd; kif->kf_ref_count = fp->f_count; kif->kf_offset = foffset_get(fp); /* * This may drop the filedesc lock, so the 'fp' cannot be * accessed after this call. */ error = fo_fill_kinfo(fp, kif, fdp); if (error == 0) kif->kf_status |= KF_ATTR_VALID; if ((flags & KERN_FILEDESC_PACK_KINFO) != 0) pack_kinfo(kif); else kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t)); } static void export_vnode_to_kinfo(struct vnode *vp, int fd, int fflags, struct kinfo_file *kif, int flags) { int error; bzero(kif, sizeof(*kif)); kif->kf_type = KF_TYPE_VNODE; error = vn_fill_kinfo_vnode(vp, kif); if (error == 0) kif->kf_status |= KF_ATTR_VALID; kif->kf_flags = xlate_fflags(fflags); cap_rights_init(&kif->kf_cap_rights); kif->kf_fd = fd; kif->kf_ref_count = -1; kif->kf_offset = -1; if ((flags & KERN_FILEDESC_PACK_KINFO) != 0) pack_kinfo(kif); else kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t)); vrele(vp); } struct export_fd_buf { struct filedesc *fdp; struct sbuf *sb; ssize_t remainder; struct kinfo_file kif; int flags; }; static int export_kinfo_to_sb(struct export_fd_buf *efbuf) { struct kinfo_file *kif; kif = &efbuf->kif; if (efbuf->remainder != -1) { if (efbuf->remainder < kif->kf_structsize) { /* Terminate export. */ efbuf->remainder = 0; return (0); } efbuf->remainder -= kif->kf_structsize; } return (sbuf_bcat(efbuf->sb, kif, kif->kf_structsize) == 0 ? 0 : ENOMEM); } static int export_file_to_sb(struct file *fp, int fd, cap_rights_t *rightsp, struct export_fd_buf *efbuf) { int error; if (efbuf->remainder == 0) return (0); export_file_to_kinfo(fp, fd, rightsp, &efbuf->kif, efbuf->fdp, efbuf->flags); FILEDESC_SUNLOCK(efbuf->fdp); error = export_kinfo_to_sb(efbuf); FILEDESC_SLOCK(efbuf->fdp); return (error); } static int export_vnode_to_sb(struct vnode *vp, int fd, int fflags, struct export_fd_buf *efbuf) { int error; if (efbuf->remainder == 0) return (0); if (efbuf->fdp != NULL) FILEDESC_SUNLOCK(efbuf->fdp); export_vnode_to_kinfo(vp, fd, fflags, &efbuf->kif, efbuf->flags); error = export_kinfo_to_sb(efbuf); if (efbuf->fdp != NULL) FILEDESC_SLOCK(efbuf->fdp); return (error); } /* * Store a process file descriptor information to sbuf. * * Takes a locked proc as argument, and returns with the proc unlocked. */ int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags) { struct file *fp; struct filedesc *fdp; struct export_fd_buf *efbuf; struct vnode *cttyvp, *textvp, *tracevp; int error, i; cap_rights_t rights; PROC_LOCK_ASSERT(p, MA_OWNED); /* ktrace vnode */ tracevp = p->p_tracevp; if (tracevp != NULL) vrefact(tracevp); /* text vnode */ textvp = p->p_textvp; if (textvp != NULL) vrefact(textvp); /* Controlling tty. */ cttyvp = NULL; if (p->p_pgrp != NULL && p->p_pgrp->pg_session != NULL) { cttyvp = p->p_pgrp->pg_session->s_ttyvp; if (cttyvp != NULL) vrefact(cttyvp); } fdp = fdhold(p); PROC_UNLOCK(p); efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK); efbuf->fdp = NULL; efbuf->sb = sb; efbuf->remainder = maxlen; efbuf->flags = flags; if (tracevp != NULL) export_vnode_to_sb(tracevp, KF_FD_TYPE_TRACE, FREAD | FWRITE, efbuf); if (textvp != NULL) export_vnode_to_sb(textvp, KF_FD_TYPE_TEXT, FREAD, efbuf); if (cttyvp != NULL) export_vnode_to_sb(cttyvp, KF_FD_TYPE_CTTY, FREAD | FWRITE, efbuf); error = 0; if (fdp == NULL) goto fail; efbuf->fdp = fdp; FILEDESC_SLOCK(fdp); /* working directory */ if (fdp->fd_cdir != NULL) { vrefact(fdp->fd_cdir); export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD, FREAD, efbuf); } /* root directory */ if (fdp->fd_rdir != NULL) { vrefact(fdp->fd_rdir); export_vnode_to_sb(fdp->fd_rdir, KF_FD_TYPE_ROOT, FREAD, efbuf); } /* jail directory */ if (fdp->fd_jdir != NULL) { vrefact(fdp->fd_jdir); export_vnode_to_sb(fdp->fd_jdir, KF_FD_TYPE_JAIL, FREAD, efbuf); } for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) { if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; #ifdef CAPABILITIES rights = *cap_rights(fdp, i); #else /* !CAPABILITIES */ cap_rights_init(&rights); #endif /* * Create sysctl entry. It is OK to drop the filedesc * lock inside of export_file_to_sb() as we will * re-validate and re-evaluate its properties when the * loop continues. */ error = export_file_to_sb(fp, i, &rights, efbuf); if (error != 0 || efbuf->remainder == 0) break; } FILEDESC_SUNLOCK(fdp); fddrop(fdp); fail: free(efbuf, M_TEMP); return (error); } #define FILEDESC_SBUF_SIZE (sizeof(struct kinfo_file) * 5) /* * Get per-process file descriptors for use by procstat(1), et al. */ static int sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) { struct sbuf sb; struct proc *p; ssize_t maxlen; int error, error2, *name; name = (int *)arg1; sbuf_new_for_sysctl(&sb, NULL, FILEDESC_SBUF_SIZE, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); if (error != 0) { sbuf_delete(&sb); return (error); } maxlen = req->oldptr != NULL ? req->oldlen : -1; error = kern_proc_filedesc_out(p, &sb, maxlen, KERN_FILEDESC_PACK_KINFO); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? error : error2); } #ifdef COMPAT_FREEBSD7 #ifdef KINFO_OFILE_SIZE CTASSERT(sizeof(struct kinfo_ofile) == KINFO_OFILE_SIZE); #endif static void kinfo_to_okinfo(struct kinfo_file *kif, struct kinfo_ofile *okif) { okif->kf_structsize = sizeof(*okif); okif->kf_type = kif->kf_type; okif->kf_fd = kif->kf_fd; okif->kf_ref_count = kif->kf_ref_count; okif->kf_flags = kif->kf_flags & (KF_FLAG_READ | KF_FLAG_WRITE | KF_FLAG_APPEND | KF_FLAG_ASYNC | KF_FLAG_FSYNC | KF_FLAG_NONBLOCK | KF_FLAG_DIRECT | KF_FLAG_HASLOCK); okif->kf_offset = kif->kf_offset; - okif->kf_vnode_type = kif->kf_vnode_type; - okif->kf_sock_domain = kif->kf_sock_domain; - okif->kf_sock_type = kif->kf_sock_type; - okif->kf_sock_protocol = kif->kf_sock_protocol; + if (kif->kf_type == KF_TYPE_VNODE) + okif->kf_vnode_type = kif->kf_un.kf_file.kf_file_type; + else + okif->kf_vnode_type = KF_VTYPE_VNON; strlcpy(okif->kf_path, kif->kf_path, sizeof(okif->kf_path)); - okif->kf_sa_local = kif->kf_sa_local; - okif->kf_sa_peer = kif->kf_sa_peer; + if (kif->kf_type == KF_TYPE_SOCKET) { + okif->kf_sock_domain = kif->kf_un.kf_sock.kf_sock_domain0; + okif->kf_sock_type = kif->kf_un.kf_sock.kf_sock_type0; + okif->kf_sock_protocol = kif->kf_un.kf_sock.kf_sock_protocol0; + okif->kf_sa_local = kif->kf_un.kf_sock.kf_sa_local; + okif->kf_sa_peer = kif->kf_un.kf_sock.kf_sa_peer; + } else { + okif->kf_sa_local.ss_family = AF_UNSPEC; + okif->kf_sa_peer.ss_family = AF_UNSPEC; + } } static int export_vnode_for_osysctl(struct vnode *vp, int type, struct kinfo_file *kif, struct kinfo_ofile *okif, struct filedesc *fdp, struct sysctl_req *req) { int error; vrefact(vp); FILEDESC_SUNLOCK(fdp); export_vnode_to_kinfo(vp, type, 0, kif, KERN_FILEDESC_PACK_KINFO); kinfo_to_okinfo(kif, okif); error = SYSCTL_OUT(req, okif, sizeof(*okif)); FILEDESC_SLOCK(fdp); return (error); } /* * Get per-process file descriptors for use by procstat(1), et al. */ static int sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) { struct kinfo_ofile *okif; struct kinfo_file *kif; struct filedesc *fdp; int error, i, *name; struct file *fp; struct proc *p; name = (int *)arg1; error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); if (error != 0) return (error); fdp = fdhold(p); PROC_UNLOCK(p); if (fdp == NULL) return (ENOENT); kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); okif = malloc(sizeof(*okif), M_TEMP, M_WAITOK); FILEDESC_SLOCK(fdp); if (fdp->fd_cdir != NULL) export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif, okif, fdp, req); if (fdp->fd_rdir != NULL) export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif, okif, fdp, req); if (fdp->fd_jdir != NULL) export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif, okif, fdp, req); for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) { if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; export_file_to_kinfo(fp, i, NULL, kif, fdp, KERN_FILEDESC_PACK_KINFO); FILEDESC_SUNLOCK(fdp); kinfo_to_okinfo(kif, okif); error = SYSCTL_OUT(req, okif, sizeof(*okif)); FILEDESC_SLOCK(fdp); if (error) break; } FILEDESC_SUNLOCK(fdp); fddrop(fdp); free(kif, M_TEMP); free(okif, M_TEMP); return (0); } static SYSCTL_NODE(_kern_proc, KERN_PROC_OFILEDESC, ofiledesc, CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_ofiledesc, "Process ofiledesc entries"); #endif /* COMPAT_FREEBSD7 */ int vntype_to_kinfo(int vtype) { struct { int vtype; int kf_vtype; } vtypes_table[] = { { VBAD, KF_VTYPE_VBAD }, { VBLK, KF_VTYPE_VBLK }, { VCHR, KF_VTYPE_VCHR }, { VDIR, KF_VTYPE_VDIR }, { VFIFO, KF_VTYPE_VFIFO }, { VLNK, KF_VTYPE_VLNK }, { VNON, KF_VTYPE_VNON }, { VREG, KF_VTYPE_VREG }, { VSOCK, KF_VTYPE_VSOCK } }; unsigned int i; /* * Perform vtype translation. */ for (i = 0; i < nitems(vtypes_table); i++) if (vtypes_table[i].vtype == vtype) return (vtypes_table[i].kf_vtype); return (KF_VTYPE_UNKNOWN); } static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_filedesc, "Process filedesc entries"); /* * Store a process current working directory information to sbuf. * * Takes a locked proc as argument, and returns with the proc unlocked. */ int kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen) { struct filedesc *fdp; struct export_fd_buf *efbuf; int error; PROC_LOCK_ASSERT(p, MA_OWNED); fdp = fdhold(p); PROC_UNLOCK(p); if (fdp == NULL) return (EINVAL); efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK); efbuf->fdp = fdp; efbuf->sb = sb; efbuf->remainder = maxlen; FILEDESC_SLOCK(fdp); if (fdp->fd_cdir == NULL) error = EINVAL; else { vrefact(fdp->fd_cdir); error = export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD, FREAD, efbuf); } FILEDESC_SUNLOCK(fdp); fddrop(fdp); free(efbuf, M_TEMP); return (error); } /* * Get per-process current working directory. */ static int sysctl_kern_proc_cwd(SYSCTL_HANDLER_ARGS) { struct sbuf sb; struct proc *p; ssize_t maxlen; int error, error2, *name; name = (int *)arg1; sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_file), req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); if (error != 0) { sbuf_delete(&sb); return (error); } maxlen = req->oldptr != NULL ? req->oldlen : -1; error = kern_proc_cwd_out(p, &sb, maxlen); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? error : error2); } static SYSCTL_NODE(_kern_proc, KERN_PROC_CWD, cwd, CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_cwd, "Process current working directory"); #ifdef DDB /* * For the purposes of debugging, generate a human-readable string for the * file type. */ static const char * file_type_to_name(short type) { switch (type) { case 0: return ("zero"); case DTYPE_VNODE: return ("vnod"); case DTYPE_SOCKET: return ("sock"); case DTYPE_PIPE: return ("pipe"); case DTYPE_FIFO: return ("fifo"); case DTYPE_KQUEUE: return ("kque"); case DTYPE_CRYPTO: return ("crpt"); case DTYPE_MQUEUE: return ("mque"); case DTYPE_SHM: return ("shm"); case DTYPE_SEM: return ("ksem"); default: return ("unkn"); } } /* * For the purposes of debugging, identify a process (if any, perhaps one of * many) that references the passed file in its file descriptor array. Return * NULL if none. */ static struct proc * file_to_first_proc(struct file *fp) { struct filedesc *fdp; struct proc *p; int n; FOREACH_PROC_IN_SYSTEM(p) { if (p->p_state == PRS_NEW) continue; fdp = p->p_fd; if (fdp == NULL) continue; for (n = 0; n <= fdp->fd_lastfile; n++) { if (fp == fdp->fd_ofiles[n].fde_file) return (p); } } return (NULL); } static void db_print_file(struct file *fp, int header) { struct proc *p; if (header) db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n", "File", "Type", "Data", "Flag", "GCFl", "Count", "MCount", "Vnode", "FPID", "FCmd"); p = file_to_first_proc(fp); db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp, file_type_to_name(fp->f_type), fp->f_data, fp->f_flag, 0, fp->f_count, 0, fp->f_vnode, p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-"); } DB_SHOW_COMMAND(file, db_show_file) { struct file *fp; if (!have_addr) { db_printf("usage: show file \n"); return; } fp = (struct file *)addr; db_print_file(fp, 1); } DB_SHOW_COMMAND(files, db_show_files) { struct filedesc *fdp; struct file *fp; struct proc *p; int header; int n; header = 1; FOREACH_PROC_IN_SYSTEM(p) { if (p->p_state == PRS_NEW) continue; if ((fdp = p->p_fd) == NULL) continue; for (n = 0; n <= fdp->fd_lastfile; ++n) { if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; db_print_file(fp, header); header = 0; } } } #endif SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, &maxfilesperproc, 0, "Maximum files allowed open per process"); SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, "Maximum number of files"); SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files"); /* ARGSUSED*/ static void filelistinit(void *dummy) { file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); filedesc0_zone = uma_zcreate("filedesc0", sizeof(struct filedesc0), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); } SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL); /*-------------------------------------------------------------------*/ static int badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { return (EBADF); } static int badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) { return (EINVAL); } static int badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) { return (EBADF); } static int badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { return (0); } static int badfo_kqfilter(struct file *fp, struct knote *kn) { return (EBADF); } static int badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) { return (EBADF); } static int badfo_close(struct file *fp, struct thread *td) { return (0); } static int badfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) { return (EBADF); } static int badfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td) { return (EBADF); } static int badfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, struct thread *td) { return (EBADF); } static int badfo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { return (0); } struct fileops badfileops = { .fo_read = badfo_readwrite, .fo_write = badfo_readwrite, .fo_truncate = badfo_truncate, .fo_ioctl = badfo_ioctl, .fo_poll = badfo_poll, .fo_kqfilter = badfo_kqfilter, .fo_stat = badfo_stat, .fo_close = badfo_close, .fo_chmod = badfo_chmod, .fo_chown = badfo_chown, .fo_sendfile = badfo_sendfile, .fo_fill_kinfo = badfo_fill_kinfo, }; int invfo_rdwr(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { return (EOPNOTSUPP); } int invfo_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) { return (EINVAL); } int invfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) { return (ENOTTY); } int invfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { return (poll_no_poll(events)); } int invfo_kqfilter(struct file *fp, struct knote *kn) { return (EINVAL); } int invfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) { return (EINVAL); } int invfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td) { return (EINVAL); } int invfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, struct thread *td) { return (EINVAL); } /*-------------------------------------------------------------------*/ /* * File Descriptor pseudo-device driver (/dev/fd/). * * Opening minor device N dup()s the file (if any) connected to file * descriptor N belonging to the calling process. Note that this driver * consists of only the ``open()'' routine, because all subsequent * references to this file will be direct to the other driver. * * XXX: we could give this one a cloning event handler if necessary. */ /* ARGSUSED */ static int fdopen(struct cdev *dev, int mode, int type, struct thread *td) { /* * XXX Kludge: set curthread->td_dupfd to contain the value of the * the file descriptor being sought for duplication. The error * return ensures that the vnode for this device will be released * by vn_open. Open will detect this special error and take the * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN * will simply report the error. */ td->td_dupfd = dev2unit(dev); return (ENODEV); } static struct cdevsw fildesc_cdevsw = { .d_version = D_VERSION, .d_open = fdopen, .d_name = "FD", }; static void fildesc_drvinit(void *unused) { struct cdev *dev; dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0666, "fd/0"); make_dev_alias(dev, "stdin"); dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 1, NULL, UID_ROOT, GID_WHEEL, 0666, "fd/1"); make_dev_alias(dev, "stdout"); dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 2, NULL, UID_ROOT, GID_WHEEL, 0666, "fd/2"); make_dev_alias(dev, "stderr"); } SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL); Index: head/sys/kern/kern_proc.c =================================================================== --- head/sys/kern/kern_proc.c (revision 318735) +++ head/sys/kern/kern_proc.c (revision 318736) @@ -1,3116 +1,3125 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_ktrace.h" #include "opt_kstack_pages.h" #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #include #endif SDT_PROVIDER_DEFINE(proc); SDT_PROBE_DEFINE4(proc, , ctor, entry, "struct proc *", "int", "void *", "int"); SDT_PROBE_DEFINE4(proc, , ctor, return, "struct proc *", "int", "void *", "int"); SDT_PROBE_DEFINE4(proc, , dtor, entry, "struct proc *", "int", "void *", "struct thread *"); SDT_PROBE_DEFINE3(proc, , dtor, return, "struct proc *", "int", "void *"); SDT_PROBE_DEFINE3(proc, , init, entry, "struct proc *", "int", "int"); SDT_PROBE_DEFINE3(proc, , init, return, "struct proc *", "int", "int"); MALLOC_DEFINE(M_PGRP, "pgrp", "process group header"); MALLOC_DEFINE(M_SESSION, "session", "session header"); static MALLOC_DEFINE(M_PROC, "proc", "Proc structures"); MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures"); static void doenterpgrp(struct proc *, struct pgrp *); static void orphanpg(struct pgrp *pg); static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp); static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp); static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread); static void pgadjustjobc(struct pgrp *pgrp, int entering); static void pgdelete(struct pgrp *); static int proc_ctor(void *mem, int size, void *arg, int flags); static void proc_dtor(void *mem, int size, void *arg); static int proc_init(void *mem, int size, int flags); static void proc_fini(void *mem, int size); static void pargs_free(struct pargs *pa); static struct proc *zpfind_locked(pid_t pid); /* * Other process lists */ struct pidhashhead *pidhashtbl; u_long pidhash; struct pgrphashhead *pgrphashtbl; u_long pgrphash; struct proclist allproc; struct proclist zombproc; struct sx allproc_lock; struct sx proctree_lock; struct mtx ppeers_lock; uma_zone_t proc_zone; /* * The offset of various fields in struct proc and struct thread. * These are used by kernel debuggers to enumerate kernel threads and * processes. */ const int proc_off_p_pid = offsetof(struct proc, p_pid); const int proc_off_p_comm = offsetof(struct proc, p_comm); const int proc_off_p_list = offsetof(struct proc, p_list); const int proc_off_p_threads = offsetof(struct proc, p_threads); const int thread_off_td_tid = offsetof(struct thread, td_tid); const int thread_off_td_name = offsetof(struct thread, td_name); const int thread_off_td_oncpu = offsetof(struct thread, td_oncpu); const int thread_off_td_pcb = offsetof(struct thread, td_pcb); const int thread_off_td_plist = offsetof(struct thread, td_plist); int kstack_pages = KSTACK_PAGES; SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0, "Kernel stack size in pages"); static int vmmap_skip_res_cnt = 0; SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW, &vmmap_skip_res_cnt, 0, "Skip calculation of the pages resident count in kern.proc.vmmap"); CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE); #ifdef COMPAT_FREEBSD32 CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE); #endif /* * Initialize global process hashing structures. */ void procinit(void) { sx_init(&allproc_lock, "allproc"); sx_init(&proctree_lock, "proctree"); mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF); LIST_INIT(&allproc); LIST_INIT(&zombproc); pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash); pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash); proc_zone = uma_zcreate("PROC", sched_sizeof_proc(), proc_ctor, proc_dtor, proc_init, proc_fini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uihashinit(); } /* * Prepare a proc for use. */ static int proc_ctor(void *mem, int size, void *arg, int flags) { struct proc *p; struct thread *td; p = (struct proc *)mem; SDT_PROBE4(proc, , ctor , entry, p, size, arg, flags); EVENTHANDLER_INVOKE(process_ctor, p); SDT_PROBE4(proc, , ctor , return, p, size, arg, flags); td = FIRST_THREAD_IN_PROC(p); if (td != NULL) { /* Make sure all thread constructors are executed */ EVENTHANDLER_INVOKE(thread_ctor, td); } return (0); } /* * Reclaim a proc after use. */ static void proc_dtor(void *mem, int size, void *arg) { struct proc *p; struct thread *td; /* INVARIANTS checks go here */ p = (struct proc *)mem; td = FIRST_THREAD_IN_PROC(p); SDT_PROBE4(proc, , dtor, entry, p, size, arg, td); if (td != NULL) { #ifdef INVARIANTS KASSERT((p->p_numthreads == 1), ("bad number of threads in exiting process")); KASSERT(STAILQ_EMPTY(&p->p_ktr), ("proc_dtor: non-empty p_ktr")); #endif /* Free all OSD associated to this thread. */ osd_thread_exit(td); /* Make sure all thread destructors are executed */ EVENTHANDLER_INVOKE(thread_dtor, td); } EVENTHANDLER_INVOKE(process_dtor, p); if (p->p_ksi != NULL) KASSERT(! KSI_ONQ(p->p_ksi), ("SIGCHLD queue")); SDT_PROBE3(proc, , dtor, return, p, size, arg); } /* * Initialize type-stable parts of a proc (when newly created). */ static int proc_init(void *mem, int size, int flags) { struct proc *p; p = (struct proc *)mem; SDT_PROBE3(proc, , init, entry, p, size, flags); mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW); mtx_init(&p->p_slock, "process slock", NULL, MTX_SPIN | MTX_NEW); mtx_init(&p->p_statmtx, "pstatl", NULL, MTX_SPIN | MTX_NEW); mtx_init(&p->p_itimmtx, "pitiml", NULL, MTX_SPIN | MTX_NEW); mtx_init(&p->p_profmtx, "pprofl", NULL, MTX_SPIN | MTX_NEW); cv_init(&p->p_pwait, "ppwait"); cv_init(&p->p_dbgwait, "dbgwait"); TAILQ_INIT(&p->p_threads); /* all threads in proc */ EVENTHANDLER_INVOKE(process_init, p); p->p_stats = pstats_alloc(); p->p_pgrp = NULL; SDT_PROBE3(proc, , init, return, p, size, flags); return (0); } /* * UMA should ensure that this function is never called. * Freeing a proc structure would violate type stability. */ static void proc_fini(void *mem, int size) { #ifdef notnow struct proc *p; p = (struct proc *)mem; EVENTHANDLER_INVOKE(process_fini, p); pstats_free(p->p_stats); thread_free(FIRST_THREAD_IN_PROC(p)); mtx_destroy(&p->p_mtx); if (p->p_ksi != NULL) ksiginfo_free(p->p_ksi); #else panic("proc reclaimed"); #endif } /* * Is p an inferior of the current process? */ int inferior(struct proc *p) { sx_assert(&proctree_lock, SX_LOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); for (; p != curproc; p = proc_realparent(p)) { if (p->p_pid == 0) return (0); } return (1); } struct proc * pfind_locked(pid_t pid) { struct proc *p; sx_assert(&allproc_lock, SX_LOCKED); LIST_FOREACH(p, PIDHASH(pid), p_hash) { if (p->p_pid == pid) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); p = NULL; } break; } } return (p); } /* * Locate a process by number; return only "live" processes -- i.e., neither * zombies nor newly born but incompletely initialized processes. By not * returning processes in the PRS_NEW state, we allow callers to avoid * testing for that condition to avoid dereferencing p_ucred, et al. */ struct proc * pfind(pid_t pid) { struct proc *p; sx_slock(&allproc_lock); p = pfind_locked(pid); sx_sunlock(&allproc_lock); return (p); } static struct proc * pfind_tid_locked(pid_t tid) { struct proc *p; struct thread *td; sx_assert(&allproc_lock, SX_LOCKED); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } FOREACH_THREAD_IN_PROC(p, td) { if (td->td_tid == tid) goto found; } PROC_UNLOCK(p); } found: return (p); } /* * Locate a process group by number. * The caller must hold proctree_lock. */ struct pgrp * pgfind(pid_t pgid) { struct pgrp *pgrp; sx_assert(&proctree_lock, SX_LOCKED); LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) { if (pgrp->pg_id == pgid) { PGRP_LOCK(pgrp); return (pgrp); } } return (NULL); } /* * Locate process and do additional manipulations, depending on flags. */ int pget(pid_t pid, int flags, struct proc **pp) { struct proc *p; int error; sx_slock(&allproc_lock); if (pid <= PID_MAX) { p = pfind_locked(pid); if (p == NULL && (flags & PGET_NOTWEXIT) == 0) p = zpfind_locked(pid); } else if ((flags & PGET_NOTID) == 0) { p = pfind_tid_locked(pid); } else { p = NULL; } sx_sunlock(&allproc_lock); if (p == NULL) return (ESRCH); if ((flags & PGET_CANSEE) != 0) { error = p_cansee(curthread, p); if (error != 0) goto errout; } if ((flags & PGET_CANDEBUG) != 0) { error = p_candebug(curthread, p); if (error != 0) goto errout; } if ((flags & PGET_ISCURRENT) != 0 && curproc != p) { error = EPERM; goto errout; } if ((flags & PGET_NOTWEXIT) != 0 && (p->p_flag & P_WEXIT) != 0) { error = ESRCH; goto errout; } if ((flags & PGET_NOTINEXEC) != 0 && (p->p_flag & P_INEXEC) != 0) { /* * XXXRW: Not clear ESRCH is the right error during proc * execve(). */ error = ESRCH; goto errout; } if ((flags & PGET_HOLD) != 0) { _PHOLD(p); PROC_UNLOCK(p); } *pp = p; return (0); errout: PROC_UNLOCK(p); return (error); } /* * Create a new process group. * pgid must be equal to the pid of p. * Begin a new session if required. */ int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess) { sx_assert(&proctree_lock, SX_XLOCKED); KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL")); KASSERT(p->p_pid == pgid, ("enterpgrp: new pgrp and pid != pgid")); KASSERT(pgfind(pgid) == NULL, ("enterpgrp: pgrp with pgid exists")); KASSERT(!SESS_LEADER(p), ("enterpgrp: session leader attempted setpgrp")); mtx_init(&pgrp->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); if (sess != NULL) { /* * new session */ mtx_init(&sess->s_mtx, "session", NULL, MTX_DEF); PROC_LOCK(p); p->p_flag &= ~P_CONTROLT; PROC_UNLOCK(p); PGRP_LOCK(pgrp); sess->s_leader = p; sess->s_sid = p->p_pid; refcount_init(&sess->s_count, 1); sess->s_ttyvp = NULL; sess->s_ttydp = NULL; sess->s_ttyp = NULL; bcopy(p->p_session->s_login, sess->s_login, sizeof(sess->s_login)); pgrp->pg_session = sess; KASSERT(p == curproc, ("enterpgrp: mksession and p != curproc")); } else { pgrp->pg_session = p->p_session; sess_hold(pgrp->pg_session); PGRP_LOCK(pgrp); } pgrp->pg_id = pgid; LIST_INIT(&pgrp->pg_members); /* * As we have an exclusive lock of proctree_lock, * this should not deadlock. */ LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash); pgrp->pg_jobc = 0; SLIST_INIT(&pgrp->pg_sigiolst); PGRP_UNLOCK(pgrp); doenterpgrp(p, pgrp); return (0); } /* * Move p to an existing process group */ int enterthispgrp(struct proc *p, struct pgrp *pgrp) { sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(p, MA_NOTOWNED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED); KASSERT(pgrp->pg_session == p->p_session, ("%s: pgrp's session %p, p->p_session %p.\n", __func__, pgrp->pg_session, p->p_session)); KASSERT(pgrp != p->p_pgrp, ("%s: p belongs to pgrp.", __func__)); doenterpgrp(p, pgrp); return (0); } /* * Move p to a process group */ static void doenterpgrp(struct proc *p, struct pgrp *pgrp) { struct pgrp *savepgrp; sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(p, MA_NOTOWNED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED); savepgrp = p->p_pgrp; /* * Adjust eligibility of affected pgrps to participate in job control. * Increment eligibility counts before decrementing, otherwise we * could reach 0 spuriously during the first call. */ fixjobc(p, pgrp, 1); fixjobc(p, p->p_pgrp, 0); PGRP_LOCK(pgrp); PGRP_LOCK(savepgrp); PROC_LOCK(p); LIST_REMOVE(p, p_pglist); p->p_pgrp = pgrp; PROC_UNLOCK(p); LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); PGRP_UNLOCK(savepgrp); PGRP_UNLOCK(pgrp); if (LIST_EMPTY(&savepgrp->pg_members)) pgdelete(savepgrp); } /* * remove process from process group */ int leavepgrp(struct proc *p) { struct pgrp *savepgrp; sx_assert(&proctree_lock, SX_XLOCKED); savepgrp = p->p_pgrp; PGRP_LOCK(savepgrp); PROC_LOCK(p); LIST_REMOVE(p, p_pglist); p->p_pgrp = NULL; PROC_UNLOCK(p); PGRP_UNLOCK(savepgrp); if (LIST_EMPTY(&savepgrp->pg_members)) pgdelete(savepgrp); return (0); } /* * delete a process group */ static void pgdelete(struct pgrp *pgrp) { struct session *savesess; struct tty *tp; sx_assert(&proctree_lock, SX_XLOCKED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED); /* * Reset any sigio structures pointing to us as a result of * F_SETOWN with our pgid. */ funsetownlst(&pgrp->pg_sigiolst); PGRP_LOCK(pgrp); tp = pgrp->pg_session->s_ttyp; LIST_REMOVE(pgrp, pg_hash); savesess = pgrp->pg_session; PGRP_UNLOCK(pgrp); /* Remove the reference to the pgrp before deallocating it. */ if (tp != NULL) { tty_lock(tp); tty_rel_pgrp(tp, pgrp); } mtx_destroy(&pgrp->pg_mtx); free(pgrp, M_PGRP); sess_release(savesess); } static void pgadjustjobc(struct pgrp *pgrp, int entering) { PGRP_LOCK(pgrp); if (entering) pgrp->pg_jobc++; else { --pgrp->pg_jobc; if (pgrp->pg_jobc == 0) orphanpg(pgrp); } PGRP_UNLOCK(pgrp); } /* * Adjust pgrp jobc counters when specified process changes process group. * We count the number of processes in each process group that "qualify" * the group for terminal job control (those with a parent in a different * process group of the same session). If that count reaches zero, the * process group becomes orphaned. Check both the specified process' * process group and that of its children. * entering == 0 => p is leaving specified group. * entering == 1 => p is entering specified group. */ void fixjobc(struct proc *p, struct pgrp *pgrp, int entering) { struct pgrp *hispgrp; struct session *mysession; struct proc *q; sx_assert(&proctree_lock, SX_LOCKED); PROC_LOCK_ASSERT(p, MA_NOTOWNED); PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED); SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED); /* * Check p's parent to see whether p qualifies its own process * group; if so, adjust count for p's process group. */ mysession = pgrp->pg_session; if ((hispgrp = p->p_pptr->p_pgrp) != pgrp && hispgrp->pg_session == mysession) pgadjustjobc(pgrp, entering); /* * Check this process' children to see whether they qualify * their process groups; if so, adjust counts for children's * process groups. */ LIST_FOREACH(q, &p->p_children, p_sibling) { hispgrp = q->p_pgrp; if (hispgrp == pgrp || hispgrp->pg_session != mysession) continue; if (q->p_state == PRS_ZOMBIE) continue; pgadjustjobc(hispgrp, entering); } } void killjobc(void) { struct session *sp; struct tty *tp; struct proc *p; struct vnode *ttyvp; p = curproc; MPASS(p->p_flag & P_WEXIT); /* * Do a quick check to see if there is anything to do with the * proctree_lock held. pgrp and LIST_EMPTY checks are for fixjobc(). */ PROC_LOCK(p); if (!SESS_LEADER(p) && (p->p_pgrp == p->p_pptr->p_pgrp) && LIST_EMPTY(&p->p_children)) { PROC_UNLOCK(p); return; } PROC_UNLOCK(p); sx_xlock(&proctree_lock); if (SESS_LEADER(p)) { sp = p->p_session; /* * s_ttyp is not zero'd; we use this to indicate that * the session once had a controlling terminal. (for * logging and informational purposes) */ SESS_LOCK(sp); ttyvp = sp->s_ttyvp; tp = sp->s_ttyp; sp->s_ttyvp = NULL; sp->s_ttydp = NULL; sp->s_leader = NULL; SESS_UNLOCK(sp); /* * Signal foreground pgrp and revoke access to * controlling terminal if it has not been revoked * already. * * Because the TTY may have been revoked in the mean * time and could already have a new session associated * with it, make sure we don't send a SIGHUP to a * foreground process group that does not belong to this * session. */ if (tp != NULL) { tty_lock(tp); if (tp->t_session == sp) tty_signal_pgrp(tp, SIGHUP); tty_unlock(tp); } if (ttyvp != NULL) { sx_xunlock(&proctree_lock); if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) { VOP_REVOKE(ttyvp, REVOKEALL); VOP_UNLOCK(ttyvp, 0); } vrele(ttyvp); sx_xlock(&proctree_lock); } } fixjobc(p, p->p_pgrp, 0); sx_xunlock(&proctree_lock); } /* * A process group has become orphaned; * if there are any stopped processes in the group, * hang-up all process in that group. */ static void orphanpg(struct pgrp *pg) { struct proc *p; PGRP_LOCK_ASSERT(pg, MA_OWNED); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (P_SHOULDSTOP(p) == P_STOPPED_SIG) { PROC_UNLOCK(p); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); kern_psignal(p, SIGHUP); kern_psignal(p, SIGCONT); PROC_UNLOCK(p); } return; } PROC_UNLOCK(p); } } void sess_hold(struct session *s) { refcount_acquire(&s->s_count); } void sess_release(struct session *s) { if (refcount_release(&s->s_count)) { if (s->s_ttyp != NULL) { tty_lock(s->s_ttyp); tty_rel_sess(s->s_ttyp, s); } mtx_destroy(&s->s_mtx); free(s, M_SESSION); } } #ifdef DDB DB_SHOW_COMMAND(pgrpdump, pgrpdump) { struct pgrp *pgrp; struct proc *p; int i; for (i = 0; i <= pgrphash; i++) { if (!LIST_EMPTY(&pgrphashtbl[i])) { printf("\tindx %d\n", i); LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) { printf( "\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n", (void *)pgrp, (long)pgrp->pg_id, (void *)pgrp->pg_session, pgrp->pg_session->s_count, (void *)LIST_FIRST(&pgrp->pg_members)); LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { printf("\t\tpid %ld addr %p pgrp %p\n", (long)p->p_pid, (void *)p, (void *)p->p_pgrp); } } } } } #endif /* DDB */ /* * Calculate the kinfo_proc members which contain process-wide * informations. * Must be called with the target process locked. */ static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp) { struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); kp->ki_estcpu = 0; kp->ki_pctcpu = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); kp->ki_pctcpu += sched_pctcpu(td); kp->ki_estcpu += sched_estcpu(td); thread_unlock(td); } } /* * Clear kinfo_proc and fill in any information that is common * to all threads in the process. * Must be called with the target process locked. */ static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp) { struct thread *td0; struct tty *tp; struct session *sp; struct ucred *cred; struct sigacts *ps; struct timeval boottime; /* For proc_realparent. */ sx_assert(&proctree_lock, SX_LOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); bzero(kp, sizeof(*kp)); kp->ki_structsize = sizeof(*kp); kp->ki_paddr = p; kp->ki_addr =/* p->p_addr; */0; /* XXX */ kp->ki_args = p->p_args; kp->ki_textvp = p->p_textvp; #ifdef KTRACE kp->ki_tracep = p->p_tracevp; kp->ki_traceflag = p->p_traceflag; #endif kp->ki_fd = p->p_fd; kp->ki_vmspace = p->p_vmspace; kp->ki_flag = p->p_flag; kp->ki_flag2 = p->p_flag2; cred = p->p_ucred; if (cred) { kp->ki_uid = cred->cr_uid; kp->ki_ruid = cred->cr_ruid; kp->ki_svuid = cred->cr_svuid; kp->ki_cr_flags = 0; if (cred->cr_flags & CRED_FLAG_CAPMODE) kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE; /* XXX bde doesn't like KI_NGROUPS */ if (cred->cr_ngroups > KI_NGROUPS) { kp->ki_ngroups = KI_NGROUPS; kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW; } else kp->ki_ngroups = cred->cr_ngroups; bcopy(cred->cr_groups, kp->ki_groups, kp->ki_ngroups * sizeof(gid_t)); kp->ki_rgid = cred->cr_rgid; kp->ki_svgid = cred->cr_svgid; /* If jailed(cred), emulate the old P_JAILED flag. */ if (jailed(cred)) { kp->ki_flag |= P_JAILED; /* If inside the jail, use 0 as a jail ID. */ if (cred->cr_prison != curthread->td_ucred->cr_prison) kp->ki_jid = cred->cr_prison->pr_id; } strlcpy(kp->ki_loginclass, cred->cr_loginclass->lc_name, sizeof(kp->ki_loginclass)); } ps = p->p_sigacts; if (ps) { mtx_lock(&ps->ps_mtx); kp->ki_sigignore = ps->ps_sigignore; kp->ki_sigcatch = ps->ps_sigcatch; mtx_unlock(&ps->ps_mtx); } if (p->p_state != PRS_NEW && p->p_state != PRS_ZOMBIE && p->p_vmspace != NULL) { struct vmspace *vm = p->p_vmspace; kp->ki_size = vm->vm_map.size; kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/ FOREACH_THREAD_IN_PROC(p, td0) { if (!TD_IS_SWAPPED(td0)) kp->ki_rssize += td0->td_kstack_pages; } kp->ki_swrss = vm->vm_swrss; kp->ki_tsize = vm->vm_tsize; kp->ki_dsize = vm->vm_dsize; kp->ki_ssize = vm->vm_ssize; } else if (p->p_state == PRS_ZOMBIE) kp->ki_stat = SZOMB; if (kp->ki_flag & P_INMEM) kp->ki_sflag = PS_INMEM; else kp->ki_sflag = 0; /* Calculate legacy swtime as seconds since 'swtick'. */ kp->ki_swtime = (ticks - p->p_swtick) / hz; kp->ki_pid = p->p_pid; kp->ki_nice = p->p_nice; kp->ki_fibnum = p->p_fibnum; kp->ki_start = p->p_stats->p_start; getboottime(&boottime); timevaladd(&kp->ki_start, &boottime); PROC_STATLOCK(p); rufetch(p, &kp->ki_rusage); kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime); calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime); PROC_STATUNLOCK(p); calccru(p, &kp->ki_childutime, &kp->ki_childstime); /* Some callers want child times in a single value. */ kp->ki_childtime = kp->ki_childstime; timevaladd(&kp->ki_childtime, &kp->ki_childutime); FOREACH_THREAD_IN_PROC(p, td0) kp->ki_cow += td0->td_cow; tp = NULL; if (p->p_pgrp) { kp->ki_pgid = p->p_pgrp->pg_id; kp->ki_jobc = p->p_pgrp->pg_jobc; sp = p->p_pgrp->pg_session; if (sp != NULL) { kp->ki_sid = sp->s_sid; SESS_LOCK(sp); strlcpy(kp->ki_login, sp->s_login, sizeof(kp->ki_login)); if (sp->s_ttyvp) kp->ki_kiflag |= KI_CTTY; if (SESS_LEADER(p)) kp->ki_kiflag |= KI_SLEADER; /* XXX proctree_lock */ tp = sp->s_ttyp; SESS_UNLOCK(sp); } } if ((p->p_flag & P_CONTROLT) && tp != NULL) { kp->ki_tdev = tty_udev(tp); + kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */ kp->ki_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; if (tp->t_session) kp->ki_tsid = tp->t_session->s_sid; - } else + } else { kp->ki_tdev = NODEV; + kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */ + } if (p->p_comm[0] != '\0') strlcpy(kp->ki_comm, p->p_comm, sizeof(kp->ki_comm)); if (p->p_sysent && p->p_sysent->sv_name != NULL && p->p_sysent->sv_name[0] != '\0') strlcpy(kp->ki_emul, p->p_sysent->sv_name, sizeof(kp->ki_emul)); kp->ki_siglist = p->p_siglist; kp->ki_xstat = KW_EXITCODE(p->p_xexit, p->p_xsig); kp->ki_acflag = p->p_acflag; kp->ki_lock = p->p_lock; if (p->p_pptr) { kp->ki_ppid = proc_realparent(p)->p_pid; if (p->p_flag & P_TRACED) kp->ki_tracer = p->p_pptr->p_pid; } } /* * Fill in information that is thread specific. Must be called with * target process locked. If 'preferthread' is set, overwrite certain * process-related fields that are maintained for both threads and * processes. */ static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread) { struct proc *p; p = td->td_proc; kp->ki_tdaddr = td; PROC_LOCK_ASSERT(p, MA_OWNED); if (preferthread) PROC_STATLOCK(p); thread_lock(td); if (td->td_wmesg != NULL) strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg)); else bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg)); if (strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname)) >= sizeof(kp->ki_tdname)) { strlcpy(kp->ki_moretdname, td->td_name + sizeof(kp->ki_tdname) - 1, sizeof(kp->ki_moretdname)); } else { bzero(kp->ki_moretdname, sizeof(kp->ki_moretdname)); } if (TD_ON_LOCK(td)) { kp->ki_kiflag |= KI_LOCKBLOCK; strlcpy(kp->ki_lockname, td->td_lockname, sizeof(kp->ki_lockname)); } else { kp->ki_kiflag &= ~KI_LOCKBLOCK; bzero(kp->ki_lockname, sizeof(kp->ki_lockname)); } if (p->p_state == PRS_NORMAL) { /* approximate. */ if (TD_ON_RUNQ(td) || TD_CAN_RUN(td) || TD_IS_RUNNING(td)) { kp->ki_stat = SRUN; } else if (P_SHOULDSTOP(p)) { kp->ki_stat = SSTOP; } else if (TD_IS_SLEEPING(td)) { kp->ki_stat = SSLEEP; } else if (TD_ON_LOCK(td)) { kp->ki_stat = SLOCK; } else { kp->ki_stat = SWAIT; } } else if (p->p_state == PRS_ZOMBIE) { kp->ki_stat = SZOMB; } else { kp->ki_stat = SIDL; } /* Things in the thread */ kp->ki_wchan = td->td_wchan; kp->ki_pri.pri_level = td->td_priority; kp->ki_pri.pri_native = td->td_base_pri; /* * Note: legacy fields; clamp at the old NOCPU value and/or * the maximum u_char CPU value. */ if (td->td_lastcpu == NOCPU) kp->ki_lastcpu_old = NOCPU_OLD; else if (td->td_lastcpu > MAXCPU_OLD) kp->ki_lastcpu_old = MAXCPU_OLD; else kp->ki_lastcpu_old = td->td_lastcpu; if (td->td_oncpu == NOCPU) kp->ki_oncpu_old = NOCPU_OLD; else if (td->td_oncpu > MAXCPU_OLD) kp->ki_oncpu_old = MAXCPU_OLD; else kp->ki_oncpu_old = td->td_oncpu; kp->ki_lastcpu = td->td_lastcpu; kp->ki_oncpu = td->td_oncpu; kp->ki_tdflags = td->td_flags; kp->ki_tid = td->td_tid; kp->ki_numthreads = p->p_numthreads; kp->ki_pcb = td->td_pcb; kp->ki_kstack = (void *)td->td_kstack; kp->ki_slptime = (ticks - td->td_slptick) / hz; kp->ki_pri.pri_class = td->td_pri_class; kp->ki_pri.pri_user = td->td_user_pri; if (preferthread) { rufetchtd(td, &kp->ki_rusage); kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime); kp->ki_pctcpu = sched_pctcpu(td); kp->ki_estcpu = sched_estcpu(td); kp->ki_cow = td->td_cow; } /* We can't get this anymore but ps etc never used it anyway. */ kp->ki_rqindex = 0; if (preferthread) kp->ki_siglist = td->td_siglist; kp->ki_sigmask = td->td_sigmask; thread_unlock(td); if (preferthread) PROC_STATUNLOCK(p); } /* * Fill in a kinfo_proc structure for the specified process. * Must be called with the target process locked. */ void fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp) { MPASS(FIRST_THREAD_IN_PROC(p) != NULL); fill_kinfo_proc_only(p, kp); fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp, 0); fill_kinfo_aggregate(p, kp); } struct pstats * pstats_alloc(void) { return (malloc(sizeof(struct pstats), M_SUBPROC, M_ZERO|M_WAITOK)); } /* * Copy parts of p_stats; zero the rest of p_stats (statistics). */ void pstats_fork(struct pstats *src, struct pstats *dst) { bzero(&dst->pstat_startzero, __rangeof(struct pstats, pstat_startzero, pstat_endzero)); bcopy(&src->pstat_startcopy, &dst->pstat_startcopy, __rangeof(struct pstats, pstat_startcopy, pstat_endcopy)); } void pstats_free(struct pstats *ps) { free(ps, M_SUBPROC); } static struct proc * zpfind_locked(pid_t pid) { struct proc *p; sx_assert(&allproc_lock, SX_LOCKED); LIST_FOREACH(p, &zombproc, p_list) { if (p->p_pid == pid) { PROC_LOCK(p); break; } } return (p); } /* * Locate a zombie process by number */ struct proc * zpfind(pid_t pid) { struct proc *p; sx_slock(&allproc_lock); p = zpfind_locked(pid); sx_sunlock(&allproc_lock); return (p); } #ifdef COMPAT_FREEBSD32 /* * This function is typically used to copy out the kernel address, so * it can be replaced by assignment of zero. */ static inline uint32_t ptr32_trim(void *ptr) { uintptr_t uptr; uptr = (uintptr_t)ptr; return ((uptr > UINT_MAX) ? 0 : uptr); } #define PTRTRIM_CP(src,dst,fld) \ do { (dst).fld = ptr32_trim((src).fld); } while (0) static void freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32) { int i; bzero(ki32, sizeof(struct kinfo_proc32)); ki32->ki_structsize = sizeof(struct kinfo_proc32); CP(*ki, *ki32, ki_layout); PTRTRIM_CP(*ki, *ki32, ki_args); PTRTRIM_CP(*ki, *ki32, ki_paddr); PTRTRIM_CP(*ki, *ki32, ki_addr); PTRTRIM_CP(*ki, *ki32, ki_tracep); PTRTRIM_CP(*ki, *ki32, ki_textvp); PTRTRIM_CP(*ki, *ki32, ki_fd); PTRTRIM_CP(*ki, *ki32, ki_vmspace); PTRTRIM_CP(*ki, *ki32, ki_wchan); CP(*ki, *ki32, ki_pid); CP(*ki, *ki32, ki_ppid); CP(*ki, *ki32, ki_pgid); CP(*ki, *ki32, ki_tpgid); CP(*ki, *ki32, ki_sid); CP(*ki, *ki32, ki_tsid); CP(*ki, *ki32, ki_jobc); CP(*ki, *ki32, ki_tdev); + CP(*ki, *ki32, ki_tdev_freebsd11); CP(*ki, *ki32, ki_siglist); CP(*ki, *ki32, ki_sigmask); CP(*ki, *ki32, ki_sigignore); CP(*ki, *ki32, ki_sigcatch); CP(*ki, *ki32, ki_uid); CP(*ki, *ki32, ki_ruid); CP(*ki, *ki32, ki_svuid); CP(*ki, *ki32, ki_rgid); CP(*ki, *ki32, ki_svgid); CP(*ki, *ki32, ki_ngroups); for (i = 0; i < KI_NGROUPS; i++) CP(*ki, *ki32, ki_groups[i]); CP(*ki, *ki32, ki_size); CP(*ki, *ki32, ki_rssize); CP(*ki, *ki32, ki_swrss); CP(*ki, *ki32, ki_tsize); CP(*ki, *ki32, ki_dsize); CP(*ki, *ki32, ki_ssize); CP(*ki, *ki32, ki_xstat); CP(*ki, *ki32, ki_acflag); CP(*ki, *ki32, ki_pctcpu); CP(*ki, *ki32, ki_estcpu); CP(*ki, *ki32, ki_slptime); CP(*ki, *ki32, ki_swtime); CP(*ki, *ki32, ki_cow); CP(*ki, *ki32, ki_runtime); TV_CP(*ki, *ki32, ki_start); TV_CP(*ki, *ki32, ki_childtime); CP(*ki, *ki32, ki_flag); CP(*ki, *ki32, ki_kiflag); CP(*ki, *ki32, ki_traceflag); CP(*ki, *ki32, ki_stat); CP(*ki, *ki32, ki_nice); CP(*ki, *ki32, ki_lock); CP(*ki, *ki32, ki_rqindex); CP(*ki, *ki32, ki_oncpu); CP(*ki, *ki32, ki_lastcpu); /* XXX TODO: wrap cpu value as appropriate */ CP(*ki, *ki32, ki_oncpu_old); CP(*ki, *ki32, ki_lastcpu_old); bcopy(ki->ki_tdname, ki32->ki_tdname, TDNAMLEN + 1); bcopy(ki->ki_wmesg, ki32->ki_wmesg, WMESGLEN + 1); bcopy(ki->ki_login, ki32->ki_login, LOGNAMELEN + 1); bcopy(ki->ki_lockname, ki32->ki_lockname, LOCKNAMELEN + 1); bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1); bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1); bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1); bcopy(ki->ki_moretdname, ki32->ki_moretdname, MAXCOMLEN - TDNAMLEN + 1); CP(*ki, *ki32, ki_tracer); CP(*ki, *ki32, ki_flag2); CP(*ki, *ki32, ki_fibnum); CP(*ki, *ki32, ki_cr_flags); CP(*ki, *ki32, ki_jid); CP(*ki, *ki32, ki_numthreads); CP(*ki, *ki32, ki_tid); CP(*ki, *ki32, ki_pri); freebsd32_rusage_out(&ki->ki_rusage, &ki32->ki_rusage); freebsd32_rusage_out(&ki->ki_rusage_ch, &ki32->ki_rusage_ch); PTRTRIM_CP(*ki, *ki32, ki_pcb); PTRTRIM_CP(*ki, *ki32, ki_kstack); PTRTRIM_CP(*ki, *ki32, ki_udata); CP(*ki, *ki32, ki_sflag); CP(*ki, *ki32, ki_tdflags); } #endif int kern_proc_out(struct proc *p, struct sbuf *sb, int flags) { struct thread *td; struct kinfo_proc ki; #ifdef COMPAT_FREEBSD32 struct kinfo_proc32 ki32; #endif int error; PROC_LOCK_ASSERT(p, MA_OWNED); MPASS(FIRST_THREAD_IN_PROC(p) != NULL); error = 0; fill_kinfo_proc(p, &ki); if ((flags & KERN_PROC_NOTHREADS) != 0) { #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) { freebsd32_kinfo_proc_out(&ki, &ki32); if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0) error = ENOMEM; } else #endif if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0) error = ENOMEM; } else { FOREACH_THREAD_IN_PROC(p, td) { fill_kinfo_thread(td, &ki, 1); #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) { freebsd32_kinfo_proc_out(&ki, &ki32); if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0) error = ENOMEM; } else #endif if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0) error = ENOMEM; if (error != 0) break; } } PROC_UNLOCK(p); return (error); } static int sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags, int doingzomb) { struct sbuf sb; struct kinfo_proc ki; struct proc *np; int error, error2; pid_t pid; pid = p->p_pid; sbuf_new_for_sysctl(&sb, (char *)&ki, sizeof(ki), req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = kern_proc_out(p, &sb, flags); error2 = sbuf_finish(&sb); sbuf_delete(&sb); if (error != 0) return (error); else if (error2 != 0) return (error2); if (doingzomb) np = zpfind(pid); else { if (pid == 0) return (0); np = pfind(pid); } if (np == NULL) return (ESRCH); if (np != p) { PROC_UNLOCK(np); return (ESRCH); } PROC_UNLOCK(np); return (0); } static int sysctl_kern_proc(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; int flags, doingzomb, oid_number; int error = 0; oid_number = oidp->oid_number; if (oid_number != KERN_PROC_ALL && (oid_number & KERN_PROC_INC_THREAD) == 0) flags = KERN_PROC_NOTHREADS; else { flags = 0; oid_number &= ~KERN_PROC_INC_THREAD; } #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) flags |= KERN_PROC_MASK32; #endif if (oid_number == KERN_PROC_PID) { if (namelen != 1) return (EINVAL); error = sysctl_wire_old_buffer(req, 0); if (error) return (error); sx_slock(&proctree_lock); error = pget((pid_t)name[0], PGET_CANSEE, &p); if (error == 0) error = sysctl_out_proc(p, req, flags, 0); sx_sunlock(&proctree_lock); return (error); } switch (oid_number) { case KERN_PROC_ALL: if (namelen != 0) return (EINVAL); break; case KERN_PROC_PROC: if (namelen != 0 && namelen != 1) return (EINVAL); break; default: if (namelen != 1) return (EINVAL); break; } if (!req->oldptr) { /* overestimate by 5 procs */ error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5); if (error) return (error); } error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sx_slock(&proctree_lock); sx_slock(&allproc_lock); for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) { if (!doingzomb) p = LIST_FIRST(&allproc); else p = LIST_FIRST(&zombproc); for (; p != NULL; p = LIST_NEXT(p, p_list)) { /* * Skip embryonic processes. */ PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } KASSERT(p->p_ucred != NULL, ("process credential is NULL for non-NEW proc")); /* * Show a user only appropriate processes. */ if (p_cansee(curthread, p)) { PROC_UNLOCK(p); continue; } /* * TODO - make more efficient (see notes below). * do by session. */ switch (oid_number) { case KERN_PROC_GID: if (p->p_ucred->cr_gid != (gid_t)name[0]) { PROC_UNLOCK(p); continue; } break; case KERN_PROC_PGRP: /* could do this by traversing pgrp */ if (p->p_pgrp == NULL || p->p_pgrp->pg_id != (pid_t)name[0]) { PROC_UNLOCK(p); continue; } break; case KERN_PROC_RGID: if (p->p_ucred->cr_rgid != (gid_t)name[0]) { PROC_UNLOCK(p); continue; } break; case KERN_PROC_SESSION: if (p->p_session == NULL || p->p_session->s_sid != (pid_t)name[0]) { PROC_UNLOCK(p); continue; } break; case KERN_PROC_TTY: if ((p->p_flag & P_CONTROLT) == 0 || p->p_session == NULL) { PROC_UNLOCK(p); continue; } /* XXX proctree_lock */ SESS_LOCK(p->p_session); if (p->p_session->s_ttyp == NULL || tty_udev(p->p_session->s_ttyp) != (dev_t)name[0]) { SESS_UNLOCK(p->p_session); PROC_UNLOCK(p); continue; } SESS_UNLOCK(p->p_session); break; case KERN_PROC_UID: if (p->p_ucred->cr_uid != (uid_t)name[0]) { PROC_UNLOCK(p); continue; } break; case KERN_PROC_RUID: if (p->p_ucred->cr_ruid != (uid_t)name[0]) { PROC_UNLOCK(p); continue; } break; case KERN_PROC_PROC: break; default: break; } error = sysctl_out_proc(p, req, flags, doingzomb); if (error) { sx_sunlock(&allproc_lock); sx_sunlock(&proctree_lock); return (error); } } } sx_sunlock(&allproc_lock); sx_sunlock(&proctree_lock); return (0); } struct pargs * pargs_alloc(int len) { struct pargs *pa; pa = malloc(sizeof(struct pargs) + len, M_PARGS, M_WAITOK); refcount_init(&pa->ar_ref, 1); pa->ar_length = len; return (pa); } static void pargs_free(struct pargs *pa) { free(pa, M_PARGS); } void pargs_hold(struct pargs *pa) { if (pa == NULL) return; refcount_acquire(&pa->ar_ref); } void pargs_drop(struct pargs *pa) { if (pa == NULL) return; if (refcount_release(&pa->ar_ref)) pargs_free(pa); } static int proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf, size_t len) { ssize_t n; /* * This may return a short read if the string is shorter than the chunk * and is aligned at the end of the page, and the following page is not * mapped. */ n = proc_readmem(td, p, (vm_offset_t)sptr, buf, len); if (n <= 0) return (ENOMEM); return (0); } #define PROC_AUXV_MAX 256 /* Safety limit on auxv size. */ enum proc_vector_type { PROC_ARG, PROC_ENV, PROC_AUX, }; #ifdef COMPAT_FREEBSD32 static int get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp, size_t *vsizep, enum proc_vector_type type) { struct freebsd32_ps_strings pss; Elf32_Auxinfo aux; vm_offset_t vptr, ptr; uint32_t *proc_vector32; char **proc_vector; size_t vsize, size; int i, error; error = 0; if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss, sizeof(pss)) != sizeof(pss)) return (ENOMEM); switch (type) { case PROC_ARG: vptr = (vm_offset_t)PTRIN(pss.ps_argvstr); vsize = pss.ps_nargvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(int32_t); break; case PROC_ENV: vptr = (vm_offset_t)PTRIN(pss.ps_envstr); vsize = pss.ps_nenvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(int32_t); break; case PROC_AUX: vptr = (vm_offset_t)PTRIN(pss.ps_envstr) + (pss.ps_nenvstr + 1) * sizeof(int32_t); if (vptr % 4 != 0) return (ENOEXEC); for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) { if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) != sizeof(aux)) return (ENOMEM); if (aux.a_type == AT_NULL) break; ptr += sizeof(aux); } if (aux.a_type != AT_NULL) return (ENOEXEC); vsize = i + 1; size = vsize * sizeof(aux); break; default: KASSERT(0, ("Wrong proc vector type: %d", type)); return (EINVAL); } proc_vector32 = malloc(size, M_TEMP, M_WAITOK); if (proc_readmem(td, p, vptr, proc_vector32, size) != size) { error = ENOMEM; goto done; } if (type == PROC_AUX) { *proc_vectorp = (char **)proc_vector32; *vsizep = vsize; return (0); } proc_vector = malloc(vsize * sizeof(char *), M_TEMP, M_WAITOK); for (i = 0; i < (int)vsize; i++) proc_vector[i] = PTRIN(proc_vector32[i]); *proc_vectorp = proc_vector; *vsizep = vsize; done: free(proc_vector32, M_TEMP); return (error); } #endif static int get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp, size_t *vsizep, enum proc_vector_type type) { struct ps_strings pss; Elf_Auxinfo aux; vm_offset_t vptr, ptr; char **proc_vector; size_t vsize, size; int i; #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32) != 0) return (get_proc_vector32(td, p, proc_vectorp, vsizep, type)); #endif if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss, sizeof(pss)) != sizeof(pss)) return (ENOMEM); switch (type) { case PROC_ARG: vptr = (vm_offset_t)pss.ps_argvstr; vsize = pss.ps_nargvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(char *); break; case PROC_ENV: vptr = (vm_offset_t)pss.ps_envstr; vsize = pss.ps_nenvstr; if (vsize > ARG_MAX) return (ENOEXEC); size = vsize * sizeof(char *); break; case PROC_AUX: /* * The aux array is just above env array on the stack. Check * that the address is naturally aligned. */ vptr = (vm_offset_t)pss.ps_envstr + (pss.ps_nenvstr + 1) * sizeof(char *); #if __ELF_WORD_SIZE == 64 if (vptr % sizeof(uint64_t) != 0) #else if (vptr % sizeof(uint32_t) != 0) #endif return (ENOEXEC); /* * We count the array size reading the aux vectors from the * stack until AT_NULL vector is returned. So (to keep the code * simple) we read the process stack twice: the first time here * to find the size and the second time when copying the vectors * to the allocated proc_vector. */ for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) { if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) != sizeof(aux)) return (ENOMEM); if (aux.a_type == AT_NULL) break; ptr += sizeof(aux); } /* * If the PROC_AUXV_MAX entries are iterated over, and we have * not reached AT_NULL, it is most likely we are reading wrong * data: either the process doesn't have auxv array or data has * been modified. Return the error in this case. */ if (aux.a_type != AT_NULL) return (ENOEXEC); vsize = i + 1; size = vsize * sizeof(aux); break; default: KASSERT(0, ("Wrong proc vector type: %d", type)); return (EINVAL); /* In case we are built without INVARIANTS. */ } proc_vector = malloc(size, M_TEMP, M_WAITOK); if (proc_readmem(td, p, vptr, proc_vector, size) != size) { free(proc_vector, M_TEMP); return (ENOMEM); } *proc_vectorp = proc_vector; *vsizep = vsize; return (0); } #define GET_PS_STRINGS_CHUNK_SZ 256 /* Chunk size (bytes) for ps_strings operations. */ static int get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb, enum proc_vector_type type) { size_t done, len, nchr, vsize; int error, i; char **proc_vector, *sptr; char pss_string[GET_PS_STRINGS_CHUNK_SZ]; PROC_ASSERT_HELD(p); /* * We are not going to read more than 2 * (PATH_MAX + ARG_MAX) bytes. */ nchr = 2 * (PATH_MAX + ARG_MAX); error = get_proc_vector(td, p, &proc_vector, &vsize, type); if (error != 0) return (error); for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) { /* * The program may have scribbled into its argv array, e.g. to * remove some arguments. If that has happened, break out * before trying to read from NULL. */ if (proc_vector[i] == NULL) break; for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) { error = proc_read_string(td, p, sptr, pss_string, sizeof(pss_string)); if (error != 0) goto done; len = strnlen(pss_string, GET_PS_STRINGS_CHUNK_SZ); if (done + len >= nchr) len = nchr - done - 1; sbuf_bcat(sb, pss_string, len); if (len != GET_PS_STRINGS_CHUNK_SZ) break; done += GET_PS_STRINGS_CHUNK_SZ; } sbuf_bcat(sb, "", 1); done += len + 1; } done: free(proc_vector, M_TEMP); return (error); } int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb) { return (get_ps_strings(curthread, p, sb, PROC_ARG)); } int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb) { return (get_ps_strings(curthread, p, sb, PROC_ENV)); } int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb) { size_t vsize, size; char **auxv; int error; error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX); if (error == 0) { #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32) != 0) size = vsize * sizeof(Elf32_Auxinfo); else #endif size = vsize * sizeof(Elf_Auxinfo); if (sbuf_bcat(sb, auxv, size) != 0) error = ENOMEM; free(auxv, M_TEMP); } return (error); } /* * This sysctl allows a process to retrieve the argument list or process * title for another process without groping around in the address space * of the other process. It also allow a process to set its own "process * title to a string of its own choice. */ static int sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct pargs *newpa, *pa; struct proc *p; struct sbuf sb; int flags, error = 0, error2; if (namelen != 1) return (EINVAL); flags = PGET_CANSEE; if (req->newptr != NULL) flags |= PGET_ISCURRENT; error = pget((pid_t)name[0], flags, &p); if (error) return (error); pa = p->p_args; if (pa != NULL) { pargs_hold(pa); PROC_UNLOCK(p); error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length); pargs_drop(pa); } else if ((p->p_flag & (P_WEXIT | P_SYSTEM)) == 0) { _PHOLD(p); PROC_UNLOCK(p); sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = proc_getargv(curthread, p, &sb); error2 = sbuf_finish(&sb); PRELE(p); sbuf_delete(&sb); if (error == 0 && error2 != 0) error = error2; } else { PROC_UNLOCK(p); } if (error != 0 || req->newptr == NULL) return (error); if (req->newlen + sizeof(struct pargs) > ps_arg_cache_limit) return (ENOMEM); newpa = pargs_alloc(req->newlen); error = SYSCTL_IN(req, newpa->ar_args, req->newlen); if (error != 0) { pargs_free(newpa); return (error); } PROC_LOCK(p); pa = p->p_args; p->p_args = newpa; PROC_UNLOCK(p); pargs_drop(pa); return (0); } /* * This sysctl allows a process to retrieve environment of another process. */ static int sysctl_kern_proc_env(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; struct sbuf sb; int error, error2; if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); if ((p->p_flag & P_SYSTEM) != 0) { PRELE(p); return (0); } sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = proc_getenvv(curthread, p, &sb); error2 = sbuf_finish(&sb); PRELE(p); sbuf_delete(&sb); return (error != 0 ? error : error2); } /* * This sysctl allows a process to retrieve ELF auxiliary vector of * another process. */ static int sysctl_kern_proc_auxv(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; struct sbuf sb; int error, error2; if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); if ((p->p_flag & P_SYSTEM) != 0) { PRELE(p); return (0); } sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = proc_getauxv(curthread, p, &sb); error2 = sbuf_finish(&sb); PRELE(p); sbuf_delete(&sb); return (error != 0 ? error : error2); } /* * This sysctl allows a process to retrieve the path of the executable for * itself or another process. */ static int sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS) { pid_t *pidp = (pid_t *)arg1; unsigned int arglen = arg2; struct proc *p; struct vnode *vp; char *retbuf, *freebuf; int error; if (arglen != 1) return (EINVAL); if (*pidp == -1) { /* -1 means this process */ p = req->td->td_proc; } else { error = pget(*pidp, PGET_CANSEE, &p); if (error != 0) return (error); } vp = p->p_textvp; if (vp == NULL) { if (*pidp != -1) PROC_UNLOCK(p); return (0); } vref(vp); if (*pidp != -1) PROC_UNLOCK(p); error = vn_fullpath(req->td, vp, &retbuf, &freebuf); vrele(vp); if (error) return (error); error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1); free(freebuf, M_TEMP); return (error); } static int sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS) { struct proc *p; char *sv_name; int *name; int namelen; int error; namelen = arg2; if (namelen != 1) return (EINVAL); name = (int *)arg1; error = pget((pid_t)name[0], PGET_CANSEE, &p); if (error != 0) return (error); sv_name = p->p_sysent->sv_name; PROC_UNLOCK(p); return (sysctl_handle_string(oidp, sv_name, 0, req)); } #ifdef KINFO_OVMENTRY_SIZE CTASSERT(sizeof(struct kinfo_ovmentry) == KINFO_OVMENTRY_SIZE); #endif #ifdef COMPAT_FREEBSD7 static int sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS) { vm_map_entry_t entry, tmp_entry; unsigned int last_timestamp; char *fullpath, *freepath; struct kinfo_ovmentry *kve; struct vattr va; struct ucred *cred; int error, *name; struct vnode *vp; struct proc *p; vm_map_t map; struct vmspace *vm; name = (int *)arg1; error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); vm = vmspace_acquire_ref(p); if (vm == NULL) { PRELE(p); return (ESRCH); } kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK); map = &vm->vm_map; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { vm_object_t obj, tobj, lobj; vm_offset_t addr; if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) continue; bzero(kve, sizeof(*kve)); kve->kve_structsize = sizeof(*kve); kve->kve_private_resident = 0; obj = entry->object.vm_object; if (obj != NULL) { VM_OBJECT_RLOCK(obj); if (obj->shadow_count == 1) kve->kve_private_resident = obj->resident_page_count; } kve->kve_resident = 0; addr = entry->start; while (addr < entry->end) { if (pmap_extract(map->pmap, addr)) kve->kve_resident++; addr += PAGE_SIZE; } for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) { if (tobj != obj) VM_OBJECT_RLOCK(tobj); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; } kve->kve_start = (void*)entry->start; kve->kve_end = (void*)entry->end; kve->kve_offset = (off_t)entry->offset; if (entry->protection & VM_PROT_READ) kve->kve_protection |= KVME_PROT_READ; if (entry->protection & VM_PROT_WRITE) kve->kve_protection |= KVME_PROT_WRITE; if (entry->protection & VM_PROT_EXECUTE) kve->kve_protection |= KVME_PROT_EXEC; if (entry->eflags & MAP_ENTRY_COW) kve->kve_flags |= KVME_FLAG_COW; if (entry->eflags & MAP_ENTRY_NEEDS_COPY) kve->kve_flags |= KVME_FLAG_NEEDS_COPY; if (entry->eflags & MAP_ENTRY_NOCOREDUMP) kve->kve_flags |= KVME_FLAG_NOCOREDUMP; last_timestamp = map->timestamp; vm_map_unlock_read(map); kve->kve_fileid = 0; kve->kve_fsid = 0; freepath = NULL; fullpath = ""; if (lobj) { vp = NULL; switch (lobj->type) { case OBJT_DEFAULT: kve->kve_type = KVME_TYPE_DEFAULT; break; case OBJT_VNODE: kve->kve_type = KVME_TYPE_VNODE; vp = lobj->handle; vref(vp); break; case OBJT_SWAP: if ((lobj->flags & OBJ_TMPFS_NODE) != 0) { kve->kve_type = KVME_TYPE_VNODE; if ((lobj->flags & OBJ_TMPFS) != 0) { vp = lobj->un_pager.swp.swp_tmpfs; vref(vp); } } else { kve->kve_type = KVME_TYPE_SWAP; } break; case OBJT_DEVICE: kve->kve_type = KVME_TYPE_DEVICE; break; case OBJT_PHYS: kve->kve_type = KVME_TYPE_PHYS; break; case OBJT_DEAD: kve->kve_type = KVME_TYPE_DEAD; break; case OBJT_SG: kve->kve_type = KVME_TYPE_SG; break; default: kve->kve_type = KVME_TYPE_UNKNOWN; break; } if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); kve->kve_ref_count = obj->ref_count; kve->kve_shadow_count = obj->shadow_count; VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { vn_fullpath(curthread, vp, &fullpath, &freepath); cred = curthread->td_ucred; vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &va, cred) == 0) { kve->kve_fileid = va.va_fileid; + /* truncate */ kve->kve_fsid = va.va_fsid; } vput(vp); } } else { kve->kve_type = KVME_TYPE_NONE; kve->kve_ref_count = 0; kve->kve_shadow_count = 0; } strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path)); if (freepath != NULL) free(freepath, M_TEMP); error = SYSCTL_OUT(req, kve, sizeof(*kve)); vm_map_lock_read(map); if (error) break; if (last_timestamp != map->timestamp) { vm_map_lookup_entry(map, addr - 1, &tmp_entry); entry = tmp_entry; } } vm_map_unlock_read(map); vmspace_free(vm); PRELE(p); free(kve, M_TEMP); return (error); } #endif /* COMPAT_FREEBSD7 */ #ifdef KINFO_VMENTRY_SIZE CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE); #endif static void kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry, struct kinfo_vmentry *kve) { vm_object_t obj, tobj; vm_page_t m, m_adv; vm_offset_t addr; vm_paddr_t locked_pa; vm_pindex_t pi, pi_adv, pindex; locked_pa = 0; obj = entry->object.vm_object; addr = entry->start; m_adv = NULL; pi = OFF_TO_IDX(entry->offset); for (; addr < entry->end; addr += IDX_TO_OFF(pi_adv), pi += pi_adv) { if (m_adv != NULL) { m = m_adv; } else { pi_adv = atop(entry->end - addr); pindex = pi; for (tobj = obj;; tobj = tobj->backing_object) { m = vm_page_find_least(tobj, pindex); if (m != NULL) { if (m->pindex == pindex) break; if (pi_adv > m->pindex - pindex) { pi_adv = m->pindex - pindex; m_adv = m; } } if (tobj->backing_object == NULL) goto next; pindex += OFF_TO_IDX(tobj-> backing_object_offset); } } m_adv = NULL; if (m->psind != 0 && addr + pagesizes[1] <= entry->end && (addr & (pagesizes[1] - 1)) == 0 && (pmap_mincore(map->pmap, addr, &locked_pa) & MINCORE_SUPER) != 0) { kve->kve_flags |= KVME_FLAG_SUPER; pi_adv = atop(pagesizes[1]); } else { /* * We do not test the found page on validity. * Either the page is busy and being paged in, * or it was invalidated. The first case * should be counted as resident, the second * is not so clear; we do account both. */ pi_adv = 1; } kve->kve_resident += pi_adv; next:; } PA_UNLOCK_COND(locked_pa); } /* * Must be called with the process locked and will return unlocked. */ int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags) { vm_map_entry_t entry, tmp_entry; struct vattr va; vm_map_t map; vm_object_t obj, tobj, lobj; char *fullpath, *freepath; struct kinfo_vmentry *kve; struct ucred *cred; struct vnode *vp; struct vmspace *vm; vm_offset_t addr; unsigned int last_timestamp; int error; PROC_LOCK_ASSERT(p, MA_OWNED); _PHOLD(p); PROC_UNLOCK(p); vm = vmspace_acquire_ref(p); if (vm == NULL) { PRELE(p); return (ESRCH); } kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO); error = 0; map = &vm->vm_map; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) continue; addr = entry->end; bzero(kve, sizeof(*kve)); obj = entry->object.vm_object; if (obj != NULL) { for (tobj = obj; tobj != NULL; tobj = tobj->backing_object) { VM_OBJECT_RLOCK(tobj); lobj = tobj; } if (obj->backing_object == NULL) kve->kve_private_resident = obj->resident_page_count; if (!vmmap_skip_res_cnt) kern_proc_vmmap_resident(map, entry, kve); for (tobj = obj; tobj != NULL; tobj = tobj->backing_object) { if (tobj != obj && tobj != lobj) VM_OBJECT_RUNLOCK(tobj); } } else { lobj = NULL; } kve->kve_start = entry->start; kve->kve_end = entry->end; kve->kve_offset = entry->offset; if (entry->protection & VM_PROT_READ) kve->kve_protection |= KVME_PROT_READ; if (entry->protection & VM_PROT_WRITE) kve->kve_protection |= KVME_PROT_WRITE; if (entry->protection & VM_PROT_EXECUTE) kve->kve_protection |= KVME_PROT_EXEC; if (entry->eflags & MAP_ENTRY_COW) kve->kve_flags |= KVME_FLAG_COW; if (entry->eflags & MAP_ENTRY_NEEDS_COPY) kve->kve_flags |= KVME_FLAG_NEEDS_COPY; if (entry->eflags & MAP_ENTRY_NOCOREDUMP) kve->kve_flags |= KVME_FLAG_NOCOREDUMP; if (entry->eflags & MAP_ENTRY_GROWS_UP) kve->kve_flags |= KVME_FLAG_GROWS_UP; if (entry->eflags & MAP_ENTRY_GROWS_DOWN) kve->kve_flags |= KVME_FLAG_GROWS_DOWN; last_timestamp = map->timestamp; vm_map_unlock_read(map); freepath = NULL; fullpath = ""; if (lobj != NULL) { vp = NULL; switch (lobj->type) { case OBJT_DEFAULT: kve->kve_type = KVME_TYPE_DEFAULT; break; case OBJT_VNODE: kve->kve_type = KVME_TYPE_VNODE; vp = lobj->handle; vref(vp); break; case OBJT_SWAP: if ((lobj->flags & OBJ_TMPFS_NODE) != 0) { kve->kve_type = KVME_TYPE_VNODE; if ((lobj->flags & OBJ_TMPFS) != 0) { vp = lobj->un_pager.swp.swp_tmpfs; vref(vp); } } else { kve->kve_type = KVME_TYPE_SWAP; } break; case OBJT_DEVICE: kve->kve_type = KVME_TYPE_DEVICE; break; case OBJT_PHYS: kve->kve_type = KVME_TYPE_PHYS; break; case OBJT_DEAD: kve->kve_type = KVME_TYPE_DEAD; break; case OBJT_SG: kve->kve_type = KVME_TYPE_SG; break; case OBJT_MGTDEVICE: kve->kve_type = KVME_TYPE_MGTDEVICE; break; default: kve->kve_type = KVME_TYPE_UNKNOWN; break; } if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); kve->kve_ref_count = obj->ref_count; kve->kve_shadow_count = obj->shadow_count; VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { vn_fullpath(curthread, vp, &fullpath, &freepath); kve->kve_vn_type = vntype_to_kinfo(vp->v_type); cred = curthread->td_ucred; vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &va, cred) == 0) { kve->kve_vn_fileid = va.va_fileid; kve->kve_vn_fsid = va.va_fsid; + kve->kve_vn_fsid_freebsd11 = + kve->kve_vn_fsid; /* truncate */ kve->kve_vn_mode = MAKEIMODE(va.va_type, va.va_mode); kve->kve_vn_size = va.va_size; kve->kve_vn_rdev = va.va_rdev; + kve->kve_vn_rdev_freebsd11 = + kve->kve_vn_rdev; /* truncate */ kve->kve_status = KF_ATTR_VALID; } vput(vp); } } else { kve->kve_type = KVME_TYPE_NONE; kve->kve_ref_count = 0; kve->kve_shadow_count = 0; } strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path)); if (freepath != NULL) free(freepath, M_TEMP); /* Pack record size down */ if ((flags & KERN_VMMAP_PACK_KINFO) != 0) kve->kve_structsize = offsetof(struct kinfo_vmentry, kve_path) + strlen(kve->kve_path) + 1; else kve->kve_structsize = sizeof(*kve); kve->kve_structsize = roundup(kve->kve_structsize, sizeof(uint64_t)); /* Halt filling and truncate rather than exceeding maxlen */ if (maxlen != -1 && maxlen < kve->kve_structsize) { error = 0; vm_map_lock_read(map); break; } else if (maxlen != -1) maxlen -= kve->kve_structsize; if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0) error = ENOMEM; vm_map_lock_read(map); if (error != 0) break; if (last_timestamp != map->timestamp) { vm_map_lookup_entry(map, addr - 1, &tmp_entry); entry = tmp_entry; } } vm_map_unlock_read(map); vmspace_free(vm); PRELE(p); free(kve, M_TEMP); return (error); } static int sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS) { struct proc *p; struct sbuf sb; int error, error2, *name; name = (int *)arg1; sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_vmentry), req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); if (error != 0) { sbuf_delete(&sb); return (error); } error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? error : error2); } #if defined(STACK) || defined(DDB) static int sysctl_kern_proc_kstack(SYSCTL_HANDLER_ARGS) { struct kinfo_kstack *kkstp; int error, i, *name, numthreads; lwpid_t *lwpidarray; struct thread *td; struct stack *st; struct sbuf sb; struct proc *p; name = (int *)arg1; error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p); if (error != 0) return (error); kkstp = malloc(sizeof(*kkstp), M_TEMP, M_WAITOK); st = stack_create(); lwpidarray = NULL; PROC_LOCK(p); do { if (lwpidarray != NULL) { free(lwpidarray, M_TEMP); lwpidarray = NULL; } numthreads = p->p_numthreads; PROC_UNLOCK(p); lwpidarray = malloc(sizeof(*lwpidarray) * numthreads, M_TEMP, M_WAITOK | M_ZERO); PROC_LOCK(p); } while (numthreads < p->p_numthreads); /* * XXXRW: During the below loop, execve(2) and countless other sorts * of changes could have taken place. Should we check to see if the * vmspace has been replaced, or the like, in order to prevent * giving a snapshot that spans, say, execve(2), with some threads * before and some after? Among other things, the credentials could * have changed, in which case the right to extract debug info might * no longer be assured. */ i = 0; FOREACH_THREAD_IN_PROC(p, td) { KASSERT(i < numthreads, ("sysctl_kern_proc_kstack: numthreads")); lwpidarray[i] = td->td_tid; i++; } numthreads = i; for (i = 0; i < numthreads; i++) { td = thread_find(p, lwpidarray[i]); if (td == NULL) { continue; } bzero(kkstp, sizeof(*kkstp)); (void)sbuf_new(&sb, kkstp->kkst_trace, sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN); thread_lock(td); kkstp->kkst_tid = td->td_tid; if (TD_IS_SWAPPED(td)) { kkstp->kkst_state = KKST_STATE_SWAPPED; } else if (TD_IS_RUNNING(td)) { if (stack_save_td_running(st, td) == 0) kkstp->kkst_state = KKST_STATE_STACKOK; else kkstp->kkst_state = KKST_STATE_RUNNING; } else { kkstp->kkst_state = KKST_STATE_STACKOK; stack_save_td(st, td); } thread_unlock(td); PROC_UNLOCK(p); stack_sbuf_print(&sb, st); sbuf_finish(&sb); sbuf_delete(&sb); error = SYSCTL_OUT(req, kkstp, sizeof(*kkstp)); PROC_LOCK(p); if (error) break; } _PRELE(p); PROC_UNLOCK(p); if (lwpidarray != NULL) free(lwpidarray, M_TEMP); stack_destroy(st); free(kkstp, M_TEMP); return (error); } #endif /* * This sysctl allows a process to retrieve the full list of groups from * itself or another process. */ static int sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS) { pid_t *pidp = (pid_t *)arg1; unsigned int arglen = arg2; struct proc *p; struct ucred *cred; int error; if (arglen != 1) return (EINVAL); if (*pidp == -1) { /* -1 means this process */ p = req->td->td_proc; PROC_LOCK(p); } else { error = pget(*pidp, PGET_CANSEE, &p); if (error != 0) return (error); } cred = crhold(p->p_ucred); PROC_UNLOCK(p); error = SYSCTL_OUT(req, cred->cr_groups, cred->cr_ngroups * sizeof(gid_t)); crfree(cred); return (error); } /* * This sysctl allows a process to retrieve or/and set the resource limit for * another process. */ static int sysctl_kern_proc_rlimit(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct rlimit rlim; struct proc *p; u_int which; int flags, error; if (namelen != 2) return (EINVAL); which = (u_int)name[1]; if (which >= RLIM_NLIMITS) return (EINVAL); if (req->newptr != NULL && req->newlen != sizeof(rlim)) return (EINVAL); flags = PGET_HOLD | PGET_NOTWEXIT; if (req->newptr != NULL) flags |= PGET_CANDEBUG; else flags |= PGET_CANSEE; error = pget((pid_t)name[0], flags, &p); if (error != 0) return (error); /* * Retrieve limit. */ if (req->oldptr != NULL) { PROC_LOCK(p); lim_rlimit_proc(p, which, &rlim); PROC_UNLOCK(p); } error = SYSCTL_OUT(req, &rlim, sizeof(rlim)); if (error != 0) goto errout; /* * Set limit. */ if (req->newptr != NULL) { error = SYSCTL_IN(req, &rlim, sizeof(rlim)); if (error == 0) error = kern_proc_setrlimit(curthread, p, which, &rlim); } errout: PRELE(p); return (error); } /* * This sysctl allows a process to retrieve ps_strings structure location of * another process. */ static int sysctl_kern_proc_ps_strings(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; vm_offset_t ps_strings; int error; #ifdef COMPAT_FREEBSD32 uint32_t ps_strings32; #endif if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_CANDEBUG, &p); if (error != 0) return (error); #ifdef COMPAT_FREEBSD32 if ((req->flags & SCTL_MASK32) != 0) { /* * We return 0 if the 32 bit emulation request is for a 64 bit * process. */ ps_strings32 = SV_PROC_FLAG(p, SV_ILP32) != 0 ? PTROUT(p->p_sysent->sv_psstrings) : 0; PROC_UNLOCK(p); error = SYSCTL_OUT(req, &ps_strings32, sizeof(ps_strings32)); return (error); } #endif ps_strings = p->p_sysent->sv_psstrings; PROC_UNLOCK(p); error = SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings)); return (error); } /* * This sysctl allows a process to retrieve umask of another process. */ static int sysctl_kern_proc_umask(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; int error; u_short fd_cmask; if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); FILEDESC_SLOCK(p->p_fd); fd_cmask = p->p_fd->fd_cmask; FILEDESC_SUNLOCK(p->p_fd); PRELE(p); error = SYSCTL_OUT(req, &fd_cmask, sizeof(fd_cmask)); return (error); } /* * This sysctl allows a process to set and retrieve binary osreldate of * another process. */ static int sysctl_kern_proc_osrel(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; int flags, error, osrel; if (namelen != 1) return (EINVAL); if (req->newptr != NULL && req->newlen != sizeof(osrel)) return (EINVAL); flags = PGET_HOLD | PGET_NOTWEXIT; if (req->newptr != NULL) flags |= PGET_CANDEBUG; else flags |= PGET_CANSEE; error = pget((pid_t)name[0], flags, &p); if (error != 0) return (error); error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel)); if (error != 0) goto errout; if (req->newptr != NULL) { error = SYSCTL_IN(req, &osrel, sizeof(osrel)); if (error != 0) goto errout; if (osrel < 0) { error = EINVAL; goto errout; } p->p_osrel = osrel; } errout: PRELE(p); return (error); } static int sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; struct kinfo_sigtramp kst; const struct sysentvec *sv; int error; #ifdef COMPAT_FREEBSD32 struct kinfo_sigtramp32 kst32; #endif if (namelen != 1) return (EINVAL); error = pget((pid_t)name[0], PGET_CANDEBUG, &p); if (error != 0) return (error); sv = p->p_sysent; #ifdef COMPAT_FREEBSD32 if ((req->flags & SCTL_MASK32) != 0) { bzero(&kst32, sizeof(kst32)); if (SV_PROC_FLAG(p, SV_ILP32)) { if (sv->sv_sigcode_base != 0) { kst32.ksigtramp_start = sv->sv_sigcode_base; kst32.ksigtramp_end = sv->sv_sigcode_base + *sv->sv_szsigcode; } else { kst32.ksigtramp_start = sv->sv_psstrings - *sv->sv_szsigcode; kst32.ksigtramp_end = sv->sv_psstrings; } } PROC_UNLOCK(p); error = SYSCTL_OUT(req, &kst32, sizeof(kst32)); return (error); } #endif bzero(&kst, sizeof(kst)); if (sv->sv_sigcode_base != 0) { kst.ksigtramp_start = (char *)sv->sv_sigcode_base; kst.ksigtramp_end = (char *)sv->sv_sigcode_base + *sv->sv_szsigcode; } else { kst.ksigtramp_start = (char *)sv->sv_psstrings - *sv->sv_szsigcode; kst.ksigtramp_end = (char *)sv->sv_psstrings; } PROC_UNLOCK(p); error = SYSCTL_OUT(req, &kst, sizeof(kst)); return (error); } SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table"); SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT| CTLFLAG_MPSAFE, 0, 0, sysctl_kern_proc, "S,proc", "Return entire process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_GID, gid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_RGID, rgid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SESSION, sid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Return process table, no threads"); static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args, CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_args, "Process argument list"); static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_env, "Process environment"); static SYSCTL_NODE(_kern_proc, KERN_PROC_AUXV, auxv, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_auxv, "Process ELF auxiliary vector"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_pathname, "Process executable path"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SV_NAME, sv_name, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_sv_name, "Process syscall vector name (ABI type)"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_GID | KERN_PROC_INC_THREAD), gid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_INC_THREAD), pgrp_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_RGID | KERN_PROC_INC_THREAD), rgid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_SESSION | KERN_PROC_INC_THREAD), sid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_INC_THREAD), tty_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_INC_THREAD), uid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_INC_THREAD), ruid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table"); static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Return process table, no threads"); #ifdef COMPAT_FREEBSD7 static SYSCTL_NODE(_kern_proc, KERN_PROC_OVMMAP, ovmmap, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_ovmmap, "Old Process vm map entries"); #endif static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_vmmap, "Process vm map entries"); #if defined(STACK) || defined(DDB) static SYSCTL_NODE(_kern_proc, KERN_PROC_KSTACK, kstack, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_kstack, "Process kernel stacks"); #endif static SYSCTL_NODE(_kern_proc, KERN_PROC_GROUPS, groups, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_groups, "Process groups"); static SYSCTL_NODE(_kern_proc, KERN_PROC_RLIMIT, rlimit, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_rlimit, "Process resource limits"); static SYSCTL_NODE(_kern_proc, KERN_PROC_PS_STRINGS, ps_strings, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_ps_strings, "Process ps_strings location"); static SYSCTL_NODE(_kern_proc, KERN_PROC_UMASK, umask, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_umask, "Process umask"); static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_osrel, "Process binary osreldate"); static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp, "Process signal trampoline location"); int allproc_gen; /* * stop_all_proc() purpose is to stop all process which have usermode, * except current process for obvious reasons. This makes it somewhat * unreliable when invoked from multithreaded process. The service * must not be user-callable anyway. */ void stop_all_proc(void) { struct proc *cp, *p; int r, gen; bool restart, seen_stopped, seen_exiting, stopped_some; cp = curproc; allproc_loop: sx_xlock(&allproc_lock); gen = allproc_gen; seen_exiting = seen_stopped = stopped_some = restart = false; LIST_REMOVE(cp, p_list); LIST_INSERT_HEAD(&allproc, cp, p_list); for (;;) { p = LIST_NEXT(cp, p_list); if (p == NULL) break; LIST_REMOVE(cp, p_list); LIST_INSERT_AFTER(p, cp, p_list); PROC_LOCK(p); if ((p->p_flag & (P_KPROC | P_SYSTEM | P_TOTAL_STOP)) != 0) { PROC_UNLOCK(p); continue; } if ((p->p_flag & P_WEXIT) != 0) { seen_exiting = true; PROC_UNLOCK(p); continue; } if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { /* * Stopped processes are tolerated when there * are no other processes which might continue * them. P_STOPPED_SINGLE but not * P_TOTAL_STOP process still has at least one * thread running. */ seen_stopped = true; PROC_UNLOCK(p); continue; } _PHOLD(p); sx_xunlock(&allproc_lock); r = thread_single(p, SINGLE_ALLPROC); if (r != 0) restart = true; else stopped_some = true; _PRELE(p); PROC_UNLOCK(p); sx_xlock(&allproc_lock); } /* Catch forked children we did not see in iteration. */ if (gen != allproc_gen) restart = true; sx_xunlock(&allproc_lock); if (restart || stopped_some || seen_exiting || seen_stopped) { kern_yield(PRI_USER); goto allproc_loop; } } void resume_all_proc(void) { struct proc *cp, *p; cp = curproc; sx_xlock(&allproc_lock); LIST_REMOVE(cp, p_list); LIST_INSERT_HEAD(&allproc, cp, p_list); for (;;) { p = LIST_NEXT(cp, p_list); if (p == NULL) break; LIST_REMOVE(cp, p_list); LIST_INSERT_AFTER(p, cp, p_list); PROC_LOCK(p); if ((p->p_flag & P_TOTAL_STOP) != 0) { sx_xunlock(&allproc_lock); _PHOLD(p); thread_single_end(p, SINGLE_ALLPROC); _PRELE(p); PROC_UNLOCK(p); sx_xlock(&allproc_lock); } else { PROC_UNLOCK(p); } } sx_xunlock(&allproc_lock); } /* #define TOTAL_STOP_DEBUG 1 */ #ifdef TOTAL_STOP_DEBUG volatile static int ap_resume; #include static int sysctl_debug_stop_all_proc(SYSCTL_HANDLER_ARGS) { int error, val; val = 0; ap_resume = 0; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val != 0) { stop_all_proc(); syncer_suspend(); while (ap_resume == 0) ; syncer_resume(); resume_all_proc(); } return (0); } SYSCTL_PROC(_debug, OID_AUTO, stop_all_proc, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, __DEVOLATILE(int *, &ap_resume), 0, sysctl_debug_stop_all_proc, "I", ""); #endif Index: head/sys/kern/makesyscalls.sh =================================================================== --- head/sys/kern/makesyscalls.sh (revision 318735) +++ head/sys/kern/makesyscalls.sh (revision 318736) @@ -1,683 +1,712 @@ #! /bin/sh - # @(#)makesyscalls.sh 8.1 (Berkeley) 6/10/93 # $FreeBSD$ set -e # name of compat options: compat=COMPAT_43 compat4=COMPAT_FREEBSD4 compat6=COMPAT_FREEBSD6 compat7=COMPAT_FREEBSD7 compat10=COMPAT_FREEBSD10 +compat11=COMPAT_FREEBSD11 # output files: sysnames="syscalls.c" sysproto="../sys/sysproto.h" sysproto_h=_SYS_SYSPROTO_H_ syshdr="../sys/syscall.h" sysmk="../sys/syscall.mk" syssw="init_sysent.c" syscallprefix="SYS_" switchname="sysent" namesname="syscallnames" systrace="systrace_args.c" # tmp files: sysaue="sysent.aue.$$" sysdcl="sysent.dcl.$$" syscompat="sysent.compat.$$" syscompatdcl="sysent.compatdcl.$$" syscompat4="sysent.compat4.$$" syscompat4dcl="sysent.compat4dcl.$$" syscompat6="sysent.compat6.$$" syscompat6dcl="sysent.compat6dcl.$$" syscompat7="sysent.compat7.$$" syscompat7dcl="sysent.compat7dcl.$$" syscompat10="sysent.compat10.$$" syscompat10dcl="sysent.compat10dcl.$$" +syscompat11="sysent.compat11.$$" +syscompat11dcl="sysent.compat11dcl.$$" sysent="sysent.switch.$$" sysinc="sysinc.switch.$$" sysarg="sysarg.switch.$$" sysprotoend="sysprotoend.$$" systracetmp="systrace.$$" systraceret="systraceret.$$" if [ -r capabilities.conf ]; then capenabled=`cat capabilities.conf | grep -v "^#" | grep -v "^$"` capenabled=`echo $capenabled | sed 's/ /,/g'` else capenabled="" fi -trap "rm $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $syscompat10 $syscompat10dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp $systraceret" 0 +trap "rm $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $syscompat10 $syscompat10dcl $syscompat11 $syscompat11dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp $systraceret" 0 -touch $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $syscompat10 $syscompat10dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp $systraceret +touch $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $syscompat10 $syscompat10dcl $syscompat11 $syscompat11dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp $systraceret case $# in 0) echo "usage: $0 input-file " 1>&2 exit 1 ;; esac if [ -n "$2" ]; then . $2 fi sed -e ' :join /\\$/{a\ N s/\\\n// b join } 2,${ /^#/!s/\([{}()*,]\)/ \1 /g } ' < $1 | awk " BEGIN { sysaue = \"$sysaue\" sysdcl = \"$sysdcl\" sysproto = \"$sysproto\" sysprotoend = \"$sysprotoend\" sysproto_h = \"$sysproto_h\" syscompat = \"$syscompat\" syscompatdcl = \"$syscompatdcl\" syscompat4 = \"$syscompat4\" syscompat4dcl = \"$syscompat4dcl\" syscompat6 = \"$syscompat6\" syscompat6dcl = \"$syscompat6dcl\" syscompat7 = \"$syscompat7\" syscompat7dcl = \"$syscompat7dcl\" syscompat10 = \"$syscompat10\" syscompat10dcl = \"$syscompat10dcl\" + syscompat11 = \"$syscompat11\" + syscompat11dcl = \"$syscompat11dcl\" sysent = \"$sysent\" syssw = \"$syssw\" sysinc = \"$sysinc\" sysarg = \"$sysarg\" sysnames = \"$sysnames\" syshdr = \"$syshdr\" sysmk = \"$sysmk\" systrace = \"$systrace\" systracetmp = \"$systracetmp\" systraceret = \"$systraceret\" compat = \"$compat\" compat4 = \"$compat4\" compat6 = \"$compat6\" compat7 = \"$compat7\" compat10 = \"$compat10\" + compat11 = \"$compat11\" syscallprefix = \"$syscallprefix\" switchname = \"$switchname\" namesname = \"$namesname\" infile = \"$1\" capenabled_string = \"$capenabled\" "' split(capenabled_string, capenabled, ","); printf "\n/* The casts are bogus but will do for now. */\n" > sysent printf "struct sysent %s[] = {\n",switchname > sysent printf "/*\n * System call switch table.\n *\n" > syssw printf " * DO NOT EDIT-- this file is automatically generated.\n" > syssw printf " * $%s$\n", "FreeBSD" > syssw printf " */\n\n" > syssw printf "/*\n * System call prototypes.\n *\n" > sysarg printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysarg printf " * $%s$\n", "FreeBSD" > sysarg printf " */\n\n" > sysarg printf "#ifndef %s\n", sysproto_h > sysarg printf "#define\t%s\n\n", sysproto_h > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n\n" > sysarg printf "#include \n\n" > sysarg printf "struct proc;\n\n" > sysarg printf "struct thread;\n\n" > sysarg printf "#define\tPAD_(t)\t(sizeof(register_t) <= sizeof(t) ? \\\n" > sysarg printf "\t\t0 : sizeof(register_t) - sizeof(t))\n\n" > sysarg printf "#if BYTE_ORDER == LITTLE_ENDIAN\n"> sysarg printf "#define\tPADL_(t)\t0\n" > sysarg printf "#define\tPADR_(t)\tPAD_(t)\n" > sysarg printf "#else\n" > sysarg printf "#define\tPADL_(t)\tPAD_(t)\n" > sysarg printf "#define\tPADR_(t)\t0\n" > sysarg printf "#endif\n\n" > sysarg printf "\n#ifdef %s\n\n", compat > syscompat printf "\n#ifdef %s\n\n", compat4 > syscompat4 printf "\n#ifdef %s\n\n", compat6 > syscompat6 printf "\n#ifdef %s\n\n", compat7 > syscompat7 printf "\n#ifdef %s\n\n", compat10 > syscompat10 + printf "\n#ifdef %s\n\n", compat11 > syscompat11 printf "/*\n * System call names.\n *\n" > sysnames printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysnames printf " * $%s$\n", "FreeBSD" > sysnames printf " */\n\n" > sysnames printf "const char *%s[] = {\n", namesname > sysnames printf "/*\n * System call numbers.\n *\n" > syshdr printf " * DO NOT EDIT-- this file is automatically generated.\n" > syshdr printf " * $%s$\n", "FreeBSD" > syshdr printf " */\n\n" > syshdr printf "# FreeBSD system call object files.\n" > sysmk printf "# DO NOT EDIT-- this file is automatically generated.\n" > sysmk printf "# $%s$\n", "FreeBSD" > sysmk printf "MIASM = " > sysmk printf "/*\n * System call argument to DTrace register array converstion.\n *\n" > systrace printf " * DO NOT EDIT-- this file is automatically generated.\n" > systrace printf " * $%s$\n", "FreeBSD" > systrace printf " * This file is part of the DTrace syscall provider.\n */\n\n" > systrace printf "static void\nsystrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)\n{\n" > systrace printf "\tint64_t *iarg = (int64_t *) uarg;\n" > systrace printf "\tswitch (sysnum) {\n" > systrace printf "static void\nsystrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)\n{\n\tconst char *p = NULL;\n" > systracetmp printf "\tswitch (sysnum) {\n" > systracetmp printf "static void\nsystrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)\n{\n\tconst char *p = NULL;\n" > systraceret printf "\tswitch (sysnum) {\n" > systraceret } NR == 1 { next } NF == 0 || $1 ~ /^;/ { next } $1 ~ /^#[ ]*include/ { print > sysinc next } $1 ~ /^#[ ]*if/ { print > sysent print > sysdcl print > sysarg print > syscompat print > syscompat4 print > syscompat6 print > syscompat7 print > syscompat10 + print > syscompat11 print > sysnames print > systrace print > systracetmp print > systraceret savesyscall = syscall next } $1 ~ /^#[ ]*else/ { print > sysent print > sysdcl print > sysarg print > syscompat print > syscompat4 print > syscompat6 print > syscompat7 print > syscompat10 + print > syscompat11 print > sysnames print > systrace print > systracetmp print > systraceret syscall = savesyscall next } $1 ~ /^#/ { print > sysent print > sysdcl print > sysarg print > syscompat print > syscompat4 print > syscompat6 print > syscompat7 print > syscompat10 + print > syscompat11 print > sysnames print > systrace print > systracetmp print > systraceret next } syscall != $1 { printf "%s: line %d: syscall number out of sync at %d\n", infile, NR, syscall printf "line is:\n" print exit 1 } # Returns true if the type "name" is the first flag in the type field function type(name, flags, n) { n = split($3, flags, /\|/) return (n > 0 && flags[1] == name) } # Returns true if the flag "name" is set in the type field function flag(name, flags, i, n) { n = split($3, flags, /\|/) for (i = 1; i <= n; i++) if (flags[i] == name) return 1 return 0 } function align_sysent_comment(column) { printf("\t") > sysent column = column + 8 - column % 8 while (column < 56) { printf("\t") > sysent column = column + 8 } } function parserr(was, wanted) { printf "%s: line %d: unexpected %s (expected %s)\n", infile, NR, was, wanted exit 1 } function parseline() { f=4 # toss number, type, audit event argc= 0; argssize = "0" thr_flag = "SY_THR_STATIC" if (flag("NOTSTATIC")) { thr_flag = "SY_THR_ABSENT" } if ($NF != "}") { funcalias=$(NF-2) argalias=$(NF-1) rettype=$NF end=NF-3 } else { funcalias="" argalias="" rettype="int" end=NF } if (flag("NODEF")) { auditev="AUE_NULL" funcname=$4 argssize = "AS(" $6 ")" return } if ($f != "{") parserr($f, "{") f++ if ($end != "}") parserr($end, "}") end-- if ($end != ";") parserr($end, ";") end-- if ($end != ")") parserr($end, ")") end-- syscallret=$f f++ funcname=$f # # We now know the func name, so define a flags field for it. # Do this before any other processing as we may return early # from it. # for (cap in capenabled) { if (funcname == capenabled[cap]) { flags = "SYF_CAPENABLED"; break; } } if (funcalias == "") funcalias = funcname if (argalias == "") { argalias = funcname "_args" if (flag("COMPAT")) argalias = "o" argalias if (flag("COMPAT4")) argalias = "freebsd4_" argalias if (flag("COMPAT6")) argalias = "freebsd6_" argalias if (flag("COMPAT7")) argalias = "freebsd7_" argalias if (flag("COMPAT10")) argalias = "freebsd10_" argalias + if (flag("COMPAT11")) + argalias = "freebsd11_" argalias } f++ if ($f != "(") parserr($f, ")") f++ if (f == end) { if ($f != "void") parserr($f, "argument definition") return } while (f <= end) { argc++ argtype[argc]="" oldf="" while (f < end && $(f+1) != ",") { if (argtype[argc] != "" && oldf != "*") argtype[argc] = argtype[argc]" "; argtype[argc] = argtype[argc]$f; oldf = $f; f++ } if (argtype[argc] == "") parserr($f, "argument definition") argname[argc]=$f; f += 2; # skip name, and any comma } if (argc != 0) argssize = "AS(" argalias ")" } { comment = $4 if (NF < 7) for (i = 5; i <= NF; i++) comment = comment " " $i } # # The AUE_ audit event identifier. # { auditev = $2; } # # The flags, if any. # { flags = "0"; } type("STD") || type("NODEF") || type("NOARGS") || type("NOPROTO") \ || type("NOSTD") { parseline() printf("\t/* %s */\n\tcase %d: {\n", funcname, syscall) > systrace printf("\t/* %s */\n\tcase %d:\n", funcname, syscall) > systracetmp printf("\t/* %s */\n\tcase %d:\n", funcname, syscall) > systraceret if (argc > 0) { printf("\t\tswitch(ndx) {\n") > systracetmp printf("\t\tstruct %s *p = params;\n", argalias) > systrace for (i = 1; i <= argc; i++) { arg = argtype[i] sub("__restrict$", "", arg) if (index(arg, "*") > 0) printf("\t\tcase %d:\n\t\t\tp = \"userland %s\";\n\t\t\tbreak;\n", i - 1, arg) > systracetmp else printf("\t\tcase %d:\n\t\t\tp = \"%s\";\n\t\t\tbreak;\n", i - 1, arg) > systracetmp if (index(arg, "*") > 0 || arg == "caddr_t") printf("\t\tuarg[%d] = (intptr_t) p->%s; /* %s */\n", \ i - 1, \ argname[i], arg) > systrace else if (arg == "union l_semun") printf("\t\tuarg[%d] = p->%s.buf; /* %s */\n", \ i - 1, \ argname[i], arg) > systrace else if (substr(arg, 1, 1) == "u" || arg == "size_t") printf("\t\tuarg[%d] = p->%s; /* %s */\n", \ i - 1, \ argname[i], arg) > systrace else printf("\t\tiarg[%d] = p->%s; /* %s */\n", \ i - 1, \ argname[i], arg) > systrace } printf("\t\tdefault:\n\t\t\tbreak;\n\t\t};\n") > systracetmp printf("\t\tif (ndx == 0 || ndx == 1)\n") > systraceret printf("\t\t\tp = \"%s\";\n", syscallret) > systraceret printf("\t\tbreak;\n") > systraceret } printf("\t\t*n_args = %d;\n\t\tbreak;\n\t}\n", argc) > systrace printf("\t\tbreak;\n") > systracetmp if (argc != 0 && !flag("NOARGS") && !flag("NOPROTO") && \ !flag("NODEF")) { printf("struct %s {\n", argalias) > sysarg for (i = 1; i <= argc; i++) printf("\tchar %s_l_[PADL_(%s)]; " \ "%s %s; char %s_r_[PADR_(%s)];\n", argname[i], argtype[i], argtype[i], argname[i], argname[i], argtype[i]) > sysarg printf("};\n") > sysarg } else if (!flag("NOARGS") && !flag("NOPROTO") && !flag("NODEF")) printf("struct %s {\n\tregister_t dummy;\n};\n", argalias) > sysarg if (!flag("NOPROTO") && !flag("NODEF")) { if (funcname == "nosys" || funcname == "lkmnosys" || funcname == "sysarch" || funcname ~ /^freebsd/ || funcname ~ /^linux/ || funcname ~ /^ibcs2/ || funcname ~ /^xenix/ || funcname ~ /^cloudabi/) { printf("%s\t%s(struct thread *, struct %s *)", rettype, funcname, argalias) > sysdcl } else { printf("%s\tsys_%s(struct thread *, struct %s *)", rettype, funcname, argalias) > sysdcl } printf(";\n") > sysdcl printf("#define\t%sAUE_%s\t%s\n", syscallprefix, funcalias, auditev) > sysaue } printf("\t{ %s, (sy_call_t *)", argssize) > sysent column = 8 + 2 + length(argssize) + 15 if (flag("NOSTD")) { printf("lkmressys, AUE_NULL, NULL, 0, 0, %s, SY_THR_ABSENT },", flags) > sysent column = column + length("lkmressys") + length("AUE_NULL") + 3 } else { if (funcname == "nosys" || funcname == "sysarch" || funcname == "lkmnosys" || funcname ~ /^freebsd/ || funcname ~ /^linux/ || funcname ~ /^ibcs2/ || funcname ~ /^xenix/ || funcname ~ /^cloudabi/) { printf("%s, %s, NULL, 0, 0, %s, %s },", funcname, auditev, flags, thr_flag) > sysent column = column + length(funcname) + length(auditev) + length(flags) + 3 } else { printf("sys_%s, %s, NULL, 0, 0, %s, %s },", funcname, auditev, flags, thr_flag) > sysent column = column + length(funcname) + length(auditev) + length(flags) + 3 + 4 } } align_sysent_comment(column) printf("/* %d = %s */\n", syscall, funcalias) > sysent printf("\t\"%s\",\t\t\t/* %d = %s */\n", funcalias, syscall, funcalias) > sysnames if (!flag("NODEF")) { printf("#define\t%s%s\t%d\n", syscallprefix, funcalias, syscall) > syshdr printf(" \\\n\t%s.o", funcalias) > sysmk } syscall++ next } type("COMPAT") || type("COMPAT4") || type("COMPAT6") || \ - type("COMPAT7") || type("COMPAT10") { + type("COMPAT7") || type("COMPAT10") || type("COMPAT11") { if (flag("COMPAT")) { ncompat++ out = syscompat outdcl = syscompatdcl wrap = "compat" prefix = "o" descr = "old" } else if (flag("COMPAT4")) { ncompat4++ out = syscompat4 outdcl = syscompat4dcl wrap = "compat4" prefix = "freebsd4_" descr = "freebsd4" } else if (flag("COMPAT6")) { ncompat6++ out = syscompat6 outdcl = syscompat6dcl wrap = "compat6" prefix = "freebsd6_" descr = "freebsd6" } else if (flag("COMPAT7")) { ncompat7++ out = syscompat7 outdcl = syscompat7dcl wrap = "compat7" prefix = "freebsd7_" descr = "freebsd7" } else if (flag("COMPAT10")) { ncompat10++ out = syscompat10 outdcl = syscompat10dcl wrap = "compat10" prefix = "freebsd10_" descr = "freebsd10" + } else if (flag("COMPAT11")) { + ncompat11++ + out = syscompat11 + outdcl = syscompat11dcl + wrap = "compat11" + prefix = "freebsd11_" + descr = "freebsd11" } parseline() if (argc != 0 && !flag("NOARGS") && !flag("NOPROTO") && \ !flag("NODEF")) { printf("struct %s {\n", argalias) > out for (i = 1; i <= argc; i++) printf("\tchar %s_l_[PADL_(%s)]; %s %s; " \ "char %s_r_[PADR_(%s)];\n", argname[i], argtype[i], argtype[i], argname[i], argname[i], argtype[i]) > out printf("};\n") > out } else if (!flag("NOARGS") && !flag("NOPROTO") && !flag("NODEF")) printf("struct %s {\n\tregister_t dummy;\n};\n", argalias) > sysarg if (!flag("NOPROTO") && !flag("NODEF")) { printf("%s\t%s%s(struct thread *, struct %s *);\n", rettype, prefix, funcname, argalias) > outdcl printf("#define\t%sAUE_%s%s\t%s\n", syscallprefix, prefix, funcname, auditev) > sysaue } if (flag("NOSTD")) { printf("\t{ %s, (sy_call_t *)%s, %s, NULL, 0, 0, 0, SY_THR_ABSENT },", "0", "lkmressys", "AUE_NULL") > sysent align_sysent_comment(8 + 2 + length("0") + 15 + \ length("lkmressys") + length("AUE_NULL") + 3) } else { printf("\t{ %s(%s,%s), %s, NULL, 0, 0, %s, %s },", wrap, argssize, funcname, auditev, flags, thr_flag) > sysent align_sysent_comment(8 + 9 + length(argssize) + 1 + \ length(funcname) + length(auditev) + \ length(flags) + 4) } printf("/* %d = %s %s */\n", syscall, descr, funcalias) > sysent printf("\t\"%s.%s\",\t\t/* %d = %s %s */\n", wrap, funcalias, syscall, descr, funcalias) > sysnames # Do not provide freebsdN_* symbols in libc for < FreeBSD 7 if (flag("COMPAT") || flag("COMPAT4") || flag("COMPAT6")) { printf("\t\t\t\t/* %d is %s %s */\n", syscall, descr, funcalias) > syshdr } else if (!flag("NODEF")) { printf("#define\t%s%s%s\t%d\n", syscallprefix, prefix, funcalias, syscall) > syshdr printf(" \\\n\t%s%s.o", prefix, funcalias) > sysmk } syscall++ next } type("OBSOL") { printf("\t{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT },") > sysent align_sysent_comment(34) printf("/* %d = obsolete %s */\n", syscall, comment) > sysent printf("\t\"obs_%s\",\t\t\t/* %d = obsolete %s */\n", $4, syscall, comment) > sysnames printf("\t\t\t\t/* %d is obsolete %s */\n", syscall, comment) > syshdr syscall++ next } type("UNIMPL") { printf("\t{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT },\t\t\t/* %d = %s */\n", syscall, comment) > sysent printf("\t\"#%d\",\t\t\t/* %d = %s */\n", syscall, syscall, comment) > sysnames syscall++ next } { printf "%s: line %d: unrecognized keyword %s\n", infile, NR, $3 exit 1 } END { printf "\n#define AS(name) (sizeof(struct name) / sizeof(register_t))\n" > sysinc - if (ncompat != 0 || ncompat4 != 0 || ncompat6 != 0 || ncompat7 != 0 || ncompat10 != 0) + if (ncompat != 0 || ncompat4 != 0 || ncompat6 != 0 || ncompat7 != 0 || ncompat10 != 0 || ncompat11 != 0) printf "#include \"opt_compat.h\"\n\n" > syssw if (ncompat != 0) { printf "\n#ifdef %s\n", compat > sysinc printf "#define compat(n, name) n, (sy_call_t *)__CONCAT(o,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } if (ncompat4 != 0) { printf "\n#ifdef %s\n", compat4 > sysinc printf "#define compat4(n, name) n, (sy_call_t *)__CONCAT(freebsd4_,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat4(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } if (ncompat6 != 0) { printf "\n#ifdef %s\n", compat6 > sysinc printf "#define compat6(n, name) n, (sy_call_t *)__CONCAT(freebsd6_,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat6(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } if (ncompat7 != 0) { printf "\n#ifdef %s\n", compat7 > sysinc printf "#define compat7(n, name) n, (sy_call_t *)__CONCAT(freebsd7_,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat7(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } if (ncompat10 != 0) { printf "\n#ifdef %s\n", compat10 > sysinc printf "#define compat10(n, name) n, (sy_call_t *)__CONCAT(freebsd10_,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat10(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } + if (ncompat11 != 0) { + printf "\n#ifdef %s\n", compat11 > sysinc + printf "#define compat11(n, name) n, (sy_call_t *)__CONCAT(freebsd11_,name)\n" > sysinc + printf "#else\n" > sysinc + printf "#define compat11(n, name) 0, (sy_call_t *)nosys\n" > sysinc + printf "#endif\n" > sysinc + } + printf("\n#endif /* %s */\n\n", compat) > syscompatdcl printf("\n#endif /* %s */\n\n", compat4) > syscompat4dcl printf("\n#endif /* %s */\n\n", compat6) > syscompat6dcl printf("\n#endif /* %s */\n\n", compat7) > syscompat7dcl printf("\n#endif /* %s */\n\n", compat10) > syscompat10dcl + printf("\n#endif /* %s */\n\n", compat11) > syscompat11dcl printf("\n#undef PAD_\n") > sysprotoend printf("#undef PADL_\n") > sysprotoend printf("#undef PADR_\n") > sysprotoend printf("\n#endif /* !%s */\n", sysproto_h) > sysprotoend printf("\n") > sysmk printf("};\n") > sysent printf("};\n") > sysnames printf("#define\t%sMAXSYSCALL\t%d\n", syscallprefix, syscall) \ > syshdr printf "\tdefault:\n\t\t*n_args = 0;\n\t\tbreak;\n\t};\n}\n" > systrace printf "\tdefault:\n\t\tbreak;\n\t};\n\tif (p != NULL)\n\t\tstrlcpy(desc, p, descsz);\n}\n" > systracetmp printf "\tdefault:\n\t\tbreak;\n\t};\n\tif (p != NULL)\n\t\tstrlcpy(desc, p, descsz);\n}\n" > systraceret } ' cat $sysinc $sysent >> $syssw cat $sysarg $sysdcl \ $syscompat $syscompatdcl \ $syscompat4 $syscompat4dcl \ $syscompat6 $syscompat6dcl \ $syscompat7 $syscompat7dcl \ $syscompat10 $syscompat10dcl \ + $syscompat11 $syscompat11dcl \ $sysaue $sysprotoend > $sysproto cat $systracetmp >> $systrace cat $systraceret >> $systrace Index: head/sys/kern/sys_socket.c =================================================================== --- head/sys/kern/sys_socket.c (revision 318735) +++ head/sys/kern/sys_socket.c (revision 318736) @@ -1,804 +1,807 @@ /*- * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)sys_socket.c 8.1 (Berkeley) 6/10/93 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* XXX */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static SYSCTL_NODE(_kern_ipc, OID_AUTO, aio, CTLFLAG_RD, NULL, "socket AIO stats"); static int empty_results; SYSCTL_INT(_kern_ipc_aio, OID_AUTO, empty_results, CTLFLAG_RD, &empty_results, 0, "socket operation returned EAGAIN"); static int empty_retries; SYSCTL_INT(_kern_ipc_aio, OID_AUTO, empty_retries, CTLFLAG_RD, &empty_retries, 0, "socket operation retries"); static fo_rdwr_t soo_read; static fo_rdwr_t soo_write; static fo_ioctl_t soo_ioctl; static fo_poll_t soo_poll; extern fo_kqfilter_t soo_kqfilter; static fo_stat_t soo_stat; static fo_close_t soo_close; static fo_fill_kinfo_t soo_fill_kinfo; static fo_aio_queue_t soo_aio_queue; static void soo_aio_cancel(struct kaiocb *job); struct fileops socketops = { .fo_read = soo_read, .fo_write = soo_write, .fo_truncate = invfo_truncate, .fo_ioctl = soo_ioctl, .fo_poll = soo_poll, .fo_kqfilter = soo_kqfilter, .fo_stat = soo_stat, .fo_close = soo_close, .fo_chmod = invfo_chmod, .fo_chown = invfo_chown, .fo_sendfile = invfo_sendfile, .fo_fill_kinfo = soo_fill_kinfo, .fo_aio_queue = soo_aio_queue, .fo_flags = DFLAG_PASSABLE }; static int soo_read(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { struct socket *so = fp->f_data; int error; #ifdef MAC error = mac_socket_check_receive(active_cred, so); if (error) return (error); #endif error = soreceive(so, 0, uio, 0, 0, 0); return (error); } static int soo_write(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { struct socket *so = fp->f_data; int error; #ifdef MAC error = mac_socket_check_send(active_cred, so); if (error) return (error); #endif error = sosend(so, 0, uio, 0, 0, 0, uio->uio_td); if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) { PROC_LOCK(uio->uio_td->td_proc); tdsignal(uio->uio_td, SIGPIPE); PROC_UNLOCK(uio->uio_td->td_proc); } return (error); } static int soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, struct thread *td) { struct socket *so = fp->f_data; int error = 0; switch (cmd) { case FIONBIO: SOCK_LOCK(so); if (*(int *)data) so->so_state |= SS_NBIO; else so->so_state &= ~SS_NBIO; SOCK_UNLOCK(so); break; case FIOASYNC: /* * XXXRW: This code separately acquires SOCK_LOCK(so) and * SOCKBUF_LOCK(&so->so_rcv) even though they are the same * mutex to avoid introducing the assumption that they are * the same. */ if (*(int *)data) { SOCK_LOCK(so); so->so_state |= SS_ASYNC; SOCK_UNLOCK(so); SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_flags |= SB_ASYNC; SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_flags |= SB_ASYNC; SOCKBUF_UNLOCK(&so->so_snd); } else { SOCK_LOCK(so); so->so_state &= ~SS_ASYNC; SOCK_UNLOCK(so); SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_flags &= ~SB_ASYNC; SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_flags &= ~SB_ASYNC; SOCKBUF_UNLOCK(&so->so_snd); } break; case FIONREAD: /* Unlocked read. */ *(int *)data = sbavail(&so->so_rcv); break; case FIONWRITE: /* Unlocked read. */ *(int *)data = sbavail(&so->so_snd); break; case FIONSPACE: /* Unlocked read. */ if ((so->so_snd.sb_hiwat < sbused(&so->so_snd)) || (so->so_snd.sb_mbmax < so->so_snd.sb_mbcnt)) *(int *)data = 0; else *(int *)data = sbspace(&so->so_snd); break; case FIOSETOWN: error = fsetown(*(int *)data, &so->so_sigio); break; case FIOGETOWN: *(int *)data = fgetown(&so->so_sigio); break; case SIOCSPGRP: error = fsetown(-(*(int *)data), &so->so_sigio); break; case SIOCGPGRP: *(int *)data = -fgetown(&so->so_sigio); break; case SIOCATMARK: /* Unlocked read. */ *(int *)data = (so->so_rcv.sb_state & SBS_RCVATMARK) != 0; break; default: /* * Interface/routing/protocol specific ioctls: interface and * routing ioctls should have a different entry since a * socket is unnecessary. */ if (IOCGROUP(cmd) == 'i') error = ifioctl(so, cmd, data, td); else if (IOCGROUP(cmd) == 'r') { CURVNET_SET(so->so_vnet); error = rtioctl_fib(cmd, data, so->so_fibnum); CURVNET_RESTORE(); } else { CURVNET_SET(so->so_vnet); error = ((*so->so_proto->pr_usrreqs->pru_control) (so, cmd, data, 0, td)); CURVNET_RESTORE(); } break; } return (error); } static int soo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { struct socket *so = fp->f_data; #ifdef MAC int error; error = mac_socket_check_poll(active_cred, so); if (error) return (error); #endif return (sopoll(so, events, fp->f_cred, td)); } static int soo_stat(struct file *fp, struct stat *ub, struct ucred *active_cred, struct thread *td) { struct socket *so = fp->f_data; struct sockbuf *sb; #ifdef MAC int error; #endif bzero((caddr_t)ub, sizeof (*ub)); ub->st_mode = S_IFSOCK; #ifdef MAC error = mac_socket_check_stat(active_cred, so); if (error) return (error); #endif /* * If SBS_CANTRCVMORE is set, but there's still data left in the * receive buffer, the socket is still readable. */ sb = &so->so_rcv; SOCKBUF_LOCK(sb); if ((sb->sb_state & SBS_CANTRCVMORE) == 0 || sbavail(sb)) ub->st_mode |= S_IRUSR | S_IRGRP | S_IROTH; ub->st_size = sbavail(sb) - sb->sb_ctl; SOCKBUF_UNLOCK(sb); sb = &so->so_snd; SOCKBUF_LOCK(sb); if ((sb->sb_state & SBS_CANTSENDMORE) == 0) ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH; SOCKBUF_UNLOCK(sb); ub->st_uid = so->so_cred->cr_uid; ub->st_gid = so->so_cred->cr_gid; return (*so->so_proto->pr_usrreqs->pru_sense)(so, ub); } /* * API socket close on file pointer. We call soclose() to close the socket * (including initiating closing protocols). soclose() will sorele() the * file reference but the actual socket will not go away until the socket's * ref count hits 0. */ static int soo_close(struct file *fp, struct thread *td) { int error = 0; struct socket *so; so = fp->f_data; fp->f_ops = &badfileops; fp->f_data = NULL; if (so) error = soclose(so); return (error); } static int soo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { struct sockaddr *sa; struct inpcb *inpcb; struct unpcb *unpcb; struct socket *so; int error; kif->kf_type = KF_TYPE_SOCKET; so = fp->f_data; - kif->kf_sock_domain = so->so_proto->pr_domain->dom_family; - kif->kf_sock_type = so->so_type; - kif->kf_sock_protocol = so->so_proto->pr_protocol; + kif->kf_un.kf_sock.kf_sock_domain0 = + so->so_proto->pr_domain->dom_family; + kif->kf_un.kf_sock.kf_sock_type0 = so->so_type; + kif->kf_un.kf_sock.kf_sock_protocol0 = so->so_proto->pr_protocol; kif->kf_un.kf_sock.kf_sock_pcb = (uintptr_t)so->so_pcb; - switch (kif->kf_sock_domain) { + switch (kif->kf_un.kf_sock.kf_sock_domain0) { case AF_INET: case AF_INET6: - if (kif->kf_sock_protocol == IPPROTO_TCP) { + if (kif->kf_un.kf_sock.kf_sock_protocol0 == IPPROTO_TCP) { if (so->so_pcb != NULL) { inpcb = (struct inpcb *)(so->so_pcb); kif->kf_un.kf_sock.kf_sock_inpcb = (uintptr_t)inpcb->inp_ppcb; } } break; case AF_UNIX: if (so->so_pcb != NULL) { unpcb = (struct unpcb *)(so->so_pcb); if (unpcb->unp_conn) { kif->kf_un.kf_sock.kf_sock_unpconn = (uintptr_t)unpcb->unp_conn; kif->kf_un.kf_sock.kf_sock_rcv_sb_state = so->so_rcv.sb_state; kif->kf_un.kf_sock.kf_sock_snd_sb_state = so->so_snd.sb_state; } } break; } error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa); - if (error == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) { - bcopy(sa, &kif->kf_sa_local, sa->sa_len); + if (error == 0 && + sa->sa_len <= sizeof(kif->kf_un.kf_sock.kf_sa_local)) { + bcopy(sa, &kif->kf_un.kf_sock.kf_sa_local, sa->sa_len); free(sa, M_SONAME); } error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa); - if (error == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) { - bcopy(sa, &kif->kf_sa_peer, sa->sa_len); + if (error == 0 && + sa->sa_len <= sizeof(kif->kf_un.kf_sock.kf_sa_peer)) { + bcopy(sa, &kif->kf_un.kf_sock.kf_sa_peer, sa->sa_len); free(sa, M_SONAME); } strncpy(kif->kf_path, so->so_proto->pr_domain->dom_name, sizeof(kif->kf_path)); return (0); } /* * Use the 'backend3' field in AIO jobs to store the amount of data * completed by the AIO job so far. */ #define aio_done backend3 static STAILQ_HEAD(, task) soaio_jobs; static struct mtx soaio_jobs_lock; static struct task soaio_kproc_task; static int soaio_starting, soaio_idle, soaio_queued; static struct unrhdr *soaio_kproc_unr; static int soaio_max_procs = MAX_AIO_PROCS; SYSCTL_INT(_kern_ipc_aio, OID_AUTO, max_procs, CTLFLAG_RW, &soaio_max_procs, 0, "Maximum number of kernel processes to use for async socket IO"); static int soaio_num_procs; SYSCTL_INT(_kern_ipc_aio, OID_AUTO, num_procs, CTLFLAG_RD, &soaio_num_procs, 0, "Number of active kernel processes for async socket IO"); static int soaio_target_procs = TARGET_AIO_PROCS; SYSCTL_INT(_kern_ipc_aio, OID_AUTO, target_procs, CTLFLAG_RD, &soaio_target_procs, 0, "Preferred number of ready kernel processes for async socket IO"); static int soaio_lifetime; SYSCTL_INT(_kern_ipc_aio, OID_AUTO, lifetime, CTLFLAG_RW, &soaio_lifetime, 0, "Maximum lifetime for idle aiod"); static void soaio_kproc_loop(void *arg) { struct proc *p; struct vmspace *myvm; struct task *task; int error, id, pending; id = (intptr_t)arg; /* * Grab an extra reference on the daemon's vmspace so that it * doesn't get freed by jobs that switch to a different * vmspace. */ p = curproc; myvm = vmspace_acquire_ref(p); mtx_lock(&soaio_jobs_lock); MPASS(soaio_starting > 0); soaio_starting--; for (;;) { while (!STAILQ_EMPTY(&soaio_jobs)) { task = STAILQ_FIRST(&soaio_jobs); STAILQ_REMOVE_HEAD(&soaio_jobs, ta_link); soaio_queued--; pending = task->ta_pending; task->ta_pending = 0; mtx_unlock(&soaio_jobs_lock); task->ta_func(task->ta_context, pending); mtx_lock(&soaio_jobs_lock); } MPASS(soaio_queued == 0); if (p->p_vmspace != myvm) { mtx_unlock(&soaio_jobs_lock); vmspace_switch_aio(myvm); mtx_lock(&soaio_jobs_lock); continue; } soaio_idle++; error = mtx_sleep(&soaio_idle, &soaio_jobs_lock, 0, "-", soaio_lifetime); soaio_idle--; if (error == EWOULDBLOCK && STAILQ_EMPTY(&soaio_jobs) && soaio_num_procs > soaio_target_procs) break; } soaio_num_procs--; mtx_unlock(&soaio_jobs_lock); free_unr(soaio_kproc_unr, id); kproc_exit(0); } static void soaio_kproc_create(void *context, int pending) { struct proc *p; int error, id; mtx_lock(&soaio_jobs_lock); for (;;) { if (soaio_num_procs < soaio_target_procs) { /* Must create */ } else if (soaio_num_procs >= soaio_max_procs) { /* * Hit the limit on kernel processes, don't * create another one. */ break; } else if (soaio_queued <= soaio_idle + soaio_starting) { /* * No more AIO jobs waiting for a process to be * created, so stop. */ break; } soaio_starting++; mtx_unlock(&soaio_jobs_lock); id = alloc_unr(soaio_kproc_unr); error = kproc_create(soaio_kproc_loop, (void *)(intptr_t)id, &p, 0, 0, "soaiod%d", id); if (error != 0) { free_unr(soaio_kproc_unr, id); mtx_lock(&soaio_jobs_lock); soaio_starting--; break; } mtx_lock(&soaio_jobs_lock); soaio_num_procs++; } mtx_unlock(&soaio_jobs_lock); } void soaio_enqueue(struct task *task) { mtx_lock(&soaio_jobs_lock); MPASS(task->ta_pending == 0); task->ta_pending++; STAILQ_INSERT_TAIL(&soaio_jobs, task, ta_link); soaio_queued++; if (soaio_queued <= soaio_idle) wakeup_one(&soaio_idle); else if (soaio_num_procs < soaio_max_procs) taskqueue_enqueue(taskqueue_thread, &soaio_kproc_task); mtx_unlock(&soaio_jobs_lock); } static void soaio_init(void) { soaio_lifetime = AIOD_LIFETIME_DEFAULT; STAILQ_INIT(&soaio_jobs); mtx_init(&soaio_jobs_lock, "soaio jobs", NULL, MTX_DEF); soaio_kproc_unr = new_unrhdr(1, INT_MAX, NULL); TASK_INIT(&soaio_kproc_task, 0, soaio_kproc_create, NULL); if (soaio_target_procs > 0) taskqueue_enqueue(taskqueue_thread, &soaio_kproc_task); } SYSINIT(soaio, SI_SUB_VFS, SI_ORDER_ANY, soaio_init, NULL); static __inline int soaio_ready(struct socket *so, struct sockbuf *sb) { return (sb == &so->so_rcv ? soreadable(so) : sowriteable(so)); } static void soaio_process_job(struct socket *so, struct sockbuf *sb, struct kaiocb *job) { struct ucred *td_savedcred; struct thread *td; struct file *fp; struct uio uio; struct iovec iov; size_t cnt, done; long ru_before; int error, flags; SOCKBUF_UNLOCK(sb); aio_switch_vmspace(job); td = curthread; fp = job->fd_file; retry: td_savedcred = td->td_ucred; td->td_ucred = job->cred; done = job->aio_done; cnt = job->uaiocb.aio_nbytes - done; iov.iov_base = (void *)((uintptr_t)job->uaiocb.aio_buf + done); iov.iov_len = cnt; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = cnt; uio.uio_segflg = UIO_USERSPACE; uio.uio_td = td; flags = MSG_NBIO; /* * For resource usage accounting, only count a completed request * as a single message to avoid counting multiple calls to * sosend/soreceive on a blocking socket. */ if (sb == &so->so_rcv) { uio.uio_rw = UIO_READ; ru_before = td->td_ru.ru_msgrcv; #ifdef MAC error = mac_socket_check_receive(fp->f_cred, so); if (error == 0) #endif error = soreceive(so, NULL, &uio, NULL, NULL, &flags); if (td->td_ru.ru_msgrcv != ru_before) job->msgrcv = 1; } else { if (!TAILQ_EMPTY(&sb->sb_aiojobq)) flags |= MSG_MORETOCOME; uio.uio_rw = UIO_WRITE; ru_before = td->td_ru.ru_msgsnd; #ifdef MAC error = mac_socket_check_send(fp->f_cred, so); if (error == 0) #endif error = sosend(so, NULL, &uio, NULL, NULL, flags, td); if (td->td_ru.ru_msgsnd != ru_before) job->msgsnd = 1; if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) { PROC_LOCK(job->userproc); kern_psignal(job->userproc, SIGPIPE); PROC_UNLOCK(job->userproc); } } done += cnt - uio.uio_resid; job->aio_done = done; td->td_ucred = td_savedcred; if (error == EWOULDBLOCK) { /* * The request was either partially completed or not * completed at all due to racing with a read() or * write() on the socket. If the socket is * non-blocking, return with any partial completion. * If the socket is blocking or if no progress has * been made, requeue this request at the head of the * queue to try again when the socket is ready. */ MPASS(done != job->uaiocb.aio_nbytes); SOCKBUF_LOCK(sb); if (done == 0 || !(so->so_state & SS_NBIO)) { empty_results++; if (soaio_ready(so, sb)) { empty_retries++; SOCKBUF_UNLOCK(sb); goto retry; } if (!aio_set_cancel_function(job, soo_aio_cancel)) { SOCKBUF_UNLOCK(sb); if (done != 0) aio_complete(job, done, 0); else aio_cancel(job); SOCKBUF_LOCK(sb); } else { TAILQ_INSERT_HEAD(&sb->sb_aiojobq, job, list); } return; } SOCKBUF_UNLOCK(sb); } if (done != 0 && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; if (error) aio_complete(job, -1, error); else aio_complete(job, done, 0); SOCKBUF_LOCK(sb); } static void soaio_process_sb(struct socket *so, struct sockbuf *sb) { struct kaiocb *job; SOCKBUF_LOCK(sb); while (!TAILQ_EMPTY(&sb->sb_aiojobq) && soaio_ready(so, sb)) { job = TAILQ_FIRST(&sb->sb_aiojobq); TAILQ_REMOVE(&sb->sb_aiojobq, job, list); if (!aio_clear_cancel_function(job)) continue; soaio_process_job(so, sb, job); } /* * If there are still pending requests, the socket must not be * ready so set SB_AIO to request a wakeup when the socket * becomes ready. */ if (!TAILQ_EMPTY(&sb->sb_aiojobq)) sb->sb_flags |= SB_AIO; sb->sb_flags &= ~SB_AIO_RUNNING; SOCKBUF_UNLOCK(sb); ACCEPT_LOCK(); SOCK_LOCK(so); sorele(so); } void soaio_rcv(void *context, int pending) { struct socket *so; so = context; soaio_process_sb(so, &so->so_rcv); } void soaio_snd(void *context, int pending) { struct socket *so; so = context; soaio_process_sb(so, &so->so_snd); } void sowakeup_aio(struct socket *so, struct sockbuf *sb) { SOCKBUF_LOCK_ASSERT(sb); sb->sb_flags &= ~SB_AIO; if (sb->sb_flags & SB_AIO_RUNNING) return; sb->sb_flags |= SB_AIO_RUNNING; if (sb == &so->so_snd) SOCK_LOCK(so); soref(so); if (sb == &so->so_snd) SOCK_UNLOCK(so); soaio_enqueue(&sb->sb_aiotask); } static void soo_aio_cancel(struct kaiocb *job) { struct socket *so; struct sockbuf *sb; long done; int opcode; so = job->fd_file->f_data; opcode = job->uaiocb.aio_lio_opcode; if (opcode == LIO_READ) sb = &so->so_rcv; else { MPASS(opcode == LIO_WRITE); sb = &so->so_snd; } SOCKBUF_LOCK(sb); if (!aio_cancel_cleared(job)) TAILQ_REMOVE(&sb->sb_aiojobq, job, list); if (TAILQ_EMPTY(&sb->sb_aiojobq)) sb->sb_flags &= ~SB_AIO; SOCKBUF_UNLOCK(sb); done = job->aio_done; if (done != 0) aio_complete(job, done, 0); else aio_cancel(job); } static int soo_aio_queue(struct file *fp, struct kaiocb *job) { struct socket *so; struct sockbuf *sb; int error; so = fp->f_data; error = (*so->so_proto->pr_usrreqs->pru_aio_queue)(so, job); if (error == 0) return (0); switch (job->uaiocb.aio_lio_opcode) { case LIO_READ: sb = &so->so_rcv; break; case LIO_WRITE: sb = &so->so_snd; break; default: return (EINVAL); } SOCKBUF_LOCK(sb); if (!aio_set_cancel_function(job, soo_aio_cancel)) panic("new job was cancelled"); TAILQ_INSERT_TAIL(&sb->sb_aiojobq, job, list); if (!(sb->sb_flags & SB_AIO_RUNNING)) { if (soaio_ready(so, sb)) sowakeup_aio(so, sb); else sb->sb_flags |= SB_AIO; } SOCKBUF_UNLOCK(sb); return (0); } Index: head/sys/kern/syscalls.master =================================================================== --- head/sys/kern/syscalls.master (revision 318735) +++ head/sys/kern/syscalls.master (revision 318736) @@ -1,1002 +1,1022 @@ $FreeBSD$ ; from: @(#)syscalls.master 8.2 (Berkeley) 1/13/94 ; ; System call name/number master file. ; Processed to created init_sysent.c, syscalls.c and syscall.h. ; Columns: number audit type name alt{name,tag,rtyp}/comments ; number system call number, must be in order ; audit the audit event associated with the system call ; A value of AUE_NULL means no auditing, but it also means that ; there is no audit event for the call at this time. For the ; case where the event exists, but we don't want auditing, the ; event should be #defined to AUE_NULL in audit_kevents.h. ; type one of STD, OBSOL, UNIMPL, COMPAT, COMPAT4, COMPAT6, -; COMPAT7, NODEF, NOARGS, NOPROTO, NOSTD +; COMPAT7, COMPAT11, NODEF, NOARGS, NOPROTO, NOSTD ; The COMPAT* options may be combined with one or more NO* ; options separated by '|' with no spaces (e.g. COMPAT|NOARGS) ; name psuedo-prototype of syscall routine ; If one of the following alts is different, then all appear: ; altname name of system call if different ; alttag name of args struct tag if different from [o]`name'"_args" ; altrtyp return type if not int (bogus - syscalls always return int) ; for UNIMPL/OBSOL, name continues with comments ; types: ; STD always included ; COMPAT included on COMPAT #ifdef ; COMPAT4 included on COMPAT_FREEBSD4 #ifdef (FreeBSD 4 compat) ; COMPAT6 included on COMPAT_FREEBSD6 #ifdef (FreeBSD 6 compat) ; COMPAT7 included on COMPAT_FREEBSD7 #ifdef (FreeBSD 7 compat) ; COMPAT10 included on COMPAT_FREEBSD10 #ifdef (FreeBSD 10 compat) +; COMPAT11 included on COMPAT11 #ifdef (FreeBSD 11 compat) ; OBSOL obsolete, not included in system, only specifies name ; UNIMPL not implemented, placeholder only ; NOSTD implemented but as a lkm that can be statically ; compiled in; sysent entry will be filled with lkmressys ; so the SYSCALL_MODULE macro works ; NOARGS same as STD except do not create structure in sys/sysproto.h ; NODEF same as STD except only have the entry in the syscall table ; added. Meaning - do not create structure or function ; prototype in sys/sysproto.h ; NOPROTO same as STD except do not create structure or ; function prototype in sys/sysproto.h. Does add a ; definition to syscall.h besides adding a sysent. ; NOTSTATIC syscall is loadable ; ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master ; #ifdef's, etc. may be included, and are copied to the output files. #include #include #include ; Reserved/unimplemented system calls in the range 0-150 inclusive ; are reserved for use in future Berkeley releases. ; Additional system calls implemented in vendor and other ; redistributions should be placed in the reserved range at the end ; of the current calls. 0 AUE_NULL STD { int nosys(void); } syscall nosys_args int 1 AUE_EXIT STD { void sys_exit(int rval); } exit \ sys_exit_args void 2 AUE_FORK STD { int fork(void); } 3 AUE_READ STD { ssize_t read(int fd, void *buf, \ size_t nbyte); } 4 AUE_WRITE STD { ssize_t write(int fd, const void *buf, \ size_t nbyte); } 5 AUE_OPEN_RWTC STD { int open(char *path, int flags, int mode); } ; XXX should be { int open(const char *path, int flags, ...); } ; but we're not ready for `const' or varargs. ; XXX man page says `mode_t mode'. 6 AUE_CLOSE STD { int close(int fd); } 7 AUE_WAIT4 STD { int wait4(int pid, int *status, \ int options, struct rusage *rusage); } 8 AUE_CREAT COMPAT { int creat(char *path, int mode); } 9 AUE_LINK STD { int link(char *path, char *link); } 10 AUE_UNLINK STD { int unlink(char *path); } 11 AUE_NULL OBSOL execv 12 AUE_CHDIR STD { int chdir(char *path); } 13 AUE_FCHDIR STD { int fchdir(int fd); } -14 AUE_MKNOD STD { int mknod(char *path, int mode, int dev); } +14 AUE_MKNOD COMPAT11 { int mknod(char *path, int mode, int dev); } 15 AUE_CHMOD STD { int chmod(char *path, int mode); } 16 AUE_CHOWN STD { int chown(char *path, int uid, int gid); } 17 AUE_NULL STD { int obreak(char *nsize); } break \ obreak_args int 18 AUE_GETFSSTAT COMPAT4 { int getfsstat(struct ostatfs *buf, \ long bufsize, int mode); } 19 AUE_LSEEK COMPAT { long lseek(int fd, long offset, \ int whence); } 20 AUE_GETPID STD { pid_t getpid(void); } 21 AUE_MOUNT STD { int mount(char *type, char *path, \ int flags, caddr_t data); } ; XXX `path' should have type `const char *' but we're not ready for that. 22 AUE_UMOUNT STD { int unmount(char *path, int flags); } 23 AUE_SETUID STD { int setuid(uid_t uid); } 24 AUE_GETUID STD { uid_t getuid(void); } 25 AUE_GETEUID STD { uid_t geteuid(void); } 26 AUE_PTRACE STD { int ptrace(int req, pid_t pid, \ caddr_t addr, int data); } 27 AUE_RECVMSG STD { int recvmsg(int s, struct msghdr *msg, \ int flags); } 28 AUE_SENDMSG STD { int sendmsg(int s, struct msghdr *msg, \ int flags); } 29 AUE_RECVFROM STD { int recvfrom(int s, caddr_t buf, \ size_t len, int flags, \ struct sockaddr * __restrict from, \ __socklen_t * __restrict fromlenaddr); } 30 AUE_ACCEPT STD { int accept(int s, \ struct sockaddr * __restrict name, \ __socklen_t * __restrict anamelen); } 31 AUE_GETPEERNAME STD { int getpeername(int fdes, \ struct sockaddr * __restrict asa, \ __socklen_t * __restrict alen); } 32 AUE_GETSOCKNAME STD { int getsockname(int fdes, \ struct sockaddr * __restrict asa, \ __socklen_t * __restrict alen); } 33 AUE_ACCESS STD { int access(char *path, int amode); } 34 AUE_CHFLAGS STD { int chflags(const char *path, u_long flags); } 35 AUE_FCHFLAGS STD { int fchflags(int fd, u_long flags); } 36 AUE_SYNC STD { int sync(void); } 37 AUE_KILL STD { int kill(int pid, int signum); } 38 AUE_STAT COMPAT { int stat(char *path, struct ostat *ub); } 39 AUE_GETPPID STD { pid_t getppid(void); } 40 AUE_LSTAT COMPAT { int lstat(char *path, struct ostat *ub); } 41 AUE_DUP STD { int dup(u_int fd); } 42 AUE_PIPE COMPAT10 { int pipe(void); } 43 AUE_GETEGID STD { gid_t getegid(void); } 44 AUE_PROFILE STD { int profil(caddr_t samples, size_t size, \ size_t offset, u_int scale); } 45 AUE_KTRACE STD { int ktrace(const char *fname, int ops, \ int facs, int pid); } 46 AUE_SIGACTION COMPAT { int sigaction(int signum, \ struct osigaction *nsa, \ struct osigaction *osa); } 47 AUE_GETGID STD { gid_t getgid(void); } 48 AUE_SIGPROCMASK COMPAT { int sigprocmask(int how, osigset_t mask); } ; XXX note nonstandard (bogus) calling convention - the libc stub passes ; us the mask, not a pointer to it, and we return the old mask as the ; (int) return value. 49 AUE_GETLOGIN STD { int getlogin(char *namebuf, u_int \ namelen); } 50 AUE_SETLOGIN STD { int setlogin(char *namebuf); } 51 AUE_ACCT STD { int acct(char *path); } 52 AUE_SIGPENDING COMPAT { int sigpending(void); } 53 AUE_SIGALTSTACK STD { int sigaltstack(stack_t *ss, \ stack_t *oss); } 54 AUE_IOCTL STD { int ioctl(int fd, u_long com, \ caddr_t data); } 55 AUE_REBOOT STD { int reboot(int opt); } 56 AUE_REVOKE STD { int revoke(char *path); } 57 AUE_SYMLINK STD { int symlink(char *path, char *link); } 58 AUE_READLINK STD { ssize_t readlink(char *path, char *buf, \ size_t count); } 59 AUE_EXECVE STD { int execve(char *fname, char **argv, \ char **envv); } 60 AUE_UMASK STD { int umask(int newmask); } umask umask_args \ int 61 AUE_CHROOT STD { int chroot(char *path); } 62 AUE_FSTAT COMPAT { int fstat(int fd, struct ostat *sb); } 63 AUE_NULL COMPAT { int getkerninfo(int op, char *where, \ size_t *size, int arg); } getkerninfo \ getkerninfo_args int 64 AUE_NULL COMPAT { int getpagesize(void); } getpagesize \ getpagesize_args int 65 AUE_MSYNC STD { int msync(void *addr, size_t len, \ int flags); } 66 AUE_VFORK STD { int vfork(void); } 67 AUE_NULL OBSOL vread 68 AUE_NULL OBSOL vwrite 69 AUE_SBRK STD { int sbrk(int incr); } 70 AUE_SSTK STD { int sstk(int incr); } 71 AUE_MMAP COMPAT { int mmap(void *addr, int len, int prot, \ int flags, int fd, long pos); } 72 AUE_O_VADVISE STD { int ovadvise(int anom); } vadvise \ ovadvise_args int 73 AUE_MUNMAP STD { int munmap(void *addr, size_t len); } 74 AUE_MPROTECT STD { int mprotect(void *addr, size_t len, \ int prot); } 75 AUE_MADVISE STD { int madvise(void *addr, size_t len, \ int behav); } 76 AUE_NULL OBSOL vhangup 77 AUE_NULL OBSOL vlimit 78 AUE_MINCORE STD { int mincore(const void *addr, size_t len, \ char *vec); } 79 AUE_GETGROUPS STD { int getgroups(u_int gidsetsize, \ gid_t *gidset); } 80 AUE_SETGROUPS STD { int setgroups(u_int gidsetsize, \ gid_t *gidset); } 81 AUE_GETPGRP STD { int getpgrp(void); } 82 AUE_SETPGRP STD { int setpgid(int pid, int pgid); } 83 AUE_SETITIMER STD { int setitimer(u_int which, struct \ itimerval *itv, struct itimerval *oitv); } 84 AUE_WAIT4 COMPAT { int wait(void); } 85 AUE_SWAPON STD { int swapon(char *name); } 86 AUE_GETITIMER STD { int getitimer(u_int which, \ struct itimerval *itv); } 87 AUE_SYSCTL COMPAT { int gethostname(char *hostname, \ u_int len); } gethostname \ gethostname_args int 88 AUE_SYSCTL COMPAT { int sethostname(char *hostname, \ u_int len); } sethostname \ sethostname_args int 89 AUE_GETDTABLESIZE STD { int getdtablesize(void); } 90 AUE_DUP2 STD { int dup2(u_int from, u_int to); } 91 AUE_NULL UNIMPL getdopt 92 AUE_FCNTL STD { int fcntl(int fd, int cmd, long arg); } ; XXX should be { int fcntl(int fd, int cmd, ...); } ; but we're not ready for varargs. 93 AUE_SELECT STD { int select(int nd, fd_set *in, fd_set *ou, \ fd_set *ex, struct timeval *tv); } 94 AUE_NULL UNIMPL setdopt 95 AUE_FSYNC STD { int fsync(int fd); } 96 AUE_SETPRIORITY STD { int setpriority(int which, int who, \ int prio); } 97 AUE_SOCKET STD { int socket(int domain, int type, \ int protocol); } 98 AUE_CONNECT STD { int connect(int s, caddr_t name, \ int namelen); } 99 AUE_ACCEPT COMPAT|NOARGS { int accept(int s, caddr_t name, \ int *anamelen); } accept accept_args int 100 AUE_GETPRIORITY STD { int getpriority(int which, int who); } 101 AUE_SEND COMPAT { int send(int s, caddr_t buf, int len, \ int flags); } 102 AUE_RECV COMPAT { int recv(int s, caddr_t buf, int len, \ int flags); } 103 AUE_SIGRETURN COMPAT { int sigreturn( \ struct osigcontext *sigcntxp); } 104 AUE_BIND STD { int bind(int s, caddr_t name, \ int namelen); } 105 AUE_SETSOCKOPT STD { int setsockopt(int s, int level, int name, \ caddr_t val, int valsize); } 106 AUE_LISTEN STD { int listen(int s, int backlog); } 107 AUE_NULL OBSOL vtimes 108 AUE_NULL COMPAT { int sigvec(int signum, struct sigvec *nsv, \ struct sigvec *osv); } 109 AUE_NULL COMPAT { int sigblock(int mask); } 110 AUE_NULL COMPAT { int sigsetmask(int mask); } 111 AUE_NULL COMPAT { int sigsuspend(osigset_t mask); } ; XXX note nonstandard (bogus) calling convention - the libc stub passes ; us the mask, not a pointer to it. 112 AUE_NULL COMPAT { int sigstack(struct sigstack *nss, \ struct sigstack *oss); } 113 AUE_RECVMSG COMPAT { int recvmsg(int s, struct omsghdr *msg, \ int flags); } 114 AUE_SENDMSG COMPAT { int sendmsg(int s, caddr_t msg, \ int flags); } 115 AUE_NULL OBSOL vtrace 116 AUE_GETTIMEOFDAY STD { int gettimeofday(struct timeval *tp, \ struct timezone *tzp); } 117 AUE_GETRUSAGE STD { int getrusage(int who, \ struct rusage *rusage); } 118 AUE_GETSOCKOPT STD { int getsockopt(int s, int level, int name, \ caddr_t val, int *avalsize); } 119 AUE_NULL UNIMPL resuba (BSD/OS 2.x) 120 AUE_READV STD { int readv(int fd, struct iovec *iovp, \ u_int iovcnt); } 121 AUE_WRITEV STD { int writev(int fd, struct iovec *iovp, \ u_int iovcnt); } 122 AUE_SETTIMEOFDAY STD { int settimeofday(struct timeval *tv, \ struct timezone *tzp); } 123 AUE_FCHOWN STD { int fchown(int fd, int uid, int gid); } 124 AUE_FCHMOD STD { int fchmod(int fd, int mode); } 125 AUE_RECVFROM COMPAT|NOARGS { int recvfrom(int s, caddr_t buf, \ size_t len, int flags, caddr_t from, int \ *fromlenaddr); } recvfrom recvfrom_args \ int 126 AUE_SETREUID STD { int setreuid(int ruid, int euid); } 127 AUE_SETREGID STD { int setregid(int rgid, int egid); } 128 AUE_RENAME STD { int rename(char *from, char *to); } 129 AUE_TRUNCATE COMPAT { int truncate(char *path, long length); } 130 AUE_FTRUNCATE COMPAT { int ftruncate(int fd, long length); } 131 AUE_FLOCK STD { int flock(int fd, int how); } 132 AUE_MKFIFO STD { int mkfifo(char *path, int mode); } 133 AUE_SENDTO STD { int sendto(int s, caddr_t buf, size_t len, \ int flags, caddr_t to, int tolen); } 134 AUE_SHUTDOWN STD { int shutdown(int s, int how); } 135 AUE_SOCKETPAIR STD { int socketpair(int domain, int type, \ int protocol, int *rsv); } 136 AUE_MKDIR STD { int mkdir(char *path, int mode); } 137 AUE_RMDIR STD { int rmdir(char *path); } 138 AUE_UTIMES STD { int utimes(char *path, \ struct timeval *tptr); } 139 AUE_NULL OBSOL 4.2 sigreturn 140 AUE_ADJTIME STD { int adjtime(struct timeval *delta, \ struct timeval *olddelta); } 141 AUE_GETPEERNAME COMPAT { int getpeername(int fdes, caddr_t asa, \ int *alen); } 142 AUE_SYSCTL COMPAT { long gethostid(void); } 143 AUE_SYSCTL COMPAT { int sethostid(long hostid); } 144 AUE_GETRLIMIT COMPAT { int getrlimit(u_int which, struct \ orlimit *rlp); } 145 AUE_SETRLIMIT COMPAT { int setrlimit(u_int which, \ struct orlimit *rlp); } 146 AUE_KILLPG COMPAT { int killpg(int pgid, int signum); } 147 AUE_SETSID STD { int setsid(void); } 148 AUE_QUOTACTL STD { int quotactl(char *path, int cmd, int uid, \ caddr_t arg); } 149 AUE_O_QUOTA COMPAT { int quota(void); } 150 AUE_GETSOCKNAME COMPAT|NOARGS { int getsockname(int fdec, \ caddr_t asa, int *alen); } getsockname \ getsockname_args int ; Syscalls 151-180 inclusive are reserved for vendor-specific ; system calls. (This includes various calls added for compatibity ; with other Unix variants.) ; Some of these calls are now supported by BSD... 151 AUE_NULL UNIMPL sem_lock (BSD/OS 2.x) 152 AUE_NULL UNIMPL sem_wakeup (BSD/OS 2.x) 153 AUE_NULL UNIMPL asyncdaemon (BSD/OS 2.x) ; 154 is initialised by the NLM code, if present. 154 AUE_NULL NOSTD { int nlm_syscall(int debug_level, int grace_period, int addr_count, char **addrs); } ; 155 is initialized by the NFS code, if present. 155 AUE_NFS_SVC NOSTD { int nfssvc(int flag, caddr_t argp); } 156 AUE_GETDIRENTRIES COMPAT { int getdirentries(int fd, char *buf, \ u_int count, long *basep); } 157 AUE_STATFS COMPAT4 { int statfs(char *path, \ struct ostatfs *buf); } 158 AUE_FSTATFS COMPAT4 { int fstatfs(int fd, \ struct ostatfs *buf); } 159 AUE_NULL UNIMPL nosys 160 AUE_LGETFH STD { int lgetfh(char *fname, \ struct fhandle *fhp); } 161 AUE_NFS_GETFH STD { int getfh(char *fname, \ struct fhandle *fhp); } 162 AUE_SYSCTL COMPAT4 { int getdomainname(char *domainname, \ int len); } 163 AUE_SYSCTL COMPAT4 { int setdomainname(char *domainname, \ int len); } 164 AUE_NULL COMPAT4 { int uname(struct utsname *name); } 165 AUE_SYSARCH STD { int sysarch(int op, char *parms); } 166 AUE_RTPRIO STD { int rtprio(int function, pid_t pid, \ struct rtprio *rtp); } 167 AUE_NULL UNIMPL nosys 168 AUE_NULL UNIMPL nosys 169 AUE_SEMSYS NOSTD { int semsys(int which, int a2, int a3, \ int a4, int a5); } ; XXX should be { int semsys(int which, ...); } 170 AUE_MSGSYS NOSTD { int msgsys(int which, int a2, int a3, \ int a4, int a5, int a6); } ; XXX should be { int msgsys(int which, ...); } 171 AUE_SHMSYS NOSTD { int shmsys(int which, int a2, int a3, \ int a4); } ; XXX should be { int shmsys(int which, ...); } 172 AUE_NULL UNIMPL nosys 173 AUE_PREAD COMPAT6 { ssize_t pread(int fd, void *buf, \ size_t nbyte, int pad, off_t offset); } 174 AUE_PWRITE COMPAT6 { ssize_t pwrite(int fd, \ const void *buf, \ size_t nbyte, int pad, off_t offset); } 175 AUE_SETFIB STD { int setfib(int fibnum); } 176 AUE_NTP_ADJTIME STD { int ntp_adjtime(struct timex *tp); } 177 AUE_NULL UNIMPL sfork (BSD/OS 2.x) 178 AUE_NULL UNIMPL getdescriptor (BSD/OS 2.x) 179 AUE_NULL UNIMPL setdescriptor (BSD/OS 2.x) 180 AUE_NULL UNIMPL nosys ; Syscalls 181-199 are used by/reserved for BSD 181 AUE_SETGID STD { int setgid(gid_t gid); } 182 AUE_SETEGID STD { int setegid(gid_t egid); } 183 AUE_SETEUID STD { int seteuid(uid_t euid); } 184 AUE_NULL UNIMPL lfs_bmapv 185 AUE_NULL UNIMPL lfs_markv 186 AUE_NULL UNIMPL lfs_segclean 187 AUE_NULL UNIMPL lfs_segwait -188 AUE_STAT STD { int stat(char *path, struct stat *ub); } -189 AUE_FSTAT STD { int fstat(int fd, struct stat *sb); } -190 AUE_LSTAT STD { int lstat(char *path, struct stat *ub); } +188 AUE_STAT COMPAT11 { int stat(char *path, \ + struct freebsd11_stat *ub); } +189 AUE_FSTAT COMPAT11 { int fstat(int fd, \ + struct freebsd11_stat *sb); } +190 AUE_LSTAT COMPAT11 { int lstat(char *path, \ + struct freebsd11_stat *ub); } 191 AUE_PATHCONF STD { int pathconf(char *path, int name); } 192 AUE_FPATHCONF STD { int fpathconf(int fd, int name); } 193 AUE_NULL UNIMPL nosys 194 AUE_GETRLIMIT STD { int getrlimit(u_int which, \ struct rlimit *rlp); } getrlimit \ __getrlimit_args int 195 AUE_SETRLIMIT STD { int setrlimit(u_int which, \ struct rlimit *rlp); } setrlimit \ __setrlimit_args int -196 AUE_GETDIRENTRIES STD { int getdirentries(int fd, char *buf, \ +196 AUE_GETDIRENTRIES COMPAT11 { int getdirentries(int fd, char *buf, \ u_int count, long *basep); } 197 AUE_MMAP COMPAT6 { caddr_t mmap(caddr_t addr, \ size_t len, int prot, int flags, int fd, \ int pad, off_t pos); } 198 AUE_NULL NOPROTO { int nosys(void); } __syscall \ __syscall_args int 199 AUE_LSEEK COMPAT6 { off_t lseek(int fd, int pad, \ off_t offset, int whence); } 200 AUE_TRUNCATE COMPAT6 { int truncate(char *path, int pad, \ off_t length); } 201 AUE_FTRUNCATE COMPAT6 { int ftruncate(int fd, int pad, \ off_t length); } 202 AUE_SYSCTL STD { int __sysctl(int *name, u_int namelen, \ void *old, size_t *oldlenp, void *new, \ size_t newlen); } __sysctl sysctl_args int 203 AUE_MLOCK STD { int mlock(const void *addr, size_t len); } 204 AUE_MUNLOCK STD { int munlock(const void *addr, size_t len); } 205 AUE_UNDELETE STD { int undelete(char *path); } 206 AUE_FUTIMES STD { int futimes(int fd, struct timeval *tptr); } 207 AUE_GETPGID STD { int getpgid(pid_t pid); } 208 AUE_NULL UNIMPL newreboot (NetBSD) 209 AUE_POLL STD { int poll(struct pollfd *fds, u_int nfds, \ int timeout); } ; ; The following are reserved for loadable syscalls ; 210 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 211 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 212 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 213 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 214 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 215 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 216 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 217 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 218 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 219 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int ; ; The following were introduced with NetBSD/4.4Lite-2 220 AUE_SEMCTL COMPAT7|NOSTD { int __semctl(int semid, int semnum, \ int cmd, union semun_old *arg); } 221 AUE_SEMGET NOSTD { int semget(key_t key, int nsems, \ int semflg); } 222 AUE_SEMOP NOSTD { int semop(int semid, struct sembuf *sops, \ size_t nsops); } 223 AUE_NULL UNIMPL semconfig 224 AUE_MSGCTL COMPAT7|NOSTD { int msgctl(int msqid, int cmd, \ struct msqid_ds_old *buf); } 225 AUE_MSGGET NOSTD { int msgget(key_t key, int msgflg); } 226 AUE_MSGSND NOSTD { int msgsnd(int msqid, const void *msgp, \ size_t msgsz, int msgflg); } 227 AUE_MSGRCV NOSTD { ssize_t msgrcv(int msqid, void *msgp, \ size_t msgsz, long msgtyp, int msgflg); } 228 AUE_SHMAT NOSTD { int shmat(int shmid, const void *shmaddr, \ int shmflg); } 229 AUE_SHMCTL COMPAT7|NOSTD { int shmctl(int shmid, int cmd, \ struct shmid_ds_old *buf); } 230 AUE_SHMDT NOSTD { int shmdt(const void *shmaddr); } 231 AUE_SHMGET NOSTD { int shmget(key_t key, size_t size, \ int shmflg); } ; 232 AUE_NULL STD { int clock_gettime(clockid_t clock_id, \ struct timespec *tp); } 233 AUE_CLOCK_SETTIME STD { int clock_settime( \ clockid_t clock_id, \ const struct timespec *tp); } 234 AUE_NULL STD { int clock_getres(clockid_t clock_id, \ struct timespec *tp); } 235 AUE_NULL STD { int ktimer_create(clockid_t clock_id, \ struct sigevent *evp, int *timerid); } 236 AUE_NULL STD { int ktimer_delete(int timerid); } 237 AUE_NULL STD { int ktimer_settime(int timerid, int flags, \ const struct itimerspec *value, \ struct itimerspec *ovalue); } 238 AUE_NULL STD { int ktimer_gettime(int timerid, struct \ itimerspec *value); } 239 AUE_NULL STD { int ktimer_getoverrun(int timerid); } 240 AUE_NULL STD { int nanosleep(const struct timespec *rqtp, \ struct timespec *rmtp); } 241 AUE_NULL STD { int ffclock_getcounter(ffcounter *ffcount); } 242 AUE_NULL STD { int ffclock_setestimate( \ struct ffclock_estimate *cest); } 243 AUE_NULL STD { int ffclock_getestimate( \ struct ffclock_estimate *cest); } 244 AUE_NULL STD { int clock_nanosleep(clockid_t clock_id, \ int flags, const struct timespec *rqtp, \ struct timespec *rmtp); } 245 AUE_NULL UNIMPL nosys 246 AUE_NULL UNIMPL nosys 247 AUE_NULL STD { int clock_getcpuclockid2(id_t id,\ int which, clockid_t *clock_id); } 248 AUE_NULL STD { int ntp_gettime(struct ntptimeval *ntvp); } 249 AUE_NULL UNIMPL nosys ; syscall numbers initially used in OpenBSD 250 AUE_MINHERIT STD { int minherit(void *addr, size_t len, \ int inherit); } 251 AUE_RFORK STD { int rfork(int flags); } 252 AUE_POLL OBSOL openbsd_poll 253 AUE_ISSETUGID STD { int issetugid(void); } 254 AUE_LCHOWN STD { int lchown(char *path, int uid, int gid); } 255 AUE_AIO_READ STD { int aio_read(struct aiocb *aiocbp); } 256 AUE_AIO_WRITE STD { int aio_write(struct aiocb *aiocbp); } 257 AUE_LIO_LISTIO STD { int lio_listio(int mode, \ struct aiocb * const *acb_list, \ int nent, struct sigevent *sig); } 258 AUE_NULL UNIMPL nosys 259 AUE_NULL UNIMPL nosys 260 AUE_NULL UNIMPL nosys 261 AUE_NULL UNIMPL nosys 262 AUE_NULL UNIMPL nosys 263 AUE_NULL UNIMPL nosys 264 AUE_NULL UNIMPL nosys 265 AUE_NULL UNIMPL nosys 266 AUE_NULL UNIMPL nosys 267 AUE_NULL UNIMPL nosys 268 AUE_NULL UNIMPL nosys 269 AUE_NULL UNIMPL nosys 270 AUE_NULL UNIMPL nosys 271 AUE_NULL UNIMPL nosys -272 AUE_O_GETDENTS STD { int getdents(int fd, char *buf, \ +272 AUE_O_GETDENTS COMPAT11 { int getdents(int fd, char *buf, \ size_t count); } 273 AUE_NULL UNIMPL nosys 274 AUE_LCHMOD STD { int lchmod(char *path, mode_t mode); } 275 AUE_LCHOWN NOPROTO { int lchown(char *path, uid_t uid, \ gid_t gid); } netbsd_lchown lchown_args \ int 276 AUE_LUTIMES STD { int lutimes(char *path, \ struct timeval *tptr); } 277 AUE_MSYNC NOPROTO { int msync(void *addr, size_t len, \ int flags); } netbsd_msync msync_args int -278 AUE_STAT STD { int nstat(char *path, struct nstat *ub); } -279 AUE_FSTAT STD { int nfstat(int fd, struct nstat *sb); } -280 AUE_LSTAT STD { int nlstat(char *path, struct nstat *ub); } +278 AUE_STAT COMPAT11 { int nstat(char *path, struct nstat *ub); } +279 AUE_FSTAT COMPAT11 { int nfstat(int fd, struct nstat *sb); } +280 AUE_LSTAT COMPAT11 { int nlstat(char *path, struct nstat *ub); } 281 AUE_NULL UNIMPL nosys 282 AUE_NULL UNIMPL nosys 283 AUE_NULL UNIMPL nosys 284 AUE_NULL UNIMPL nosys 285 AUE_NULL UNIMPL nosys 286 AUE_NULL UNIMPL nosys 287 AUE_NULL UNIMPL nosys 288 AUE_NULL UNIMPL nosys ; 289 and 290 from NetBSD (OpenBSD: 267 and 268) 289 AUE_PREADV STD { ssize_t preadv(int fd, struct iovec *iovp, \ u_int iovcnt, off_t offset); } 290 AUE_PWRITEV STD { ssize_t pwritev(int fd, struct iovec *iovp, \ u_int iovcnt, off_t offset); } 291 AUE_NULL UNIMPL nosys 292 AUE_NULL UNIMPL nosys 293 AUE_NULL UNIMPL nosys 294 AUE_NULL UNIMPL nosys 295 AUE_NULL UNIMPL nosys 296 AUE_NULL UNIMPL nosys ; XXX 297 is 300 in NetBSD 297 AUE_FHSTATFS COMPAT4 { int fhstatfs( \ const struct fhandle *u_fhp, \ struct ostatfs *buf); } 298 AUE_FHOPEN STD { int fhopen(const struct fhandle *u_fhp, \ int flags); } -299 AUE_FHSTAT STD { int fhstat(const struct fhandle *u_fhp, \ - struct stat *sb); } +299 AUE_FHSTAT COMPAT11 { int fhstat(const struct fhandle *u_fhp, \ + struct freebsd11_stat *sb); } ; syscall numbers for FreeBSD 300 AUE_NULL STD { int modnext(int modid); } 301 AUE_NULL STD { int modstat(int modid, \ struct module_stat *stat); } 302 AUE_NULL STD { int modfnext(int modid); } 303 AUE_NULL STD { int modfind(const char *name); } 304 AUE_MODLOAD STD { int kldload(const char *file); } 305 AUE_MODUNLOAD STD { int kldunload(int fileid); } 306 AUE_NULL STD { int kldfind(const char *file); } 307 AUE_NULL STD { int kldnext(int fileid); } 308 AUE_NULL STD { int kldstat(int fileid, struct \ kld_file_stat* stat); } 309 AUE_NULL STD { int kldfirstmod(int fileid); } 310 AUE_GETSID STD { int getsid(pid_t pid); } 311 AUE_SETRESUID STD { int setresuid(uid_t ruid, uid_t euid, \ uid_t suid); } 312 AUE_SETRESGID STD { int setresgid(gid_t rgid, gid_t egid, \ gid_t sgid); } 313 AUE_NULL OBSOL signanosleep 314 AUE_AIO_RETURN STD { ssize_t aio_return(struct aiocb *aiocbp); } 315 AUE_AIO_SUSPEND STD { int aio_suspend( \ struct aiocb * const * aiocbp, int nent, \ const struct timespec *timeout); } 316 AUE_AIO_CANCEL STD { int aio_cancel(int fd, \ struct aiocb *aiocbp); } 317 AUE_AIO_ERROR STD { int aio_error(struct aiocb *aiocbp); } 318 AUE_AIO_READ COMPAT6 { int aio_read(struct oaiocb *aiocbp); } 319 AUE_AIO_WRITE COMPAT6 { int aio_write(struct oaiocb *aiocbp); } 320 AUE_LIO_LISTIO COMPAT6 { int lio_listio(int mode, \ struct oaiocb * const *acb_list, \ int nent, struct osigevent *sig); } 321 AUE_NULL STD { int yield(void); } 322 AUE_NULL OBSOL thr_sleep 323 AUE_NULL OBSOL thr_wakeup 324 AUE_MLOCKALL STD { int mlockall(int how); } 325 AUE_MUNLOCKALL STD { int munlockall(void); } 326 AUE_GETCWD STD { int __getcwd(char *buf, size_t buflen); } 327 AUE_NULL STD { int sched_setparam (pid_t pid, \ const struct sched_param *param); } 328 AUE_NULL STD { int sched_getparam (pid_t pid, struct \ sched_param *param); } 329 AUE_NULL STD { int sched_setscheduler (pid_t pid, int \ policy, const struct sched_param \ *param); } 330 AUE_NULL STD { int sched_getscheduler (pid_t pid); } 331 AUE_NULL STD { int sched_yield (void); } 332 AUE_NULL STD { int sched_get_priority_max (int policy); } 333 AUE_NULL STD { int sched_get_priority_min (int policy); } 334 AUE_NULL STD { int sched_rr_get_interval (pid_t pid, \ struct timespec *interval); } 335 AUE_NULL STD { int utrace(const void *addr, size_t len); } 336 AUE_SENDFILE COMPAT4 { int sendfile(int fd, int s, \ off_t offset, size_t nbytes, \ struct sf_hdtr *hdtr, off_t *sbytes, \ int flags); } 337 AUE_NULL STD { int kldsym(int fileid, int cmd, \ void *data); } 338 AUE_JAIL STD { int jail(struct jail *jail); } 339 AUE_NULL NOSTD|NOTSTATIC { int nnpfs_syscall(int operation, \ char *a_pathP, int a_opcode, \ void *a_paramsP, int a_followSymlinks); } 340 AUE_SIGPROCMASK STD { int sigprocmask(int how, \ const sigset_t *set, sigset_t *oset); } 341 AUE_SIGSUSPEND STD { int sigsuspend(const sigset_t *sigmask); } 342 AUE_SIGACTION COMPAT4 { int sigaction(int sig, const \ struct sigaction *act, \ struct sigaction *oact); } 343 AUE_SIGPENDING STD { int sigpending(sigset_t *set); } 344 AUE_SIGRETURN COMPAT4 { int sigreturn( \ const struct ucontext4 *sigcntxp); } 345 AUE_SIGWAIT STD { int sigtimedwait(const sigset_t *set, \ siginfo_t *info, \ const struct timespec *timeout); } 346 AUE_NULL STD { int sigwaitinfo(const sigset_t *set, \ siginfo_t *info); } 347 AUE_ACL_GET_FILE STD { int __acl_get_file(const char *path, \ acl_type_t type, struct acl *aclp); } 348 AUE_ACL_SET_FILE STD { int __acl_set_file(const char *path, \ acl_type_t type, struct acl *aclp); } 349 AUE_ACL_GET_FD STD { int __acl_get_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 350 AUE_ACL_SET_FD STD { int __acl_set_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 351 AUE_ACL_DELETE_FILE STD { int __acl_delete_file(const char *path, \ acl_type_t type); } 352 AUE_ACL_DELETE_FD STD { int __acl_delete_fd(int filedes, \ acl_type_t type); } 353 AUE_ACL_CHECK_FILE STD { int __acl_aclcheck_file(const char *path, \ acl_type_t type, struct acl *aclp); } 354 AUE_ACL_CHECK_FD STD { int __acl_aclcheck_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 355 AUE_EXTATTRCTL STD { int extattrctl(const char *path, int cmd, \ const char *filename, int attrnamespace, \ const char *attrname); } 356 AUE_EXTATTR_SET_FILE STD { ssize_t extattr_set_file( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 357 AUE_EXTATTR_GET_FILE STD { ssize_t extattr_get_file( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 358 AUE_EXTATTR_DELETE_FILE STD { int extattr_delete_file(const char *path, \ int attrnamespace, \ const char *attrname); } 359 AUE_AIO_WAITCOMPLETE STD { ssize_t aio_waitcomplete( \ struct aiocb **aiocbp, \ struct timespec *timeout); } 360 AUE_GETRESUID STD { int getresuid(uid_t *ruid, uid_t *euid, \ uid_t *suid); } 361 AUE_GETRESGID STD { int getresgid(gid_t *rgid, gid_t *egid, \ gid_t *sgid); } 362 AUE_KQUEUE STD { int kqueue(void); } 363 AUE_KEVENT STD { int kevent(int fd, \ struct kevent *changelist, int nchanges, \ struct kevent *eventlist, int nevents, \ const struct timespec *timeout); } 364 AUE_NULL UNIMPL __cap_get_proc 365 AUE_NULL UNIMPL __cap_set_proc 366 AUE_NULL UNIMPL __cap_get_fd 367 AUE_NULL UNIMPL __cap_get_file 368 AUE_NULL UNIMPL __cap_set_fd 369 AUE_NULL UNIMPL __cap_set_file 370 AUE_NULL UNIMPL nosys 371 AUE_EXTATTR_SET_FD STD { ssize_t extattr_set_fd(int fd, \ int attrnamespace, const char *attrname, \ void *data, size_t nbytes); } 372 AUE_EXTATTR_GET_FD STD { ssize_t extattr_get_fd(int fd, \ int attrnamespace, const char *attrname, \ void *data, size_t nbytes); } 373 AUE_EXTATTR_DELETE_FD STD { int extattr_delete_fd(int fd, \ int attrnamespace, \ const char *attrname); } 374 AUE_SETUGID STD { int __setugid(int flag); } 375 AUE_NULL UNIMPL nfsclnt 376 AUE_EACCESS STD { int eaccess(char *path, int amode); } 377 AUE_NULL NOSTD|NOTSTATIC { int afs3_syscall(long syscall, \ long parm1, long parm2, long parm3, \ long parm4, long parm5, long parm6); } 378 AUE_NMOUNT STD { int nmount(struct iovec *iovp, \ unsigned int iovcnt, int flags); } 379 AUE_NULL UNIMPL kse_exit 380 AUE_NULL UNIMPL kse_wakeup 381 AUE_NULL UNIMPL kse_create 382 AUE_NULL UNIMPL kse_thr_interrupt 383 AUE_NULL UNIMPL kse_release 384 AUE_NULL STD { int __mac_get_proc(struct mac *mac_p); } 385 AUE_NULL STD { int __mac_set_proc(struct mac *mac_p); } 386 AUE_NULL STD { int __mac_get_fd(int fd, \ struct mac *mac_p); } 387 AUE_NULL STD { int __mac_get_file(const char *path_p, \ struct mac *mac_p); } 388 AUE_NULL STD { int __mac_set_fd(int fd, \ struct mac *mac_p); } 389 AUE_NULL STD { int __mac_set_file(const char *path_p, \ struct mac *mac_p); } 390 AUE_NULL STD { int kenv(int what, const char *name, \ char *value, int len); } 391 AUE_LCHFLAGS STD { int lchflags(const char *path, \ u_long flags); } 392 AUE_NULL STD { int uuidgen(struct uuid *store, \ int count); } 393 AUE_SENDFILE STD { int sendfile(int fd, int s, off_t offset, \ size_t nbytes, struct sf_hdtr *hdtr, \ off_t *sbytes, int flags); } 394 AUE_NULL STD { int mac_syscall(const char *policy, \ int call, void *arg); } -395 AUE_GETFSSTAT STD { int getfsstat(struct statfs *buf, \ +395 AUE_GETFSSTAT COMPAT11 { int getfsstat(struct freebsd11_statfs *buf, \ long bufsize, int mode); } -396 AUE_STATFS STD { int statfs(char *path, \ - struct statfs *buf); } -397 AUE_FSTATFS STD { int fstatfs(int fd, struct statfs *buf); } -398 AUE_FHSTATFS STD { int fhstatfs(const struct fhandle *u_fhp, \ - struct statfs *buf); } +396 AUE_STATFS COMPAT11 { int statfs(char *path, \ + struct freebsd11_statfs *buf); } +397 AUE_FSTATFS COMPAT11 { int fstatfs(int fd, \ + struct freebsd11_statfs *buf); } +398 AUE_FHSTATFS COMPAT11 { int fhstatfs(const struct fhandle *u_fhp, \ + struct freebsd11_statfs *buf); } 399 AUE_NULL UNIMPL nosys 400 AUE_SEMCLOSE NOSTD { int ksem_close(semid_t id); } 401 AUE_SEMPOST NOSTD { int ksem_post(semid_t id); } 402 AUE_SEMWAIT NOSTD { int ksem_wait(semid_t id); } 403 AUE_SEMTRYWAIT NOSTD { int ksem_trywait(semid_t id); } 404 AUE_SEMINIT NOSTD { int ksem_init(semid_t *idp, \ unsigned int value); } 405 AUE_SEMOPEN NOSTD { int ksem_open(semid_t *idp, \ const char *name, int oflag, \ mode_t mode, unsigned int value); } 406 AUE_SEMUNLINK NOSTD { int ksem_unlink(const char *name); } 407 AUE_SEMGETVALUE NOSTD { int ksem_getvalue(semid_t id, int *val); } 408 AUE_SEMDESTROY NOSTD { int ksem_destroy(semid_t id); } 409 AUE_NULL STD { int __mac_get_pid(pid_t pid, \ struct mac *mac_p); } 410 AUE_NULL STD { int __mac_get_link(const char *path_p, \ struct mac *mac_p); } 411 AUE_NULL STD { int __mac_set_link(const char *path_p, \ struct mac *mac_p); } 412 AUE_EXTATTR_SET_LINK STD { ssize_t extattr_set_link( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 413 AUE_EXTATTR_GET_LINK STD { ssize_t extattr_get_link( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 414 AUE_EXTATTR_DELETE_LINK STD { int extattr_delete_link( \ const char *path, int attrnamespace, \ const char *attrname); } 415 AUE_NULL STD { int __mac_execve(char *fname, char **argv, \ char **envv, struct mac *mac_p); } 416 AUE_SIGACTION STD { int sigaction(int sig, \ const struct sigaction *act, \ struct sigaction *oact); } 417 AUE_SIGRETURN STD { int sigreturn( \ const struct __ucontext *sigcntxp); } 418 AUE_NULL UNIMPL __xstat 419 AUE_NULL UNIMPL __xfstat 420 AUE_NULL UNIMPL __xlstat 421 AUE_NULL STD { int getcontext(struct __ucontext *ucp); } 422 AUE_NULL STD { int setcontext( \ const struct __ucontext *ucp); } 423 AUE_NULL STD { int swapcontext(struct __ucontext *oucp, \ const struct __ucontext *ucp); } 424 AUE_SWAPOFF STD { int swapoff(const char *name); } 425 AUE_ACL_GET_LINK STD { int __acl_get_link(const char *path, \ acl_type_t type, struct acl *aclp); } 426 AUE_ACL_SET_LINK STD { int __acl_set_link(const char *path, \ acl_type_t type, struct acl *aclp); } 427 AUE_ACL_DELETE_LINK STD { int __acl_delete_link(const char *path, \ acl_type_t type); } 428 AUE_ACL_CHECK_LINK STD { int __acl_aclcheck_link(const char *path, \ acl_type_t type, struct acl *aclp); } 429 AUE_SIGWAIT STD { int sigwait(const sigset_t *set, \ int *sig); } 430 AUE_THR_CREATE STD { int thr_create(ucontext_t *ctx, long *id, \ int flags); } 431 AUE_THR_EXIT STD { void thr_exit(long *state); } 432 AUE_NULL STD { int thr_self(long *id); } 433 AUE_THR_KILL STD { int thr_kill(long id, int sig); } 434 AUE_NULL UNIMPL nosys 435 AUE_NULL UNIMPL nosys 436 AUE_JAIL_ATTACH STD { int jail_attach(int jid); } 437 AUE_EXTATTR_LIST_FD STD { ssize_t extattr_list_fd(int fd, \ int attrnamespace, void *data, \ size_t nbytes); } 438 AUE_EXTATTR_LIST_FILE STD { ssize_t extattr_list_file( \ const char *path, int attrnamespace, \ void *data, size_t nbytes); } 439 AUE_EXTATTR_LIST_LINK STD { ssize_t extattr_list_link( \ const char *path, int attrnamespace, \ void *data, size_t nbytes); } 440 AUE_NULL UNIMPL kse_switchin 441 AUE_SEMWAIT NOSTD { int ksem_timedwait(semid_t id, \ const struct timespec *abstime); } 442 AUE_NULL STD { int thr_suspend( \ const struct timespec *timeout); } 443 AUE_NULL STD { int thr_wake(long id); } 444 AUE_MODUNLOAD STD { int kldunloadf(int fileid, int flags); } 445 AUE_AUDIT STD { int audit(const void *record, \ u_int length); } 446 AUE_AUDITON STD { int auditon(int cmd, void *data, \ u_int length); } 447 AUE_GETAUID STD { int getauid(uid_t *auid); } 448 AUE_SETAUID STD { int setauid(uid_t *auid); } 449 AUE_GETAUDIT STD { int getaudit(struct auditinfo *auditinfo); } 450 AUE_SETAUDIT STD { int setaudit(struct auditinfo *auditinfo); } 451 AUE_GETAUDIT_ADDR STD { int getaudit_addr( \ struct auditinfo_addr *auditinfo_addr, \ u_int length); } 452 AUE_SETAUDIT_ADDR STD { int setaudit_addr( \ struct auditinfo_addr *auditinfo_addr, \ u_int length); } 453 AUE_AUDITCTL STD { int auditctl(char *path); } 454 AUE_NULL STD { int _umtx_op(void *obj, int op, \ u_long val, void *uaddr1, void *uaddr2); } 455 AUE_THR_NEW STD { int thr_new(struct thr_param *param, \ int param_size); } 456 AUE_NULL STD { int sigqueue(pid_t pid, int signum, void *value); } 457 AUE_MQ_OPEN NOSTD { int kmq_open(const char *path, int flags, \ mode_t mode, const struct mq_attr *attr); } 458 AUE_MQ_SETATTR NOSTD { int kmq_setattr(int mqd, \ const struct mq_attr *attr, \ struct mq_attr *oattr); } 459 AUE_MQ_TIMEDRECEIVE NOSTD { int kmq_timedreceive(int mqd, \ char *msg_ptr, size_t msg_len, \ unsigned *msg_prio, \ const struct timespec *abs_timeout); } 460 AUE_MQ_TIMEDSEND NOSTD { int kmq_timedsend(int mqd, \ const char *msg_ptr, size_t msg_len,\ unsigned msg_prio, \ const struct timespec *abs_timeout);} 461 AUE_MQ_NOTIFY NOSTD { int kmq_notify(int mqd, \ const struct sigevent *sigev); } 462 AUE_MQ_UNLINK NOSTD { int kmq_unlink(const char *path); } 463 AUE_NULL STD { int abort2(const char *why, int nargs, void **args); } 464 AUE_NULL STD { int thr_set_name(long id, const char *name); } 465 AUE_AIO_FSYNC STD { int aio_fsync(int op, struct aiocb *aiocbp); } 466 AUE_RTPRIO STD { int rtprio_thread(int function, \ lwpid_t lwpid, struct rtprio *rtp); } 467 AUE_NULL UNIMPL nosys 468 AUE_NULL UNIMPL nosys 469 AUE_NULL UNIMPL __getpath_fromfd 470 AUE_NULL UNIMPL __getpath_fromaddr 471 AUE_SCTP_PEELOFF NOSTD { int sctp_peeloff(int sd, uint32_t name); } 472 AUE_SCTP_GENERIC_SENDMSG NOSTD { int sctp_generic_sendmsg(int sd, caddr_t msg, int mlen, \ caddr_t to, __socklen_t tolen, \ struct sctp_sndrcvinfo *sinfo, int flags); } 473 AUE_SCTP_GENERIC_SENDMSG_IOV NOSTD { int sctp_generic_sendmsg_iov(int sd, struct iovec *iov, int iovlen, \ caddr_t to, __socklen_t tolen, \ struct sctp_sndrcvinfo *sinfo, int flags); } 474 AUE_SCTP_GENERIC_RECVMSG NOSTD { int sctp_generic_recvmsg(int sd, struct iovec *iov, int iovlen, \ struct sockaddr * from, __socklen_t *fromlenaddr, \ struct sctp_sndrcvinfo *sinfo, int *msg_flags); } 475 AUE_PREAD STD { ssize_t pread(int fd, void *buf, \ size_t nbyte, off_t offset); } 476 AUE_PWRITE STD { ssize_t pwrite(int fd, const void *buf, \ size_t nbyte, off_t offset); } 477 AUE_MMAP STD { caddr_t mmap(caddr_t addr, size_t len, \ int prot, int flags, int fd, off_t pos); } 478 AUE_LSEEK STD { off_t lseek(int fd, off_t offset, \ int whence); } 479 AUE_TRUNCATE STD { int truncate(char *path, off_t length); } 480 AUE_FTRUNCATE STD { int ftruncate(int fd, off_t length); } 481 AUE_THR_KILL2 STD { int thr_kill2(pid_t pid, long id, int sig); } 482 AUE_SHMOPEN STD { int shm_open(const char *path, int flags, \ mode_t mode); } 483 AUE_SHMUNLINK STD { int shm_unlink(const char *path); } 484 AUE_NULL STD { int cpuset(cpusetid_t *setid); } 485 AUE_NULL STD { int cpuset_setid(cpuwhich_t which, id_t id, \ cpusetid_t setid); } 486 AUE_NULL STD { int cpuset_getid(cpulevel_t level, \ cpuwhich_t which, id_t id, \ cpusetid_t *setid); } 487 AUE_NULL STD { int cpuset_getaffinity(cpulevel_t level, \ cpuwhich_t which, id_t id, size_t cpusetsize, \ cpuset_t *mask); } 488 AUE_NULL STD { int cpuset_setaffinity(cpulevel_t level, \ cpuwhich_t which, id_t id, size_t cpusetsize, \ const cpuset_t *mask); } 489 AUE_FACCESSAT STD { int faccessat(int fd, char *path, int amode, \ int flag); } 490 AUE_FCHMODAT STD { int fchmodat(int fd, char *path, mode_t mode, \ int flag); } 491 AUE_FCHOWNAT STD { int fchownat(int fd, char *path, uid_t uid, \ gid_t gid, int flag); } 492 AUE_FEXECVE STD { int fexecve(int fd, char **argv, \ char **envv); } -493 AUE_FSTATAT STD { int fstatat(int fd, char *path, \ - struct stat *buf, int flag); } +493 AUE_FSTATAT COMPAT11 { int fstatat(int fd, char *path, \ + struct freebsd11_stat *buf, int flag); } 494 AUE_FUTIMESAT STD { int futimesat(int fd, char *path, \ struct timeval *times); } 495 AUE_LINKAT STD { int linkat(int fd1, char *path1, int fd2, \ char *path2, int flag); } 496 AUE_MKDIRAT STD { int mkdirat(int fd, char *path, mode_t mode); } 497 AUE_MKFIFOAT STD { int mkfifoat(int fd, char *path, mode_t mode); } -498 AUE_MKNODAT STD { int mknodat(int fd, char *path, mode_t mode, \ - dev_t dev); } +498 AUE_MKNODAT COMPAT11 { int mknodat(int fd, char *path, mode_t mode, \ + uint32_t dev); } ; XXX: see the comment for open 499 AUE_OPENAT_RWTC STD { int openat(int fd, char *path, int flag, \ mode_t mode); } 500 AUE_READLINKAT STD { int readlinkat(int fd, char *path, char *buf, \ size_t bufsize); } 501 AUE_RENAMEAT STD { int renameat(int oldfd, char *old, int newfd, \ char *new); } 502 AUE_SYMLINKAT STD { int symlinkat(char *path1, int fd, \ char *path2); } 503 AUE_UNLINKAT STD { int unlinkat(int fd, char *path, int flag); } 504 AUE_POSIX_OPENPT STD { int posix_openpt(int flags); } ; 505 is initialised by the kgssapi code, if present. 505 AUE_NULL NOSTD { int gssd_syscall(char *path); } 506 AUE_JAIL_GET STD { int jail_get(struct iovec *iovp, \ unsigned int iovcnt, int flags); } 507 AUE_JAIL_SET STD { int jail_set(struct iovec *iovp, \ unsigned int iovcnt, int flags); } 508 AUE_JAIL_REMOVE STD { int jail_remove(int jid); } 509 AUE_CLOSEFROM STD { int closefrom(int lowfd); } 510 AUE_SEMCTL NOSTD { int __semctl(int semid, int semnum, \ int cmd, union semun *arg); } 511 AUE_MSGCTL NOSTD { int msgctl(int msqid, int cmd, \ struct msqid_ds *buf); } 512 AUE_SHMCTL NOSTD { int shmctl(int shmid, int cmd, \ struct shmid_ds *buf); } 513 AUE_LPATHCONF STD { int lpathconf(char *path, int name); } 514 AUE_NULL OBSOL cap_new 515 AUE_CAP_RIGHTS_GET STD { int __cap_rights_get(int version, \ int fd, cap_rights_t *rightsp); } 516 AUE_CAP_ENTER STD { int cap_enter(void); } 517 AUE_CAP_GETMODE STD { int cap_getmode(u_int *modep); } 518 AUE_PDFORK STD { int pdfork(int *fdp, int flags); } 519 AUE_PDKILL STD { int pdkill(int fd, int signum); } 520 AUE_PDGETPID STD { int pdgetpid(int fd, pid_t *pidp); } 521 AUE_PDWAIT UNIMPL pdwait4 522 AUE_SELECT STD { int pselect(int nd, fd_set *in, \ fd_set *ou, fd_set *ex, \ const struct timespec *ts, \ const sigset_t *sm); } 523 AUE_GETLOGINCLASS STD { int getloginclass(char *namebuf, \ size_t namelen); } 524 AUE_SETLOGINCLASS STD { int setloginclass(const char *namebuf); } 525 AUE_NULL STD { int rctl_get_racct(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 526 AUE_NULL STD { int rctl_get_rules(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 527 AUE_NULL STD { int rctl_get_limits(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 528 AUE_NULL STD { int rctl_add_rule(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 529 AUE_NULL STD { int rctl_remove_rule(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 530 AUE_POSIX_FALLOCATE STD { int posix_fallocate(int fd, \ off_t offset, off_t len); } 531 AUE_POSIX_FADVISE STD { int posix_fadvise(int fd, off_t offset, \ off_t len, int advice); } 532 AUE_WAIT6 STD { int wait6(idtype_t idtype, id_t id, \ int *status, int options, \ struct __wrusage *wrusage, \ siginfo_t *info); } 533 AUE_CAP_RIGHTS_LIMIT STD { int cap_rights_limit(int fd, \ cap_rights_t *rightsp); } 534 AUE_CAP_IOCTLS_LIMIT STD { int cap_ioctls_limit(int fd, \ const u_long *cmds, size_t ncmds); } 535 AUE_CAP_IOCTLS_GET STD { ssize_t cap_ioctls_get(int fd, \ u_long *cmds, size_t maxcmds); } 536 AUE_CAP_FCNTLS_LIMIT STD { int cap_fcntls_limit(int fd, \ uint32_t fcntlrights); } 537 AUE_CAP_FCNTLS_GET STD { int cap_fcntls_get(int fd, \ uint32_t *fcntlrightsp); } 538 AUE_BINDAT STD { int bindat(int fd, int s, caddr_t name, \ int namelen); } 539 AUE_CONNECTAT STD { int connectat(int fd, int s, caddr_t name, \ int namelen); } 540 AUE_CHFLAGSAT STD { int chflagsat(int fd, const char *path, \ u_long flags, int atflag); } 541 AUE_ACCEPT STD { int accept4(int s, \ struct sockaddr * __restrict name, \ __socklen_t * __restrict anamelen, \ int flags); } 542 AUE_PIPE STD { int pipe2(int *fildes, int flags); } 543 AUE_AIO_MLOCK STD { int aio_mlock(struct aiocb *aiocbp); } 544 AUE_PROCCTL STD { int procctl(idtype_t idtype, id_t id, \ int com, void *data); } 545 AUE_POLL STD { int ppoll(struct pollfd *fds, u_int nfds, \ const struct timespec *ts, \ const sigset_t *set); } 546 AUE_FUTIMES STD { int futimens(int fd, \ struct timespec *times); } 547 AUE_FUTIMESAT STD { int utimensat(int fd, \ char *path, \ struct timespec *times, int flag); } 548 AUE_NULL STD { int numa_getaffinity(cpuwhich_t which, \ id_t id, \ struct vm_domain_policy_entry *policy); } 549 AUE_NULL STD { int numa_setaffinity(cpuwhich_t which, \ id_t id, const struct \ vm_domain_policy_entry *policy); } 550 AUE_FSYNC STD { int fdatasync(int fd); } +551 AUE_FSTAT STD { int fstat(int fd, struct stat *sb); } +552 AUE_FSTATAT STD { int fstatat(int fd, char *path, \ + struct stat *buf, int flag); } +553 AUE_FHSTAT STD { int fhstat(const struct fhandle *u_fhp, \ + struct stat *sb); } +554 AUE_GETDIRENTRIES STD { ssize_t getdirentries(int fd, char *buf, \ + size_t count, off_t *basep); } +555 AUE_STATFS STD { int statfs(char *path, struct statfs *buf); } +556 AUE_FSTATFS STD { int fstatfs(int fd, struct statfs *buf); } +557 AUE_GETFSSTAT STD { int getfsstat(struct statfs *buf, \ + long bufsize, int mode); } +558 AUE_FHSTATFS STD { int fhstatfs(const struct fhandle *u_fhp, \ + struct statfs *buf); } +559 AUE_MKNODAT STD { int mknodat(int fd, char *path, mode_t mode, \ + dev_t dev); } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master Index: head/sys/kern/tty.c =================================================================== --- head/sys/kern/tty.c (revision 318735) +++ head/sys/kern/tty.c (revision 318736) @@ -1,2347 +1,2347 @@ /*- * Copyright (c) 2008 Ed Schouten * All rights reserved. * * Portions of this software were developed under sponsorship from Snow * B.V., the Netherlands. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_compat.h" #include #include #include #include #include #include #include #include #ifdef COMPAT_43TTY #include #endif /* COMPAT_43TTY */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TTYDEFCHARS #include #undef TTYDEFCHARS #include #include #include static MALLOC_DEFINE(M_TTY, "tty", "tty device"); static void tty_rel_free(struct tty *tp); static TAILQ_HEAD(, tty) tty_list = TAILQ_HEAD_INITIALIZER(tty_list); static struct sx tty_list_sx; SX_SYSINIT(tty_list, &tty_list_sx, "tty list"); static unsigned int tty_list_count = 0; /* Character device of /dev/console. */ static struct cdev *dev_console; static const char *dev_console_filename; /* * Flags that are supported and stored by this implementation. */ #define TTYSUP_IFLAG (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK|ISTRIP|\ INLCR|IGNCR|ICRNL|IXON|IXOFF|IXANY|IMAXBEL) #define TTYSUP_OFLAG (OPOST|ONLCR|TAB3|ONOEOT|OCRNL|ONOCR|ONLRET) #define TTYSUP_LFLAG (ECHOKE|ECHOE|ECHOK|ECHO|ECHONL|ECHOPRT|\ ECHOCTL|ISIG|ICANON|ALTWERASE|IEXTEN|TOSTOP|\ FLUSHO|NOKERNINFO|NOFLSH) #define TTYSUP_CFLAG (CIGNORE|CSIZE|CSTOPB|CREAD|PARENB|PARODD|\ HUPCL|CLOCAL|CCTS_OFLOW|CRTS_IFLOW|CDTR_IFLOW|\ CDSR_OFLOW|CCAR_OFLOW) #define TTY_CALLOUT(tp,d) (dev2unit(d) & TTYUNIT_CALLOUT) static int tty_drainwait = 5 * 60; SYSCTL_INT(_kern, OID_AUTO, tty_drainwait, CTLFLAG_RWTUN, &tty_drainwait, 0, "Default output drain timeout in seconds"); /* * Set TTY buffer sizes. */ #define TTYBUF_MAX 65536 /* * Allocate buffer space if necessary, and set low watermarks, based on speed. * Note that the ttyxxxq_setsize() functions may drop and then reacquire the tty * lock during memory allocation. They will return ENXIO if the tty disappears * while unlocked. */ static int tty_watermarks(struct tty *tp) { size_t bs = 0; int error; /* Provide an input buffer for 2 seconds of data. */ if (tp->t_termios.c_cflag & CREAD) bs = MIN(tp->t_termios.c_ispeed / 5, TTYBUF_MAX); error = ttyinq_setsize(&tp->t_inq, tp, bs); if (error != 0) return (error); /* Set low watermark at 10% (when 90% is available). */ tp->t_inlow = (ttyinq_getallocatedsize(&tp->t_inq) * 9) / 10; /* Provide an output buffer for 2 seconds of data. */ bs = MIN(tp->t_termios.c_ospeed / 5, TTYBUF_MAX); error = ttyoutq_setsize(&tp->t_outq, tp, bs); if (error != 0) return (error); /* Set low watermark at 10% (when 90% is available). */ tp->t_outlow = (ttyoutq_getallocatedsize(&tp->t_outq) * 9) / 10; return (0); } static int tty_drain(struct tty *tp, int leaving) { sbintime_t timeout_at; size_t bytes; int error; if (ttyhook_hashook(tp, getc_inject)) /* buffer is inaccessible */ return (0); /* * For close(), use the recent historic timeout of "1 second without * making progress". For tcdrain(), use t_drainwait as the timeout, * with zero meaning "no timeout" which gives POSIX behavior. */ if (leaving) timeout_at = getsbinuptime() + SBT_1S; else if (tp->t_drainwait != 0) timeout_at = getsbinuptime() + SBT_1S * tp->t_drainwait; else timeout_at = 0; /* * Poll the output buffer and the hardware for completion, at 10 Hz. * Polling is required for devices which are not able to signal an * interrupt when the transmitter becomes idle (most USB serial devs). * The unusual structure of this loop ensures we check for busy one more * time after tty_timedwait() returns EWOULDBLOCK, so that success has * higher priority than timeout if the IO completed in the last 100mS. */ error = 0; bytes = ttyoutq_bytesused(&tp->t_outq); for (;;) { if (ttyoutq_bytesused(&tp->t_outq) == 0 && !ttydevsw_busy(tp)) return (0); if (error != 0) return (error); ttydevsw_outwakeup(tp); error = tty_timedwait(tp, &tp->t_outwait, hz / 10); if (error != 0 && error != EWOULDBLOCK) return (error); else if (timeout_at == 0 || getsbinuptime() < timeout_at) error = 0; else if (leaving && ttyoutq_bytesused(&tp->t_outq) < bytes) { /* In close, making progress, grant an extra second. */ error = 0; timeout_at += SBT_1S; bytes = ttyoutq_bytesused(&tp->t_outq); } } } /* * Though ttydev_enter() and ttydev_leave() seem to be related, they * don't have to be used together. ttydev_enter() is used by the cdev * operations to prevent an actual operation from being processed when * the TTY has been abandoned. ttydev_leave() is used by ttydev_open() * and ttydev_close() to determine whether per-TTY data should be * deallocated. */ static __inline int ttydev_enter(struct tty *tp) { tty_lock(tp); if (tty_gone(tp) || !tty_opened(tp)) { /* Device is already gone. */ tty_unlock(tp); return (ENXIO); } return (0); } static void ttydev_leave(struct tty *tp) { tty_lock_assert(tp, MA_OWNED); if (tty_opened(tp) || tp->t_flags & TF_OPENCLOSE) { /* Device is still opened somewhere. */ tty_unlock(tp); return; } tp->t_flags |= TF_OPENCLOSE; /* Stop asynchronous I/O. */ funsetown(&tp->t_sigio); /* Remove console TTY. */ if (constty == tp) constty_clear(); /* Drain any output. */ if (!tty_gone(tp)) tty_drain(tp, 1); ttydisc_close(tp); /* Free i/o queues now since they might be large. */ ttyinq_free(&tp->t_inq); tp->t_inlow = 0; ttyoutq_free(&tp->t_outq); tp->t_outlow = 0; knlist_clear(&tp->t_inpoll.si_note, 1); knlist_clear(&tp->t_outpoll.si_note, 1); if (!tty_gone(tp)) ttydevsw_close(tp); tp->t_flags &= ~TF_OPENCLOSE; cv_broadcast(&tp->t_dcdwait); tty_rel_free(tp); } /* * Operations that are exposed through the character device in /dev. */ static int ttydev_open(struct cdev *dev, int oflags, int devtype __unused, struct thread *td) { struct tty *tp; int error; tp = dev->si_drv1; error = 0; tty_lock(tp); if (tty_gone(tp)) { /* Device is already gone. */ tty_unlock(tp); return (ENXIO); } /* * Block when other processes are currently opening or closing * the TTY. */ while (tp->t_flags & TF_OPENCLOSE) { error = tty_wait(tp, &tp->t_dcdwait); if (error != 0) { tty_unlock(tp); return (error); } } tp->t_flags |= TF_OPENCLOSE; /* * Make sure the "tty" and "cua" device cannot be opened at the * same time. The console is a "tty" device. */ if (TTY_CALLOUT(tp, dev)) { if (tp->t_flags & (TF_OPENED_CONS | TF_OPENED_IN)) { error = EBUSY; goto done; } } else { if (tp->t_flags & TF_OPENED_OUT) { error = EBUSY; goto done; } } if (tp->t_flags & TF_EXCLUDE && priv_check(td, PRIV_TTY_EXCLUSIVE)) { error = EBUSY; goto done; } if (!tty_opened(tp)) { /* Set proper termios flags. */ if (TTY_CALLOUT(tp, dev)) tp->t_termios = tp->t_termios_init_out; else tp->t_termios = tp->t_termios_init_in; ttydevsw_param(tp, &tp->t_termios); /* Prevent modem control on callout devices and /dev/console. */ if (TTY_CALLOUT(tp, dev) || dev == dev_console) tp->t_termios.c_cflag |= CLOCAL; ttydevsw_modem(tp, SER_DTR|SER_RTS, 0); error = ttydevsw_open(tp); if (error != 0) goto done; ttydisc_open(tp); error = tty_watermarks(tp); if (error != 0) goto done; } /* Wait for Carrier Detect. */ if ((oflags & O_NONBLOCK) == 0 && (tp->t_termios.c_cflag & CLOCAL) == 0) { while ((ttydevsw_modem(tp, 0, 0) & SER_DCD) == 0) { error = tty_wait(tp, &tp->t_dcdwait); if (error != 0) goto done; } } if (dev == dev_console) tp->t_flags |= TF_OPENED_CONS; else if (TTY_CALLOUT(tp, dev)) tp->t_flags |= TF_OPENED_OUT; else tp->t_flags |= TF_OPENED_IN; MPASS((tp->t_flags & (TF_OPENED_CONS | TF_OPENED_IN)) == 0 || (tp->t_flags & TF_OPENED_OUT) == 0); done: tp->t_flags &= ~TF_OPENCLOSE; cv_broadcast(&tp->t_dcdwait); ttydev_leave(tp); return (error); } static int ttydev_close(struct cdev *dev, int fflag, int devtype __unused, struct thread *td __unused) { struct tty *tp = dev->si_drv1; tty_lock(tp); /* * Don't actually close the device if it is being used as the * console. */ MPASS((tp->t_flags & (TF_OPENED_CONS | TF_OPENED_IN)) == 0 || (tp->t_flags & TF_OPENED_OUT) == 0); if (dev == dev_console) tp->t_flags &= ~TF_OPENED_CONS; else tp->t_flags &= ~(TF_OPENED_IN|TF_OPENED_OUT); if (tp->t_flags & TF_OPENED) { tty_unlock(tp); return (0); } /* If revoking, flush output now to avoid draining it later. */ if (fflag & FREVOKE) tty_flush(tp, FWRITE); tp->t_flags &= ~TF_EXCLUDE; /* Properly wake up threads that are stuck - revoke(). */ tp->t_revokecnt++; tty_wakeup(tp, FREAD|FWRITE); cv_broadcast(&tp->t_bgwait); cv_broadcast(&tp->t_dcdwait); ttydev_leave(tp); return (0); } static __inline int tty_is_ctty(struct tty *tp, struct proc *p) { tty_lock_assert(tp, MA_OWNED); return (p->p_session == tp->t_session && p->p_flag & P_CONTROLT); } int tty_wait_background(struct tty *tp, struct thread *td, int sig) { struct proc *p = td->td_proc; struct pgrp *pg; ksiginfo_t ksi; int error; MPASS(sig == SIGTTIN || sig == SIGTTOU); tty_lock_assert(tp, MA_OWNED); for (;;) { PROC_LOCK(p); /* * The process should only sleep, when: * - This terminal is the controlling terminal * - Its process group is not the foreground process * group * - The parent process isn't waiting for the child to * exit * - the signal to send to the process isn't masked */ if (!tty_is_ctty(tp, p) || p->p_pgrp == tp->t_pgrp) { /* Allow the action to happen. */ PROC_UNLOCK(p); return (0); } if (SIGISMEMBER(p->p_sigacts->ps_sigignore, sig) || SIGISMEMBER(td->td_sigmask, sig)) { /* Only allow them in write()/ioctl(). */ PROC_UNLOCK(p); return (sig == SIGTTOU ? 0 : EIO); } pg = p->p_pgrp; if (p->p_flag & P_PPWAIT || pg->pg_jobc == 0) { /* Don't allow the action to happen. */ PROC_UNLOCK(p); return (EIO); } PROC_UNLOCK(p); /* * Send the signal and sleep until we're the new * foreground process group. */ if (sig != 0) { ksiginfo_init(&ksi); ksi.ksi_code = SI_KERNEL; ksi.ksi_signo = sig; sig = 0; } PGRP_LOCK(pg); pgsignal(pg, ksi.ksi_signo, 1, &ksi); PGRP_UNLOCK(pg); error = tty_wait(tp, &tp->t_bgwait); if (error) return (error); } } static int ttydev_read(struct cdev *dev, struct uio *uio, int ioflag) { struct tty *tp = dev->si_drv1; int error; error = ttydev_enter(tp); if (error) goto done; error = ttydisc_read(tp, uio, ioflag); tty_unlock(tp); /* * The read() call should not throw an error when the device is * being destroyed. Silently convert it to an EOF. */ done: if (error == ENXIO) error = 0; return (error); } static int ttydev_write(struct cdev *dev, struct uio *uio, int ioflag) { struct tty *tp = dev->si_drv1; int error; error = ttydev_enter(tp); if (error) return (error); if (tp->t_termios.c_lflag & TOSTOP) { error = tty_wait_background(tp, curthread, SIGTTOU); if (error) goto done; } if (ioflag & IO_NDELAY && tp->t_flags & TF_BUSY_OUT) { /* Allow non-blocking writes to bypass serialization. */ error = ttydisc_write(tp, uio, ioflag); } else { /* Serialize write() calls. */ while (tp->t_flags & TF_BUSY_OUT) { error = tty_wait(tp, &tp->t_outserwait); if (error) goto done; } tp->t_flags |= TF_BUSY_OUT; error = ttydisc_write(tp, uio, ioflag); tp->t_flags &= ~TF_BUSY_OUT; cv_signal(&tp->t_outserwait); } done: tty_unlock(tp); return (error); } static int ttydev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { struct tty *tp = dev->si_drv1; int error; error = ttydev_enter(tp); if (error) return (error); switch (cmd) { case TIOCCBRK: case TIOCCONS: case TIOCDRAIN: case TIOCEXCL: case TIOCFLUSH: case TIOCNXCL: case TIOCSBRK: case TIOCSCTTY: case TIOCSETA: case TIOCSETAF: case TIOCSETAW: case TIOCSPGRP: case TIOCSTART: case TIOCSTAT: case TIOCSTI: case TIOCSTOP: case TIOCSWINSZ: #if 0 case TIOCSDRAINWAIT: case TIOCSETD: #endif #ifdef COMPAT_43TTY case TIOCLBIC: case TIOCLBIS: case TIOCLSET: case TIOCSETC: case OTIOCSETD: case TIOCSETN: case TIOCSETP: case TIOCSLTC: #endif /* COMPAT_43TTY */ /* * If the ioctl() causes the TTY to be modified, let it * wait in the background. */ error = tty_wait_background(tp, curthread, SIGTTOU); if (error) goto done; } if (cmd == TIOCSETA || cmd == TIOCSETAW || cmd == TIOCSETAF) { struct termios *old = &tp->t_termios; struct termios *new = (struct termios *)data; struct termios *lock = TTY_CALLOUT(tp, dev) ? &tp->t_termios_lock_out : &tp->t_termios_lock_in; int cc; /* * Lock state devices. Just overwrite the values of the * commands that are currently in use. */ new->c_iflag = (old->c_iflag & lock->c_iflag) | (new->c_iflag & ~lock->c_iflag); new->c_oflag = (old->c_oflag & lock->c_oflag) | (new->c_oflag & ~lock->c_oflag); new->c_cflag = (old->c_cflag & lock->c_cflag) | (new->c_cflag & ~lock->c_cflag); new->c_lflag = (old->c_lflag & lock->c_lflag) | (new->c_lflag & ~lock->c_lflag); for (cc = 0; cc < NCCS; ++cc) if (lock->c_cc[cc]) new->c_cc[cc] = old->c_cc[cc]; if (lock->c_ispeed) new->c_ispeed = old->c_ispeed; if (lock->c_ospeed) new->c_ospeed = old->c_ospeed; } error = tty_ioctl(tp, cmd, data, fflag, td); done: tty_unlock(tp); return (error); } static int ttydev_poll(struct cdev *dev, int events, struct thread *td) { struct tty *tp = dev->si_drv1; int error, revents = 0; error = ttydev_enter(tp); if (error) return ((events & (POLLIN|POLLRDNORM)) | POLLHUP); if (events & (POLLIN|POLLRDNORM)) { /* See if we can read something. */ if (ttydisc_read_poll(tp) > 0) revents |= events & (POLLIN|POLLRDNORM); } if (tp->t_flags & TF_ZOMBIE) { /* Hangup flag on zombie state. */ revents |= POLLHUP; } else if (events & (POLLOUT|POLLWRNORM)) { /* See if we can write something. */ if (ttydisc_write_poll(tp) > 0) revents |= events & (POLLOUT|POLLWRNORM); } if (revents == 0) { if (events & (POLLIN|POLLRDNORM)) selrecord(td, &tp->t_inpoll); if (events & (POLLOUT|POLLWRNORM)) selrecord(td, &tp->t_outpoll); } tty_unlock(tp); return (revents); } static int ttydev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) { struct tty *tp = dev->si_drv1; int error; /* Handle mmap() through the driver. */ error = ttydev_enter(tp); if (error) return (-1); error = ttydevsw_mmap(tp, offset, paddr, nprot, memattr); tty_unlock(tp); return (error); } /* * kqueue support. */ static void tty_kqops_read_detach(struct knote *kn) { struct tty *tp = kn->kn_hook; knlist_remove(&tp->t_inpoll.si_note, kn, 0); } static int tty_kqops_read_event(struct knote *kn, long hint __unused) { struct tty *tp = kn->kn_hook; tty_lock_assert(tp, MA_OWNED); if (tty_gone(tp) || tp->t_flags & TF_ZOMBIE) { kn->kn_flags |= EV_EOF; return (1); } else { kn->kn_data = ttydisc_read_poll(tp); return (kn->kn_data > 0); } } static void tty_kqops_write_detach(struct knote *kn) { struct tty *tp = kn->kn_hook; knlist_remove(&tp->t_outpoll.si_note, kn, 0); } static int tty_kqops_write_event(struct knote *kn, long hint __unused) { struct tty *tp = kn->kn_hook; tty_lock_assert(tp, MA_OWNED); if (tty_gone(tp)) { kn->kn_flags |= EV_EOF; return (1); } else { kn->kn_data = ttydisc_write_poll(tp); return (kn->kn_data > 0); } } static struct filterops tty_kqops_read = { .f_isfd = 1, .f_detach = tty_kqops_read_detach, .f_event = tty_kqops_read_event, }; static struct filterops tty_kqops_write = { .f_isfd = 1, .f_detach = tty_kqops_write_detach, .f_event = tty_kqops_write_event, }; static int ttydev_kqfilter(struct cdev *dev, struct knote *kn) { struct tty *tp = dev->si_drv1; int error; error = ttydev_enter(tp); if (error) return (error); switch (kn->kn_filter) { case EVFILT_READ: kn->kn_hook = tp; kn->kn_fop = &tty_kqops_read; knlist_add(&tp->t_inpoll.si_note, kn, 1); break; case EVFILT_WRITE: kn->kn_hook = tp; kn->kn_fop = &tty_kqops_write; knlist_add(&tp->t_outpoll.si_note, kn, 1); break; default: error = EINVAL; break; } tty_unlock(tp); return (error); } static struct cdevsw ttydev_cdevsw = { .d_version = D_VERSION, .d_open = ttydev_open, .d_close = ttydev_close, .d_read = ttydev_read, .d_write = ttydev_write, .d_ioctl = ttydev_ioctl, .d_kqfilter = ttydev_kqfilter, .d_poll = ttydev_poll, .d_mmap = ttydev_mmap, .d_name = "ttydev", .d_flags = D_TTY, }; /* * Init/lock-state devices */ static int ttyil_open(struct cdev *dev, int oflags __unused, int devtype __unused, struct thread *td) { struct tty *tp; int error; tp = dev->si_drv1; error = 0; tty_lock(tp); if (tty_gone(tp)) error = ENODEV; tty_unlock(tp); return (error); } static int ttyil_close(struct cdev *dev __unused, int flag __unused, int mode __unused, struct thread *td __unused) { return (0); } static int ttyil_rdwr(struct cdev *dev __unused, struct uio *uio __unused, int ioflag __unused) { return (ENODEV); } static int ttyil_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { struct tty *tp = dev->si_drv1; int error; tty_lock(tp); if (tty_gone(tp)) { error = ENODEV; goto done; } error = ttydevsw_cioctl(tp, dev2unit(dev), cmd, data, td); if (error != ENOIOCTL) goto done; error = 0; switch (cmd) { case TIOCGETA: /* Obtain terminal flags through tcgetattr(). */ *(struct termios*)data = *(struct termios*)dev->si_drv2; break; case TIOCSETA: /* Set terminal flags through tcsetattr(). */ error = priv_check(td, PRIV_TTY_SETA); if (error) break; *(struct termios*)dev->si_drv2 = *(struct termios*)data; break; case TIOCGETD: *(int *)data = TTYDISC; break; case TIOCGWINSZ: bzero(data, sizeof(struct winsize)); break; default: error = ENOTTY; } done: tty_unlock(tp); return (error); } static struct cdevsw ttyil_cdevsw = { .d_version = D_VERSION, .d_open = ttyil_open, .d_close = ttyil_close, .d_read = ttyil_rdwr, .d_write = ttyil_rdwr, .d_ioctl = ttyil_ioctl, .d_name = "ttyil", .d_flags = D_TTY, }; static void tty_init_termios(struct tty *tp) { struct termios *t = &tp->t_termios_init_in; t->c_cflag = TTYDEF_CFLAG; t->c_iflag = TTYDEF_IFLAG; t->c_lflag = TTYDEF_LFLAG; t->c_oflag = TTYDEF_OFLAG; t->c_ispeed = TTYDEF_SPEED; t->c_ospeed = TTYDEF_SPEED; memcpy(&t->c_cc, ttydefchars, sizeof ttydefchars); tp->t_termios_init_out = *t; } void tty_init_console(struct tty *tp, speed_t s) { struct termios *ti = &tp->t_termios_init_in; struct termios *to = &tp->t_termios_init_out; if (s != 0) { ti->c_ispeed = ti->c_ospeed = s; to->c_ispeed = to->c_ospeed = s; } ti->c_cflag |= CLOCAL; to->c_cflag |= CLOCAL; } /* * Standard device routine implementations, mostly meant for * pseudo-terminal device drivers. When a driver creates a new terminal * device class, missing routines are patched. */ static int ttydevsw_defopen(struct tty *tp __unused) { return (0); } static void ttydevsw_defclose(struct tty *tp __unused) { } static void ttydevsw_defoutwakeup(struct tty *tp __unused) { panic("Terminal device has output, while not implemented"); } static void ttydevsw_definwakeup(struct tty *tp __unused) { } static int ttydevsw_defioctl(struct tty *tp __unused, u_long cmd __unused, caddr_t data __unused, struct thread *td __unused) { return (ENOIOCTL); } static int ttydevsw_defcioctl(struct tty *tp __unused, int unit __unused, u_long cmd __unused, caddr_t data __unused, struct thread *td __unused) { return (ENOIOCTL); } static int ttydevsw_defparam(struct tty *tp __unused, struct termios *t) { /* * Allow the baud rate to be adjusted for pseudo-devices, but at * least restrict it to 115200 to prevent excessive buffer * usage. Also disallow 0, to prevent foot shooting. */ if (t->c_ispeed < B50) t->c_ispeed = B50; else if (t->c_ispeed > B115200) t->c_ispeed = B115200; if (t->c_ospeed < B50) t->c_ospeed = B50; else if (t->c_ospeed > B115200) t->c_ospeed = B115200; t->c_cflag |= CREAD; return (0); } static int ttydevsw_defmodem(struct tty *tp __unused, int sigon __unused, int sigoff __unused) { /* Simulate a carrier to make the TTY layer happy. */ return (SER_DCD); } static int ttydevsw_defmmap(struct tty *tp __unused, vm_ooffset_t offset __unused, vm_paddr_t *paddr __unused, int nprot __unused, vm_memattr_t *memattr __unused) { return (-1); } static void ttydevsw_defpktnotify(struct tty *tp __unused, char event __unused) { } static void ttydevsw_deffree(void *softc __unused) { panic("Terminal device freed without a free-handler"); } static bool ttydevsw_defbusy(struct tty *tp __unused) { return (FALSE); } /* * TTY allocation and deallocation. TTY devices can be deallocated when * the driver doesn't use it anymore, when the TTY isn't a session's * controlling TTY and when the device node isn't opened through devfs. */ struct tty * tty_alloc(struct ttydevsw *tsw, void *sc) { return (tty_alloc_mutex(tsw, sc, NULL)); } struct tty * tty_alloc_mutex(struct ttydevsw *tsw, void *sc, struct mtx *mutex) { struct tty *tp; /* Make sure the driver defines all routines. */ #define PATCH_FUNC(x) do { \ if (tsw->tsw_ ## x == NULL) \ tsw->tsw_ ## x = ttydevsw_def ## x; \ } while (0) PATCH_FUNC(open); PATCH_FUNC(close); PATCH_FUNC(outwakeup); PATCH_FUNC(inwakeup); PATCH_FUNC(ioctl); PATCH_FUNC(cioctl); PATCH_FUNC(param); PATCH_FUNC(modem); PATCH_FUNC(mmap); PATCH_FUNC(pktnotify); PATCH_FUNC(free); PATCH_FUNC(busy); #undef PATCH_FUNC tp = malloc(sizeof(struct tty), M_TTY, M_WAITOK|M_ZERO); tp->t_devsw = tsw; tp->t_devswsoftc = sc; tp->t_flags = tsw->tsw_flags; tp->t_drainwait = tty_drainwait; tty_init_termios(tp); cv_init(&tp->t_inwait, "ttyin"); cv_init(&tp->t_outwait, "ttyout"); cv_init(&tp->t_outserwait, "ttyosr"); cv_init(&tp->t_bgwait, "ttybg"); cv_init(&tp->t_dcdwait, "ttydcd"); /* Allow drivers to use a custom mutex to lock the TTY. */ if (mutex != NULL) { tp->t_mtx = mutex; } else { tp->t_mtx = &tp->t_mtxobj; mtx_init(&tp->t_mtxobj, "ttymtx", NULL, MTX_DEF); } knlist_init_mtx(&tp->t_inpoll.si_note, tp->t_mtx); knlist_init_mtx(&tp->t_outpoll.si_note, tp->t_mtx); return (tp); } static void tty_dealloc(void *arg) { struct tty *tp = arg; /* * ttyydev_leave() usually frees the i/o queues earlier, but it is * not always called between queue allocation and here. The queues * may be allocated by ioctls on a pty control device without the * corresponding pty slave device ever being open, or after it is * closed. */ ttyinq_free(&tp->t_inq); ttyoutq_free(&tp->t_outq); seldrain(&tp->t_inpoll); seldrain(&tp->t_outpoll); knlist_destroy(&tp->t_inpoll.si_note); knlist_destroy(&tp->t_outpoll.si_note); cv_destroy(&tp->t_inwait); cv_destroy(&tp->t_outwait); cv_destroy(&tp->t_bgwait); cv_destroy(&tp->t_dcdwait); cv_destroy(&tp->t_outserwait); if (tp->t_mtx == &tp->t_mtxobj) mtx_destroy(&tp->t_mtxobj); ttydevsw_free(tp); free(tp, M_TTY); } static void tty_rel_free(struct tty *tp) { struct cdev *dev; tty_lock_assert(tp, MA_OWNED); #define TF_ACTIVITY (TF_GONE|TF_OPENED|TF_HOOK|TF_OPENCLOSE) if (tp->t_sessioncnt != 0 || (tp->t_flags & TF_ACTIVITY) != TF_GONE) { /* TTY is still in use. */ tty_unlock(tp); return; } /* TTY can be deallocated. */ dev = tp->t_dev; tp->t_dev = NULL; tty_unlock(tp); if (dev != NULL) { sx_xlock(&tty_list_sx); TAILQ_REMOVE(&tty_list, tp, t_list); tty_list_count--; sx_xunlock(&tty_list_sx); destroy_dev_sched_cb(dev, tty_dealloc, tp); } } void tty_rel_pgrp(struct tty *tp, struct pgrp *pg) { MPASS(tp->t_sessioncnt > 0); tty_lock_assert(tp, MA_OWNED); if (tp->t_pgrp == pg) tp->t_pgrp = NULL; tty_unlock(tp); } void tty_rel_sess(struct tty *tp, struct session *sess) { MPASS(tp->t_sessioncnt > 0); /* Current session has left. */ if (tp->t_session == sess) { tp->t_session = NULL; MPASS(tp->t_pgrp == NULL); } tp->t_sessioncnt--; tty_rel_free(tp); } void tty_rel_gone(struct tty *tp) { MPASS(!tty_gone(tp)); /* Simulate carrier removal. */ ttydisc_modem(tp, 0); /* Wake up all blocked threads. */ tty_wakeup(tp, FREAD|FWRITE); cv_broadcast(&tp->t_bgwait); cv_broadcast(&tp->t_dcdwait); tp->t_flags |= TF_GONE; tty_rel_free(tp); } /* * Exposing information about current TTY's through sysctl */ static void tty_to_xtty(struct tty *tp, struct xtty *xt) { tty_lock_assert(tp, MA_OWNED); xt->xt_size = sizeof(struct xtty); xt->xt_insize = ttyinq_getsize(&tp->t_inq); xt->xt_incc = ttyinq_bytescanonicalized(&tp->t_inq); xt->xt_inlc = ttyinq_bytesline(&tp->t_inq); xt->xt_inlow = tp->t_inlow; xt->xt_outsize = ttyoutq_getsize(&tp->t_outq); xt->xt_outcc = ttyoutq_bytesused(&tp->t_outq); xt->xt_outlow = tp->t_outlow; xt->xt_column = tp->t_column; xt->xt_pgid = tp->t_pgrp ? tp->t_pgrp->pg_id : 0; xt->xt_sid = tp->t_session ? tp->t_session->s_sid : 0; xt->xt_flags = tp->t_flags; - xt->xt_dev = tp->t_dev ? dev2udev(tp->t_dev) : NODEV; + xt->xt_dev = tp->t_dev ? dev2udev(tp->t_dev) : (uint32_t)NODEV; } static int sysctl_kern_ttys(SYSCTL_HANDLER_ARGS) { unsigned long lsize; struct xtty *xtlist, *xt; struct tty *tp; int error; sx_slock(&tty_list_sx); lsize = tty_list_count * sizeof(struct xtty); if (lsize == 0) { sx_sunlock(&tty_list_sx); return (0); } xtlist = xt = malloc(lsize, M_TTY, M_WAITOK); TAILQ_FOREACH(tp, &tty_list, t_list) { tty_lock(tp); tty_to_xtty(tp, xt); tty_unlock(tp); xt++; } sx_sunlock(&tty_list_sx); error = SYSCTL_OUT(req, xtlist, lsize); free(xtlist, M_TTY); return (error); } SYSCTL_PROC(_kern, OID_AUTO, ttys, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE, 0, 0, sysctl_kern_ttys, "S,xtty", "List of TTYs"); /* * Device node creation. Device has been set up, now we can expose it to * the user. */ int tty_makedevf(struct tty *tp, struct ucred *cred, int flags, const char *fmt, ...) { va_list ap; struct make_dev_args args; struct cdev *dev, *init, *lock, *cua, *cinit, *clock; const char *prefix = "tty"; char name[SPECNAMELEN - 3]; /* for "tty" and "cua". */ uid_t uid; gid_t gid; mode_t mode; int error; /* Remove "tty" prefix from devices like PTY's. */ if (tp->t_flags & TF_NOPREFIX) prefix = ""; va_start(ap, fmt); vsnrprintf(name, sizeof name, 32, fmt, ap); va_end(ap); if (cred == NULL) { /* System device. */ uid = UID_ROOT; gid = GID_WHEEL; mode = S_IRUSR|S_IWUSR; } else { /* User device. */ uid = cred->cr_ruid; gid = GID_TTY; mode = S_IRUSR|S_IWUSR|S_IWGRP; } flags = flags & TTYMK_CLONING ? MAKEDEV_REF : 0; flags |= MAKEDEV_CHECKNAME; /* Master call-in device. */ make_dev_args_init(&args); args.mda_flags = flags; args.mda_devsw = &ttydev_cdevsw; args.mda_cr = cred; args.mda_uid = uid; args.mda_gid = gid; args.mda_mode = mode; args.mda_si_drv1 = tp; error = make_dev_s(&args, &dev, "%s%s", prefix, name); if (error != 0) return (error); tp->t_dev = dev; init = lock = cua = cinit = clock = NULL; /* Slave call-in devices. */ if (tp->t_flags & TF_INITLOCK) { args.mda_devsw = &ttyil_cdevsw; args.mda_unit = TTYUNIT_INIT; args.mda_si_drv1 = tp; args.mda_si_drv2 = &tp->t_termios_init_in; error = make_dev_s(&args, &init, "%s%s.init", prefix, name); if (error != 0) goto fail; dev_depends(dev, init); args.mda_unit = TTYUNIT_LOCK; args.mda_si_drv2 = &tp->t_termios_lock_in; error = make_dev_s(&args, &lock, "%s%s.lock", prefix, name); if (error != 0) goto fail; dev_depends(dev, lock); } /* Call-out devices. */ if (tp->t_flags & TF_CALLOUT) { make_dev_args_init(&args); args.mda_flags = flags; args.mda_devsw = &ttydev_cdevsw; args.mda_cr = cred; args.mda_uid = UID_UUCP; args.mda_gid = GID_DIALER; args.mda_mode = 0660; args.mda_unit = TTYUNIT_CALLOUT; args.mda_si_drv1 = tp; error = make_dev_s(&args, &cua, "cua%s", name); if (error != 0) goto fail; dev_depends(dev, cua); /* Slave call-out devices. */ if (tp->t_flags & TF_INITLOCK) { args.mda_devsw = &ttyil_cdevsw; args.mda_unit = TTYUNIT_CALLOUT | TTYUNIT_INIT; args.mda_si_drv2 = &tp->t_termios_init_out; error = make_dev_s(&args, &cinit, "cua%s.init", name); if (error != 0) goto fail; dev_depends(dev, cinit); args.mda_unit = TTYUNIT_CALLOUT | TTYUNIT_LOCK; args.mda_si_drv2 = &tp->t_termios_lock_out; error = make_dev_s(&args, &clock, "cua%s.lock", name); if (error != 0) goto fail; dev_depends(dev, clock); } } sx_xlock(&tty_list_sx); TAILQ_INSERT_TAIL(&tty_list, tp, t_list); tty_list_count++; sx_xunlock(&tty_list_sx); return (0); fail: destroy_dev(dev); if (init) destroy_dev(init); if (lock) destroy_dev(lock); if (cinit) destroy_dev(cinit); if (clock) destroy_dev(clock); return (error); } /* * Signalling processes. */ void tty_signal_sessleader(struct tty *tp, int sig) { struct proc *p; tty_lock_assert(tp, MA_OWNED); MPASS(sig >= 1 && sig < NSIG); /* Make signals start output again. */ tp->t_flags &= ~TF_STOPPED; if (tp->t_session != NULL && tp->t_session->s_leader != NULL) { p = tp->t_session->s_leader; PROC_LOCK(p); kern_psignal(p, sig); PROC_UNLOCK(p); } } void tty_signal_pgrp(struct tty *tp, int sig) { ksiginfo_t ksi; tty_lock_assert(tp, MA_OWNED); MPASS(sig >= 1 && sig < NSIG); /* Make signals start output again. */ tp->t_flags &= ~TF_STOPPED; if (sig == SIGINFO && !(tp->t_termios.c_lflag & NOKERNINFO)) tty_info(tp); if (tp->t_pgrp != NULL) { ksiginfo_init(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = SI_KERNEL; PGRP_LOCK(tp->t_pgrp); pgsignal(tp->t_pgrp, sig, 1, &ksi); PGRP_UNLOCK(tp->t_pgrp); } } void tty_wakeup(struct tty *tp, int flags) { if (tp->t_flags & TF_ASYNC && tp->t_sigio != NULL) pgsigio(&tp->t_sigio, SIGIO, (tp->t_session != NULL)); if (flags & FWRITE) { cv_broadcast(&tp->t_outwait); selwakeup(&tp->t_outpoll); KNOTE_LOCKED(&tp->t_outpoll.si_note, 0); } if (flags & FREAD) { cv_broadcast(&tp->t_inwait); selwakeup(&tp->t_inpoll); KNOTE_LOCKED(&tp->t_inpoll.si_note, 0); } } int tty_wait(struct tty *tp, struct cv *cv) { int error; int revokecnt = tp->t_revokecnt; tty_lock_assert(tp, MA_OWNED|MA_NOTRECURSED); MPASS(!tty_gone(tp)); error = cv_wait_sig(cv, tp->t_mtx); /* Bail out when the device slipped away. */ if (tty_gone(tp)) return (ENXIO); /* Restart the system call when we may have been revoked. */ if (tp->t_revokecnt != revokecnt) return (ERESTART); return (error); } int tty_timedwait(struct tty *tp, struct cv *cv, int hz) { int error; int revokecnt = tp->t_revokecnt; tty_lock_assert(tp, MA_OWNED|MA_NOTRECURSED); MPASS(!tty_gone(tp)); error = cv_timedwait_sig(cv, tp->t_mtx, hz); /* Bail out when the device slipped away. */ if (tty_gone(tp)) return (ENXIO); /* Restart the system call when we may have been revoked. */ if (tp->t_revokecnt != revokecnt) return (ERESTART); return (error); } void tty_flush(struct tty *tp, int flags) { if (flags & FWRITE) { tp->t_flags &= ~TF_HIWAT_OUT; ttyoutq_flush(&tp->t_outq); tty_wakeup(tp, FWRITE); if (!tty_gone(tp)) { ttydevsw_outwakeup(tp); ttydevsw_pktnotify(tp, TIOCPKT_FLUSHWRITE); } } if (flags & FREAD) { tty_hiwat_in_unblock(tp); ttyinq_flush(&tp->t_inq); tty_wakeup(tp, FREAD); if (!tty_gone(tp)) { ttydevsw_inwakeup(tp); ttydevsw_pktnotify(tp, TIOCPKT_FLUSHREAD); } } } void tty_set_winsize(struct tty *tp, const struct winsize *wsz) { if (memcmp(&tp->t_winsize, wsz, sizeof(*wsz)) == 0) return; tp->t_winsize = *wsz; tty_signal_pgrp(tp, SIGWINCH); } static int tty_generic_ioctl(struct tty *tp, u_long cmd, void *data, int fflag, struct thread *td) { int error; switch (cmd) { /* * Modem commands. * The SER_* and TIOCM_* flags are the same, but one bit * shifted. I don't know why. */ case TIOCSDTR: ttydevsw_modem(tp, SER_DTR, 0); return (0); case TIOCCDTR: ttydevsw_modem(tp, 0, SER_DTR); return (0); case TIOCMSET: { int bits = *(int *)data; ttydevsw_modem(tp, (bits & (TIOCM_DTR | TIOCM_RTS)) >> 1, ((~bits) & (TIOCM_DTR | TIOCM_RTS)) >> 1); return (0); } case TIOCMBIS: { int bits = *(int *)data; ttydevsw_modem(tp, (bits & (TIOCM_DTR | TIOCM_RTS)) >> 1, 0); return (0); } case TIOCMBIC: { int bits = *(int *)data; ttydevsw_modem(tp, 0, (bits & (TIOCM_DTR | TIOCM_RTS)) >> 1); return (0); } case TIOCMGET: *(int *)data = TIOCM_LE + (ttydevsw_modem(tp, 0, 0) << 1); return (0); case FIOASYNC: if (*(int *)data) tp->t_flags |= TF_ASYNC; else tp->t_flags &= ~TF_ASYNC; return (0); case FIONBIO: /* This device supports non-blocking operation. */ return (0); case FIONREAD: *(int *)data = ttyinq_bytescanonicalized(&tp->t_inq); return (0); case FIONWRITE: case TIOCOUTQ: *(int *)data = ttyoutq_bytesused(&tp->t_outq); return (0); case FIOSETOWN: if (tp->t_session != NULL && !tty_is_ctty(tp, td->td_proc)) /* Not allowed to set ownership. */ return (ENOTTY); /* Temporarily unlock the TTY to set ownership. */ tty_unlock(tp); error = fsetown(*(int *)data, &tp->t_sigio); tty_lock(tp); return (error); case FIOGETOWN: if (tp->t_session != NULL && !tty_is_ctty(tp, td->td_proc)) /* Not allowed to set ownership. */ return (ENOTTY); /* Get ownership. */ *(int *)data = fgetown(&tp->t_sigio); return (0); case TIOCGETA: /* Obtain terminal flags through tcgetattr(). */ *(struct termios*)data = tp->t_termios; return (0); case TIOCSETA: case TIOCSETAW: case TIOCSETAF: { struct termios *t = data; /* * Who makes up these funny rules? According to POSIX, * input baud rate is set equal to the output baud rate * when zero. */ if (t->c_ispeed == 0) t->c_ispeed = t->c_ospeed; /* Discard any unsupported bits. */ t->c_iflag &= TTYSUP_IFLAG; t->c_oflag &= TTYSUP_OFLAG; t->c_lflag &= TTYSUP_LFLAG; t->c_cflag &= TTYSUP_CFLAG; /* Set terminal flags through tcsetattr(). */ if (cmd == TIOCSETAW || cmd == TIOCSETAF) { error = tty_drain(tp, 0); if (error) return (error); if (cmd == TIOCSETAF) tty_flush(tp, FREAD); } /* * Only call param() when the flags really change. */ if ((t->c_cflag & CIGNORE) == 0 && (tp->t_termios.c_cflag != t->c_cflag || ((tp->t_termios.c_iflag ^ t->c_iflag) & (IXON|IXOFF|IXANY)) || tp->t_termios.c_ispeed != t->c_ispeed || tp->t_termios.c_ospeed != t->c_ospeed)) { error = ttydevsw_param(tp, t); if (error) return (error); /* XXX: CLOCAL? */ tp->t_termios.c_cflag = t->c_cflag & ~CIGNORE; tp->t_termios.c_ispeed = t->c_ispeed; tp->t_termios.c_ospeed = t->c_ospeed; /* Baud rate has changed - update watermarks. */ error = tty_watermarks(tp); if (error) return (error); } /* Copy new non-device driver parameters. */ tp->t_termios.c_iflag = t->c_iflag; tp->t_termios.c_oflag = t->c_oflag; tp->t_termios.c_lflag = t->c_lflag; memcpy(&tp->t_termios.c_cc, t->c_cc, sizeof t->c_cc); ttydisc_optimize(tp); if ((t->c_lflag & ICANON) == 0) { /* * When in non-canonical mode, wake up all * readers. Canonicalize any partial input. VMIN * and VTIME could also be adjusted. */ ttyinq_canonicalize(&tp->t_inq); tty_wakeup(tp, FREAD); } /* * For packet mode: notify the PTY consumer that VSTOP * and VSTART may have been changed. */ if (tp->t_termios.c_iflag & IXON && tp->t_termios.c_cc[VSTOP] == CTRL('S') && tp->t_termios.c_cc[VSTART] == CTRL('Q')) ttydevsw_pktnotify(tp, TIOCPKT_DOSTOP); else ttydevsw_pktnotify(tp, TIOCPKT_NOSTOP); return (0); } case TIOCGETD: /* For compatibility - we only support TTYDISC. */ *(int *)data = TTYDISC; return (0); case TIOCGPGRP: if (!tty_is_ctty(tp, td->td_proc)) return (ENOTTY); if (tp->t_pgrp != NULL) *(int *)data = tp->t_pgrp->pg_id; else *(int *)data = NO_PID; return (0); case TIOCGSID: if (!tty_is_ctty(tp, td->td_proc)) return (ENOTTY); MPASS(tp->t_session); *(int *)data = tp->t_session->s_sid; return (0); case TIOCSCTTY: { struct proc *p = td->td_proc; /* XXX: This looks awful. */ tty_unlock(tp); sx_xlock(&proctree_lock); tty_lock(tp); if (!SESS_LEADER(p)) { /* Only the session leader may do this. */ sx_xunlock(&proctree_lock); return (EPERM); } if (tp->t_session != NULL && tp->t_session == p->p_session) { /* This is already our controlling TTY. */ sx_xunlock(&proctree_lock); return (0); } if (p->p_session->s_ttyp != NULL || (tp->t_session != NULL && tp->t_session->s_ttyvp != NULL && tp->t_session->s_ttyvp->v_type != VBAD)) { /* * There is already a relation between a TTY and * a session, or the caller is not the session * leader. * * Allow the TTY to be stolen when the vnode is * invalid, but the reference to the TTY is * still active. This allows immediate reuse of * TTYs of which the session leader has been * killed or the TTY revoked. */ sx_xunlock(&proctree_lock); return (EPERM); } /* Connect the session to the TTY. */ tp->t_session = p->p_session; tp->t_session->s_ttyp = tp; tp->t_sessioncnt++; sx_xunlock(&proctree_lock); /* Assign foreground process group. */ tp->t_pgrp = p->p_pgrp; PROC_LOCK(p); p->p_flag |= P_CONTROLT; PROC_UNLOCK(p); return (0); } case TIOCSPGRP: { struct pgrp *pg; /* * XXX: Temporarily unlock the TTY to locate the process * group. This code would be lot nicer if we would ever * decompose proctree_lock. */ tty_unlock(tp); sx_slock(&proctree_lock); pg = pgfind(*(int *)data); if (pg != NULL) PGRP_UNLOCK(pg); if (pg == NULL || pg->pg_session != td->td_proc->p_session) { sx_sunlock(&proctree_lock); tty_lock(tp); return (EPERM); } tty_lock(tp); /* * Determine if this TTY is the controlling TTY after * relocking the TTY. */ if (!tty_is_ctty(tp, td->td_proc)) { sx_sunlock(&proctree_lock); return (ENOTTY); } tp->t_pgrp = pg; sx_sunlock(&proctree_lock); /* Wake up the background process groups. */ cv_broadcast(&tp->t_bgwait); return (0); } case TIOCFLUSH: { int flags = *(int *)data; if (flags == 0) flags = (FREAD|FWRITE); else flags &= (FREAD|FWRITE); tty_flush(tp, flags); return (0); } case TIOCDRAIN: /* Drain TTY output. */ return tty_drain(tp, 0); case TIOCGDRAINWAIT: *(int *)data = tp->t_drainwait; return (0); case TIOCSDRAINWAIT: error = priv_check(td, PRIV_TTY_DRAINWAIT); if (error == 0) tp->t_drainwait = *(int *)data; return (error); case TIOCCONS: /* Set terminal as console TTY. */ if (*(int *)data) { error = priv_check(td, PRIV_TTY_CONSOLE); if (error) return (error); /* * XXX: constty should really need to be locked! * XXX: allow disconnected constty's to be stolen! */ if (constty == tp) return (0); if (constty != NULL) return (EBUSY); tty_unlock(tp); constty_set(tp); tty_lock(tp); } else if (constty == tp) { constty_clear(); } return (0); case TIOCGWINSZ: /* Obtain window size. */ *(struct winsize*)data = tp->t_winsize; return (0); case TIOCSWINSZ: /* Set window size. */ tty_set_winsize(tp, data); return (0); case TIOCEXCL: tp->t_flags |= TF_EXCLUDE; return (0); case TIOCNXCL: tp->t_flags &= ~TF_EXCLUDE; return (0); case TIOCSTOP: tp->t_flags |= TF_STOPPED; ttydevsw_pktnotify(tp, TIOCPKT_STOP); return (0); case TIOCSTART: tp->t_flags &= ~TF_STOPPED; ttydevsw_outwakeup(tp); ttydevsw_pktnotify(tp, TIOCPKT_START); return (0); case TIOCSTAT: tty_info(tp); return (0); case TIOCSTI: if ((fflag & FREAD) == 0 && priv_check(td, PRIV_TTY_STI)) return (EPERM); if (!tty_is_ctty(tp, td->td_proc) && priv_check(td, PRIV_TTY_STI)) return (EACCES); ttydisc_rint(tp, *(char *)data, 0); ttydisc_rint_done(tp); return (0); } #ifdef COMPAT_43TTY return tty_ioctl_compat(tp, cmd, data, fflag, td); #else /* !COMPAT_43TTY */ return (ENOIOCTL); #endif /* COMPAT_43TTY */ } int tty_ioctl(struct tty *tp, u_long cmd, void *data, int fflag, struct thread *td) { int error; tty_lock_assert(tp, MA_OWNED); if (tty_gone(tp)) return (ENXIO); error = ttydevsw_ioctl(tp, cmd, data, td); if (error == ENOIOCTL) error = tty_generic_ioctl(tp, cmd, data, fflag, td); return (error); } dev_t tty_udev(struct tty *tp) { if (tp->t_dev) return (dev2udev(tp->t_dev)); else return (NODEV); } int tty_checkoutq(struct tty *tp) { /* 256 bytes should be enough to print a log message. */ return (ttyoutq_bytesleft(&tp->t_outq) >= 256); } void tty_hiwat_in_block(struct tty *tp) { if ((tp->t_flags & TF_HIWAT_IN) == 0 && tp->t_termios.c_iflag & IXOFF && tp->t_termios.c_cc[VSTOP] != _POSIX_VDISABLE) { /* * Input flow control. Only enter the high watermark when we * can successfully store the VSTOP character. */ if (ttyoutq_write_nofrag(&tp->t_outq, &tp->t_termios.c_cc[VSTOP], 1) == 0) tp->t_flags |= TF_HIWAT_IN; } else { /* No input flow control. */ tp->t_flags |= TF_HIWAT_IN; } } void tty_hiwat_in_unblock(struct tty *tp) { if (tp->t_flags & TF_HIWAT_IN && tp->t_termios.c_iflag & IXOFF && tp->t_termios.c_cc[VSTART] != _POSIX_VDISABLE) { /* * Input flow control. Only leave the high watermark when we * can successfully store the VSTART character. */ if (ttyoutq_write_nofrag(&tp->t_outq, &tp->t_termios.c_cc[VSTART], 1) == 0) tp->t_flags &= ~TF_HIWAT_IN; } else { /* No input flow control. */ tp->t_flags &= ~TF_HIWAT_IN; } if (!tty_gone(tp)) ttydevsw_inwakeup(tp); } /* * TTY hooks interface. */ static int ttyhook_defrint(struct tty *tp, char c, int flags) { if (ttyhook_rint_bypass(tp, &c, 1) != 1) return (-1); return (0); } int ttyhook_register(struct tty **rtp, struct proc *p, int fd, struct ttyhook *th, void *softc) { struct tty *tp; struct file *fp; struct cdev *dev; struct cdevsw *cdp; struct filedesc *fdp; cap_rights_t rights; int error, ref; /* Validate the file descriptor. */ fdp = p->p_fd; error = fget_unlocked(fdp, fd, cap_rights_init(&rights, CAP_TTYHOOK), &fp, NULL); if (error != 0) return (error); if (fp->f_ops == &badfileops) { error = EBADF; goto done1; } /* * Make sure the vnode is bound to a character device. * Unlocked check for the vnode type is ok there, because we * only shall prevent calling devvn_refthread on the file that * never has been opened over a character device. */ if (fp->f_type != DTYPE_VNODE || fp->f_vnode->v_type != VCHR) { error = EINVAL; goto done1; } /* Make sure it is a TTY. */ cdp = devvn_refthread(fp->f_vnode, &dev, &ref); if (cdp == NULL) { error = ENXIO; goto done1; } if (dev != fp->f_data) { error = ENXIO; goto done2; } if (cdp != &ttydev_cdevsw) { error = ENOTTY; goto done2; } tp = dev->si_drv1; /* Try to attach the hook to the TTY. */ error = EBUSY; tty_lock(tp); MPASS((tp->t_hook == NULL) == ((tp->t_flags & TF_HOOK) == 0)); if (tp->t_flags & TF_HOOK) goto done3; tp->t_flags |= TF_HOOK; tp->t_hook = th; tp->t_hooksoftc = softc; *rtp = tp; error = 0; /* Maybe we can switch into bypass mode now. */ ttydisc_optimize(tp); /* Silently convert rint() calls to rint_bypass() when possible. */ if (!ttyhook_hashook(tp, rint) && ttyhook_hashook(tp, rint_bypass)) th->th_rint = ttyhook_defrint; done3: tty_unlock(tp); done2: dev_relthread(dev, ref); done1: fdrop(fp, curthread); return (error); } void ttyhook_unregister(struct tty *tp) { tty_lock_assert(tp, MA_OWNED); MPASS(tp->t_flags & TF_HOOK); /* Disconnect the hook. */ tp->t_flags &= ~TF_HOOK; tp->t_hook = NULL; /* Maybe we need to leave bypass mode. */ ttydisc_optimize(tp); /* Maybe deallocate the TTY as well. */ tty_rel_free(tp); } /* * /dev/console handling. */ static int ttyconsdev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { struct tty *tp; /* System has no console device. */ if (dev_console_filename == NULL) return (ENXIO); /* Look up corresponding TTY by device name. */ sx_slock(&tty_list_sx); TAILQ_FOREACH(tp, &tty_list, t_list) { if (strcmp(dev_console_filename, tty_devname(tp)) == 0) { dev_console->si_drv1 = tp; break; } } sx_sunlock(&tty_list_sx); /* System console has no TTY associated. */ if (dev_console->si_drv1 == NULL) return (ENXIO); return (ttydev_open(dev, oflags, devtype, td)); } static int ttyconsdev_write(struct cdev *dev, struct uio *uio, int ioflag) { log_console(uio); return (ttydev_write(dev, uio, ioflag)); } /* * /dev/console is a little different than normal TTY's. When opened, * it determines which TTY to use. When data gets written to it, it * will be logged in the kernel message buffer. */ static struct cdevsw ttyconsdev_cdevsw = { .d_version = D_VERSION, .d_open = ttyconsdev_open, .d_close = ttydev_close, .d_read = ttydev_read, .d_write = ttyconsdev_write, .d_ioctl = ttydev_ioctl, .d_kqfilter = ttydev_kqfilter, .d_poll = ttydev_poll, .d_mmap = ttydev_mmap, .d_name = "ttyconsdev", .d_flags = D_TTY, }; static void ttyconsdev_init(void *unused __unused) { dev_console = make_dev_credf(MAKEDEV_ETERNAL, &ttyconsdev_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600, "console"); } SYSINIT(tty, SI_SUB_DRIVERS, SI_ORDER_FIRST, ttyconsdev_init, NULL); void ttyconsdev_select(const char *name) { dev_console_filename = name; } /* * Debugging routines. */ #include "opt_ddb.h" #ifdef DDB #include #include static const struct { int flag; char val; } ttystates[] = { #if 0 { TF_NOPREFIX, 'N' }, #endif { TF_INITLOCK, 'I' }, { TF_CALLOUT, 'C' }, /* Keep these together -> 'Oi' and 'Oo'. */ { TF_OPENED, 'O' }, { TF_OPENED_IN, 'i' }, { TF_OPENED_OUT, 'o' }, { TF_OPENED_CONS, 'c' }, { TF_GONE, 'G' }, { TF_OPENCLOSE, 'B' }, { TF_ASYNC, 'Y' }, { TF_LITERAL, 'L' }, /* Keep these together -> 'Hi' and 'Ho'. */ { TF_HIWAT, 'H' }, { TF_HIWAT_IN, 'i' }, { TF_HIWAT_OUT, 'o' }, { TF_STOPPED, 'S' }, { TF_EXCLUDE, 'X' }, { TF_BYPASS, 'l' }, { TF_ZOMBIE, 'Z' }, { TF_HOOK, 's' }, /* Keep these together -> 'bi' and 'bo'. */ { TF_BUSY, 'b' }, { TF_BUSY_IN, 'i' }, { TF_BUSY_OUT, 'o' }, { 0, '\0'}, }; #define TTY_FLAG_BITS \ "\20\1NOPREFIX\2INITLOCK\3CALLOUT\4OPENED_IN" \ "\5OPENED_OUT\6OPENED_CONS\7GONE\10OPENCLOSE" \ "\11ASYNC\12LITERAL\13HIWAT_IN\14HIWAT_OUT" \ "\15STOPPED\16EXCLUDE\17BYPASS\20ZOMBIE" \ "\21HOOK\22BUSY_IN\23BUSY_OUT" #define DB_PRINTSYM(name, addr) \ db_printf("%s " #name ": ", sep); \ db_printsym((db_addr_t) addr, DB_STGY_ANY); \ db_printf("\n"); static void _db_show_devsw(const char *sep, const struct ttydevsw *tsw) { db_printf("%sdevsw: ", sep); db_printsym((db_addr_t)tsw, DB_STGY_ANY); db_printf(" (%p)\n", tsw); DB_PRINTSYM(open, tsw->tsw_open); DB_PRINTSYM(close, tsw->tsw_close); DB_PRINTSYM(outwakeup, tsw->tsw_outwakeup); DB_PRINTSYM(inwakeup, tsw->tsw_inwakeup); DB_PRINTSYM(ioctl, tsw->tsw_ioctl); DB_PRINTSYM(param, tsw->tsw_param); DB_PRINTSYM(modem, tsw->tsw_modem); DB_PRINTSYM(mmap, tsw->tsw_mmap); DB_PRINTSYM(pktnotify, tsw->tsw_pktnotify); DB_PRINTSYM(free, tsw->tsw_free); } static void _db_show_hooks(const char *sep, const struct ttyhook *th) { db_printf("%shook: ", sep); db_printsym((db_addr_t)th, DB_STGY_ANY); db_printf(" (%p)\n", th); if (th == NULL) return; DB_PRINTSYM(rint, th->th_rint); DB_PRINTSYM(rint_bypass, th->th_rint_bypass); DB_PRINTSYM(rint_done, th->th_rint_done); DB_PRINTSYM(rint_poll, th->th_rint_poll); DB_PRINTSYM(getc_inject, th->th_getc_inject); DB_PRINTSYM(getc_capture, th->th_getc_capture); DB_PRINTSYM(getc_poll, th->th_getc_poll); DB_PRINTSYM(close, th->th_close); } static void _db_show_termios(const char *name, const struct termios *t) { db_printf("%s: iflag 0x%x oflag 0x%x cflag 0x%x " "lflag 0x%x ispeed %u ospeed %u\n", name, t->c_iflag, t->c_oflag, t->c_cflag, t->c_lflag, t->c_ispeed, t->c_ospeed); } /* DDB command to show TTY statistics. */ DB_SHOW_COMMAND(tty, db_show_tty) { struct tty *tp; if (!have_addr) { db_printf("usage: show tty \n"); return; } tp = (struct tty *)addr; db_printf("%p: %s\n", tp, tty_devname(tp)); db_printf("\tmtx: %p\n", tp->t_mtx); db_printf("\tflags: 0x%b\n", tp->t_flags, TTY_FLAG_BITS); db_printf("\trevokecnt: %u\n", tp->t_revokecnt); /* Buffering mechanisms. */ db_printf("\tinq: %p begin %u linestart %u reprint %u end %u " "nblocks %u quota %u\n", &tp->t_inq, tp->t_inq.ti_begin, tp->t_inq.ti_linestart, tp->t_inq.ti_reprint, tp->t_inq.ti_end, tp->t_inq.ti_nblocks, tp->t_inq.ti_quota); db_printf("\toutq: %p begin %u end %u nblocks %u quota %u\n", &tp->t_outq, tp->t_outq.to_begin, tp->t_outq.to_end, tp->t_outq.to_nblocks, tp->t_outq.to_quota); db_printf("\tinlow: %zu\n", tp->t_inlow); db_printf("\toutlow: %zu\n", tp->t_outlow); _db_show_termios("\ttermios", &tp->t_termios); db_printf("\twinsize: row %u col %u xpixel %u ypixel %u\n", tp->t_winsize.ws_row, tp->t_winsize.ws_col, tp->t_winsize.ws_xpixel, tp->t_winsize.ws_ypixel); db_printf("\tcolumn: %u\n", tp->t_column); db_printf("\twritepos: %u\n", tp->t_writepos); db_printf("\tcompatflags: 0x%x\n", tp->t_compatflags); /* Init/lock-state devices. */ _db_show_termios("\ttermios_init_in", &tp->t_termios_init_in); _db_show_termios("\ttermios_init_out", &tp->t_termios_init_out); _db_show_termios("\ttermios_lock_in", &tp->t_termios_lock_in); _db_show_termios("\ttermios_lock_out", &tp->t_termios_lock_out); /* Hooks */ _db_show_devsw("\t", tp->t_devsw); _db_show_hooks("\t", tp->t_hook); /* Process info. */ db_printf("\tpgrp: %p gid %d jobc %d\n", tp->t_pgrp, tp->t_pgrp ? tp->t_pgrp->pg_id : 0, tp->t_pgrp ? tp->t_pgrp->pg_jobc : 0); db_printf("\tsession: %p", tp->t_session); if (tp->t_session != NULL) db_printf(" count %u leader %p tty %p sid %d login %s", tp->t_session->s_count, tp->t_session->s_leader, tp->t_session->s_ttyp, tp->t_session->s_sid, tp->t_session->s_login); db_printf("\n"); db_printf("\tsessioncnt: %u\n", tp->t_sessioncnt); db_printf("\tdevswsoftc: %p\n", tp->t_devswsoftc); db_printf("\thooksoftc: %p\n", tp->t_hooksoftc); db_printf("\tdev: %p\n", tp->t_dev); } /* DDB command to list TTYs. */ DB_SHOW_ALL_COMMAND(ttys, db_show_all_ttys) { struct tty *tp; size_t isiz, osiz; int i, j; /* Make the output look like `pstat -t'. */ db_printf("PTR "); #if defined(__LP64__) db_printf(" "); #endif db_printf(" LINE INQ CAN LIN LOW OUTQ USE LOW " "COL SESS PGID STATE\n"); TAILQ_FOREACH(tp, &tty_list, t_list) { isiz = tp->t_inq.ti_nblocks * TTYINQ_DATASIZE; osiz = tp->t_outq.to_nblocks * TTYOUTQ_DATASIZE; db_printf("%p %10s %5zu %4u %4u %4zu %5zu %4u %4zu %5u %5d " "%5d ", tp, tty_devname(tp), isiz, tp->t_inq.ti_linestart - tp->t_inq.ti_begin, tp->t_inq.ti_end - tp->t_inq.ti_linestart, isiz - tp->t_inlow, osiz, tp->t_outq.to_end - tp->t_outq.to_begin, osiz - tp->t_outlow, MIN(tp->t_column, 99999), tp->t_session ? tp->t_session->s_sid : 0, tp->t_pgrp ? tp->t_pgrp->pg_id : 0); /* Flag bits. */ for (i = j = 0; ttystates[i].flag; i++) if (tp->t_flags & ttystates[i].flag) { db_printf("%c", ttystates[i].val); j++; } if (j == 0) db_printf("-"); db_printf("\n"); } } #endif /* DDB */ Index: head/sys/kern/tty_pts.c =================================================================== --- head/sys/kern/tty_pts.c (revision 318735) +++ head/sys/kern/tty_pts.c (revision 318736) @@ -1,867 +1,869 @@ /*- * Copyright (c) 2008 Ed Schouten * All rights reserved. * * Portions of this software were developed under sponsorship from Snow * B.V., the Netherlands. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* Add compatibility bits for FreeBSD. */ #define PTS_COMPAT /* Add pty(4) compat bits. */ #define PTS_EXTERNAL /* Add bits to make Linux binaries work. */ #define PTS_LINUX #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Our utmp(5) format is limited to 8-byte TTY line names. This means * we can at most allocate 1000 pseudo-terminals ("pts/999"). Allow * users to increase this number, assuming they have manually increased * UT_LINESIZE. */ static struct unrhdr *pts_pool; static MALLOC_DEFINE(M_PTS, "pts", "pseudo tty device"); /* * Per-PTS structure. * * List of locks * (t) locked by tty_lock() * (c) const until freeing */ struct pts_softc { int pts_unit; /* (c) Device unit number. */ unsigned int pts_flags; /* (t) Device flags. */ #define PTS_PKT 0x1 /* Packet mode. */ #define PTS_FINISHED 0x2 /* Return errors on read()/write(). */ char pts_pkt; /* (t) Unread packet mode data. */ struct cv pts_inwait; /* (t) Blocking write() on master. */ struct selinfo pts_inpoll; /* (t) Select queue for write(). */ struct cv pts_outwait; /* (t) Blocking read() on master. */ struct selinfo pts_outpoll; /* (t) Select queue for read(). */ #ifdef PTS_EXTERNAL struct cdev *pts_cdev; /* (c) Master device node. */ #endif /* PTS_EXTERNAL */ struct ucred *pts_cred; /* (c) Resource limit. */ }; /* * Controller-side file operations. */ static int ptsdev_read(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { struct tty *tp = fp->f_data; struct pts_softc *psc = tty_softc(tp); int error = 0; char pkt; if (uio->uio_resid == 0) return (0); tty_lock(tp); for (;;) { /* * Implement packet mode. When packet mode is turned on, * the first byte contains a bitmask of events that * occurred (start, stop, flush, window size, etc). */ if (psc->pts_flags & PTS_PKT && psc->pts_pkt) { pkt = psc->pts_pkt; psc->pts_pkt = 0; tty_unlock(tp); error = ureadc(pkt, uio); return (error); } /* * Transmit regular data. * * XXX: We shouldn't use ttydisc_getc_poll()! Even * though in this implementation, there is likely going * to be data, we should just call ttydisc_getc_uio() * and use its return value to sleep. */ if (ttydisc_getc_poll(tp)) { if (psc->pts_flags & PTS_PKT) { /* * XXX: Small race. Fortunately PTY * consumers aren't multithreaded. */ tty_unlock(tp); error = ureadc(TIOCPKT_DATA, uio); if (error) return (error); tty_lock(tp); } error = ttydisc_getc_uio(tp, uio); break; } /* Maybe the device isn't used anyway. */ if (psc->pts_flags & PTS_FINISHED) break; /* Wait for more data. */ if (fp->f_flag & O_NONBLOCK) { error = EWOULDBLOCK; break; } error = cv_wait_sig(&psc->pts_outwait, tp->t_mtx); if (error != 0) break; } tty_unlock(tp); return (error); } static int ptsdev_write(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { struct tty *tp = fp->f_data; struct pts_softc *psc = tty_softc(tp); char ib[256], *ibstart; size_t iblen, rintlen; int error = 0; if (uio->uio_resid == 0) return (0); for (;;) { ibstart = ib; iblen = MIN(uio->uio_resid, sizeof ib); error = uiomove(ib, iblen, uio); tty_lock(tp); if (error != 0) { iblen = 0; goto done; } /* * When possible, avoid the slow path. rint_bypass() * copies all input to the input queue at once. */ MPASS(iblen > 0); do { rintlen = ttydisc_rint_simple(tp, ibstart, iblen); ibstart += rintlen; iblen -= rintlen; if (iblen == 0) { /* All data written. */ break; } /* Maybe the device isn't used anyway. */ if (psc->pts_flags & PTS_FINISHED) { error = EIO; goto done; } /* Wait for more data. */ if (fp->f_flag & O_NONBLOCK) { error = EWOULDBLOCK; goto done; } /* Wake up users on the slave side. */ ttydisc_rint_done(tp); error = cv_wait_sig(&psc->pts_inwait, tp->t_mtx); if (error != 0) goto done; } while (iblen > 0); if (uio->uio_resid == 0) break; tty_unlock(tp); } done: ttydisc_rint_done(tp); tty_unlock(tp); /* * Don't account for the part of the buffer that we couldn't * pass to the TTY. */ uio->uio_resid += iblen; return (error); } static int ptsdev_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, struct thread *td) { struct tty *tp = fp->f_data; struct pts_softc *psc = tty_softc(tp); int error = 0, sig; switch (cmd) { case FIODTYPE: *(int *)data = D_TTY; return (0); case FIONBIO: /* This device supports non-blocking operation. */ return (0); case FIONREAD: tty_lock(tp); if (psc->pts_flags & PTS_FINISHED) { /* Force read() to be called. */ *(int *)data = 1; } else { *(int *)data = ttydisc_getc_poll(tp); } tty_unlock(tp); return (0); case FIODGNAME: { struct fiodgname_arg *fgn; const char *p; int i; /* Reverse device name lookups, for ptsname() and ttyname(). */ fgn = data; p = tty_devname(tp); i = strlen(p) + 1; if (i > fgn->len) return (EINVAL); return copyout(p, fgn->buf, i); } /* * We need to implement TIOCGPGRP and TIOCGSID here again. When * called on the pseudo-terminal master, it should not check if * the terminal is the foreground terminal of the calling * process. * * TIOCGETA is also implemented here. Various Linux PTY routines * often call isatty(), which is implemented by tcgetattr(). */ #ifdef PTS_LINUX case TIOCGETA: /* Obtain terminal flags through tcgetattr(). */ tty_lock(tp); *(struct termios*)data = tp->t_termios; tty_unlock(tp); return (0); #endif /* PTS_LINUX */ case TIOCSETAF: case TIOCSETAW: /* * We must make sure we turn tcsetattr() calls of TCSAFLUSH and * TCSADRAIN into something different. If an application would * call TCSAFLUSH or TCSADRAIN on the master descriptor, it may * deadlock waiting for all data to be read. */ cmd = TIOCSETA; break; #if defined(PTS_COMPAT) || defined(PTS_LINUX) case TIOCGPTN: /* * Get the device unit number. */ if (psc->pts_unit < 0) return (ENOTTY); *(unsigned int *)data = psc->pts_unit; return (0); #endif /* PTS_COMPAT || PTS_LINUX */ case TIOCGPGRP: /* Get the foreground process group ID. */ tty_lock(tp); if (tp->t_pgrp != NULL) *(int *)data = tp->t_pgrp->pg_id; else *(int *)data = NO_PID; tty_unlock(tp); return (0); case TIOCGSID: /* Get the session leader process ID. */ tty_lock(tp); if (tp->t_session == NULL) error = ENOTTY; else *(int *)data = tp->t_session->s_sid; tty_unlock(tp); return (error); case TIOCPTMASTER: /* Yes, we are a pseudo-terminal master. */ return (0); case TIOCSIG: /* Signal the foreground process group. */ sig = *(int *)data; if (sig < 1 || sig >= NSIG) return (EINVAL); tty_lock(tp); tty_signal_pgrp(tp, sig); tty_unlock(tp); return (0); case TIOCPKT: /* Enable/disable packet mode. */ tty_lock(tp); if (*(int *)data) psc->pts_flags |= PTS_PKT; else psc->pts_flags &= ~PTS_PKT; tty_unlock(tp); return (0); } /* Just redirect this ioctl to the slave device. */ tty_lock(tp); error = tty_ioctl(tp, cmd, data, fp->f_flag, td); tty_unlock(tp); if (error == ENOIOCTL) error = ENOTTY; return (error); } static int ptsdev_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { struct tty *tp = fp->f_data; struct pts_softc *psc = tty_softc(tp); int revents = 0; tty_lock(tp); if (psc->pts_flags & PTS_FINISHED) { /* Slave device is not opened. */ tty_unlock(tp); return ((events & (POLLIN|POLLRDNORM)) | POLLHUP); } if (events & (POLLIN|POLLRDNORM)) { /* See if we can getc something. */ if (ttydisc_getc_poll(tp) || (psc->pts_flags & PTS_PKT && psc->pts_pkt)) revents |= events & (POLLIN|POLLRDNORM); } if (events & (POLLOUT|POLLWRNORM)) { /* See if we can rint something. */ if (ttydisc_rint_poll(tp)) revents |= events & (POLLOUT|POLLWRNORM); } /* * No need to check for POLLHUP here. This device cannot be used * as a callout device, which means we always have a carrier, * because the master is. */ if (revents == 0) { /* * This code might look misleading, but the naming of * poll events on this side is the opposite of the slave * device. */ if (events & (POLLIN|POLLRDNORM)) selrecord(td, &psc->pts_outpoll); if (events & (POLLOUT|POLLWRNORM)) selrecord(td, &psc->pts_inpoll); } tty_unlock(tp); return (revents); } /* * kqueue support. */ static void pts_kqops_read_detach(struct knote *kn) { struct file *fp = kn->kn_fp; struct tty *tp = fp->f_data; struct pts_softc *psc = tty_softc(tp); knlist_remove(&psc->pts_outpoll.si_note, kn, 0); } static int pts_kqops_read_event(struct knote *kn, long hint) { struct file *fp = kn->kn_fp; struct tty *tp = fp->f_data; struct pts_softc *psc = tty_softc(tp); if (psc->pts_flags & PTS_FINISHED) { kn->kn_flags |= EV_EOF; return (1); } else { kn->kn_data = ttydisc_getc_poll(tp); return (kn->kn_data > 0); } } static void pts_kqops_write_detach(struct knote *kn) { struct file *fp = kn->kn_fp; struct tty *tp = fp->f_data; struct pts_softc *psc = tty_softc(tp); knlist_remove(&psc->pts_inpoll.si_note, kn, 0); } static int pts_kqops_write_event(struct knote *kn, long hint) { struct file *fp = kn->kn_fp; struct tty *tp = fp->f_data; struct pts_softc *psc = tty_softc(tp); if (psc->pts_flags & PTS_FINISHED) { kn->kn_flags |= EV_EOF; return (1); } else { kn->kn_data = ttydisc_rint_poll(tp); return (kn->kn_data > 0); } } static struct filterops pts_kqops_read = { .f_isfd = 1, .f_detach = pts_kqops_read_detach, .f_event = pts_kqops_read_event, }; static struct filterops pts_kqops_write = { .f_isfd = 1, .f_detach = pts_kqops_write_detach, .f_event = pts_kqops_write_event, }; static int ptsdev_kqfilter(struct file *fp, struct knote *kn) { struct tty *tp = fp->f_data; struct pts_softc *psc = tty_softc(tp); int error = 0; tty_lock(tp); switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &pts_kqops_read; knlist_add(&psc->pts_outpoll.si_note, kn, 1); break; case EVFILT_WRITE: kn->kn_fop = &pts_kqops_write; knlist_add(&psc->pts_inpoll.si_note, kn, 1); break; default: error = EINVAL; break; } tty_unlock(tp); return (error); } static int ptsdev_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) { struct tty *tp = fp->f_data; #ifdef PTS_EXTERNAL struct pts_softc *psc = tty_softc(tp); #endif /* PTS_EXTERNAL */ struct cdev *dev = tp->t_dev; /* * According to POSIX, we must implement an fstat(). This also * makes this implementation compatible with Linux binaries, * because Linux calls fstat() on the pseudo-terminal master to * obtain st_rdev. * * XXX: POSIX also mentions we must fill in st_dev, but how? */ bzero(sb, sizeof *sb); #ifdef PTS_EXTERNAL if (psc->pts_cdev != NULL) sb->st_ino = sb->st_rdev = dev2udev(psc->pts_cdev); else #endif /* PTS_EXTERNAL */ sb->st_ino = sb->st_rdev = tty_udev(tp); sb->st_atim = dev->si_atime; sb->st_ctim = dev->si_ctime; sb->st_mtim = dev->si_mtime; sb->st_uid = dev->si_uid; sb->st_gid = dev->si_gid; sb->st_mode = dev->si_mode | S_IFCHR; return (0); } static int ptsdev_close(struct file *fp, struct thread *td) { struct tty *tp = fp->f_data; /* Deallocate TTY device. */ tty_lock(tp); tty_rel_gone(tp); /* * Open of /dev/ptmx or /dev/ptyXX changes the type of file * from DTYPE_VNODE to DTYPE_PTS. vn_open() increases vnode * use count, we need to decrement it, and possibly do other * required cleanup. */ if (fp->f_vnode != NULL) return (vnops.fo_close(fp, td)); return (0); } static int ptsdev_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { struct tty *tp; kif->kf_type = KF_TYPE_PTS; tp = fp->f_data; kif->kf_un.kf_pts.kf_pts_dev = tty_udev(tp); + kif->kf_un.kf_pts.kf_pts_dev_freebsd11 = + kif->kf_un.kf_pts.kf_pts_dev; /* truncate */ strlcpy(kif->kf_path, tty_devname(tp), sizeof(kif->kf_path)); return (0); } static struct fileops ptsdev_ops = { .fo_read = ptsdev_read, .fo_write = ptsdev_write, .fo_truncate = invfo_truncate, .fo_ioctl = ptsdev_ioctl, .fo_poll = ptsdev_poll, .fo_kqfilter = ptsdev_kqfilter, .fo_stat = ptsdev_stat, .fo_close = ptsdev_close, .fo_chmod = invfo_chmod, .fo_chown = invfo_chown, .fo_sendfile = invfo_sendfile, .fo_fill_kinfo = ptsdev_fill_kinfo, .fo_flags = DFLAG_PASSABLE, }; /* * Driver-side hooks. */ static void ptsdrv_outwakeup(struct tty *tp) { struct pts_softc *psc = tty_softc(tp); cv_broadcast(&psc->pts_outwait); selwakeup(&psc->pts_outpoll); KNOTE_LOCKED(&psc->pts_outpoll.si_note, 0); } static void ptsdrv_inwakeup(struct tty *tp) { struct pts_softc *psc = tty_softc(tp); cv_broadcast(&psc->pts_inwait); selwakeup(&psc->pts_inpoll); KNOTE_LOCKED(&psc->pts_inpoll.si_note, 0); } static int ptsdrv_open(struct tty *tp) { struct pts_softc *psc = tty_softc(tp); psc->pts_flags &= ~PTS_FINISHED; return (0); } static void ptsdrv_close(struct tty *tp) { struct pts_softc *psc = tty_softc(tp); /* Wake up any blocked readers/writers. */ psc->pts_flags |= PTS_FINISHED; ptsdrv_outwakeup(tp); ptsdrv_inwakeup(tp); } static void ptsdrv_pktnotify(struct tty *tp, char event) { struct pts_softc *psc = tty_softc(tp); /* * Clear conflicting flags. */ switch (event) { case TIOCPKT_STOP: psc->pts_pkt &= ~TIOCPKT_START; break; case TIOCPKT_START: psc->pts_pkt &= ~TIOCPKT_STOP; break; case TIOCPKT_NOSTOP: psc->pts_pkt &= ~TIOCPKT_DOSTOP; break; case TIOCPKT_DOSTOP: psc->pts_pkt &= ~TIOCPKT_NOSTOP; break; } psc->pts_pkt |= event; ptsdrv_outwakeup(tp); } static void ptsdrv_free(void *softc) { struct pts_softc *psc = softc; /* Make device number available again. */ if (psc->pts_unit >= 0) free_unr(pts_pool, psc->pts_unit); chgptscnt(psc->pts_cred->cr_ruidinfo, -1, 0); racct_sub_cred(psc->pts_cred, RACCT_NPTS, 1); crfree(psc->pts_cred); seldrain(&psc->pts_inpoll); seldrain(&psc->pts_outpoll); knlist_destroy(&psc->pts_inpoll.si_note); knlist_destroy(&psc->pts_outpoll.si_note); #ifdef PTS_EXTERNAL /* Destroy master device as well. */ if (psc->pts_cdev != NULL) destroy_dev_sched(psc->pts_cdev); #endif /* PTS_EXTERNAL */ free(psc, M_PTS); } static struct ttydevsw pts_class = { .tsw_flags = TF_NOPREFIX, .tsw_outwakeup = ptsdrv_outwakeup, .tsw_inwakeup = ptsdrv_inwakeup, .tsw_open = ptsdrv_open, .tsw_close = ptsdrv_close, .tsw_pktnotify = ptsdrv_pktnotify, .tsw_free = ptsdrv_free, }; #ifndef PTS_EXTERNAL static #endif /* !PTS_EXTERNAL */ int pts_alloc(int fflags, struct thread *td, struct file *fp) { int unit, ok, error; struct tty *tp; struct pts_softc *psc; struct proc *p = td->td_proc; struct ucred *cred = td->td_ucred; /* Resource limiting. */ PROC_LOCK(p); error = racct_add(p, RACCT_NPTS, 1); if (error != 0) { PROC_UNLOCK(p); return (EAGAIN); } ok = chgptscnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_NPTS)); if (!ok) { racct_sub(p, RACCT_NPTS, 1); PROC_UNLOCK(p); return (EAGAIN); } PROC_UNLOCK(p); /* Try to allocate a new pts unit number. */ unit = alloc_unr(pts_pool); if (unit < 0) { racct_sub(p, RACCT_NPTS, 1); chgptscnt(cred->cr_ruidinfo, -1, 0); return (EAGAIN); } /* Allocate TTY and softc. */ psc = malloc(sizeof(struct pts_softc), M_PTS, M_WAITOK|M_ZERO); cv_init(&psc->pts_inwait, "ptsin"); cv_init(&psc->pts_outwait, "ptsout"); psc->pts_unit = unit; psc->pts_cred = crhold(cred); tp = tty_alloc(&pts_class, psc); knlist_init_mtx(&psc->pts_inpoll.si_note, tp->t_mtx); knlist_init_mtx(&psc->pts_outpoll.si_note, tp->t_mtx); /* Expose the slave device as well. */ tty_makedev(tp, td->td_ucred, "pts/%u", psc->pts_unit); finit(fp, fflags, DTYPE_PTS, tp, &ptsdev_ops); return (0); } #ifdef PTS_EXTERNAL int pts_alloc_external(int fflags, struct thread *td, struct file *fp, struct cdev *dev, const char *name) { int ok, error; struct tty *tp; struct pts_softc *psc; struct proc *p = td->td_proc; struct ucred *cred = td->td_ucred; /* Resource limiting. */ PROC_LOCK(p); error = racct_add(p, RACCT_NPTS, 1); if (error != 0) { PROC_UNLOCK(p); return (EAGAIN); } ok = chgptscnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_NPTS)); if (!ok) { racct_sub(p, RACCT_NPTS, 1); PROC_UNLOCK(p); return (EAGAIN); } PROC_UNLOCK(p); /* Allocate TTY and softc. */ psc = malloc(sizeof(struct pts_softc), M_PTS, M_WAITOK|M_ZERO); cv_init(&psc->pts_inwait, "ptsin"); cv_init(&psc->pts_outwait, "ptsout"); psc->pts_unit = -1; psc->pts_cdev = dev; psc->pts_cred = crhold(cred); tp = tty_alloc(&pts_class, psc); knlist_init_mtx(&psc->pts_inpoll.si_note, tp->t_mtx); knlist_init_mtx(&psc->pts_outpoll.si_note, tp->t_mtx); /* Expose the slave device as well. */ tty_makedev(tp, td->td_ucred, "%s", name); finit(fp, fflags, DTYPE_PTS, tp, &ptsdev_ops); return (0); } #endif /* PTS_EXTERNAL */ int sys_posix_openpt(struct thread *td, struct posix_openpt_args *uap) { int error, fd; struct file *fp; /* * POSIX states it's unspecified when other flags are passed. We * don't allow this. */ if (uap->flags & ~(O_RDWR|O_NOCTTY|O_CLOEXEC)) return (EINVAL); error = falloc(td, &fp, &fd, uap->flags); if (error) return (error); /* Allocate the actual pseudo-TTY. */ error = pts_alloc(FFLAGS(uap->flags & O_ACCMODE), td, fp); if (error != 0) { fdclose(td, fp, fd); fdrop(fp, td); return (error); } /* Pass it back to userspace. */ td->td_retval[0] = fd; fdrop(fp, td); return (0); } static void pts_init(void *unused) { pts_pool = new_unrhdr(0, INT_MAX, NULL); } SYSINIT(pts, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, pts_init, NULL); Index: head/sys/kern/vfs_syscalls.c =================================================================== --- head/sys/kern/vfs_syscalls.c (revision 318735) +++ head/sys/kern/vfs_syscalls.c (revision 318736) @@ -1,4388 +1,4568 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_compat.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); SDT_PROVIDER_DEFINE(vfs); SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); static int kern_chflagsat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, u_long flags, int atflag); static int setfflags(struct thread *td, struct vnode *, u_long); static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); static int getutimens(const struct timespec *, enum uio_seg, struct timespec *, int *); static int setutimes(struct thread *td, struct vnode *, const struct timespec *, int, int); static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, struct thread *td); /* * Sync each mounted filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif /* ARGSUSED */ int sys_sync(struct thread *td, struct sync_args *uap) { struct mount *mp, *nmp; int save; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if ((mp->mnt_flag & MNT_RDONLY) == 0 && vn_start_write(NULL, &mp, V_NOWAIT) == 0) { save = curthread_pflags_set(TDP_SYNCIO); vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT); curthread_pflags_restore(save); vn_finished_write(mp); } mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp); } mtx_unlock(&mountlist_mtx); return (0); } /* * Change filesystem quotas. */ #ifndef _SYS_SYSPROTO_H_ struct quotactl_args { char *path; int cmd; int uid; caddr_t arg; }; #endif int sys_quotactl(struct thread *td, struct quotactl_args *uap) { struct mount *mp; struct nameidata nd; int error; AUDIT_ARG_CMD(uap->cmd); AUDIT_ARG_UID(uap->uid); if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) return (EPERM); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); mp = nd.ni_vp->v_mount; vfs_ref(mp); vput(nd.ni_vp); error = vfs_busy(mp, 0); vfs_rel(mp); if (error != 0) return (error); error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); /* * Since quota on operation typically needs to open quota * file, the Q_QUOTAON handler needs to unbusy the mount point * before calling into namei. Otherwise, unmount might be * started between two vfs_busy() invocations (first is our, * second is from mount point cross-walk code in lookup()), * causing deadlock. * * Require that Q_QUOTAON handles the vfs_busy() reference on * its own, always returning with ubusied mount point. */ if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) vfs_unbusy(mp); return (error); } /* * Used by statfs conversion routines to scale the block size up if * necessary so that all of the block counts are <= 'max_size'. Note * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero * value of 'n'. */ void statfs_scale_blocks(struct statfs *sf, long max_size) { uint64_t count; int shift; KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); /* * Attempt to scale the block counts to give a more accurate * overview to userland of the ratio of free space to used * space. To do this, find the largest block count and compute * a divisor that lets it fit into a signed integer <= max_size. */ if (sf->f_bavail < 0) count = -sf->f_bavail; else count = sf->f_bavail; count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); if (count <= max_size) return; count >>= flsl(max_size); shift = 0; while (count > 0) { shift++; count >>=1; } sf->f_bsize <<= shift; sf->f_blocks >>= shift; sf->f_bfree >>= shift; sf->f_bavail >>= shift; } static int kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) { struct statfs *sp; int error; if (mp == NULL) return (EBADF); error = vfs_busy(mp, 0); vfs_rel(mp); if (error != 0) return (error); #ifdef MAC error = mac_mount_check_stat(td->td_ucred, mp); if (error != 0) goto out; #endif /* * Set these in case the underlying filesystem fails to do so. */ sp = &mp->mnt_stat; sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = VFS_STATFS(mp, sp); if (error != 0) goto out; *buf = *sp; if (priv_check(td, PRIV_VFS_GENERATION)) { buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; prison_enforce_statfs(td->td_ucred, mp, buf); } out: vfs_unbusy(mp); return (error); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct statfs_args { char *path; struct statfs *buf; }; #endif int sys_statfs(struct thread *td, struct statfs_args *uap) { struct statfs *sfp; int error; sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); if (error == 0) error = copyout(sfp, uap->buf, sizeof(struct statfs)); free(sfp, M_STATFS); return (error); } int kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, struct statfs *buf) { struct mount *mp; struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, td); error = namei(&nd); if (error != 0) return (error); mp = nd.ni_vp->v_mount; vfs_ref(mp); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_vp); return (kern_do_statfs(td, mp, buf)); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct fstatfs_args { int fd; struct statfs *buf; }; #endif int sys_fstatfs(struct thread *td, struct fstatfs_args *uap) { struct statfs *sfp; int error; sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, uap->fd, sfp); if (error == 0) error = copyout(sfp, uap->buf, sizeof(struct statfs)); free(sfp, M_STATFS); return (error); } int kern_fstatfs(struct thread *td, int fd, struct statfs *buf) { struct file *fp; struct mount *mp; struct vnode *vp; cap_rights_t rights; int error; AUDIT_ARG_FD(fd); error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); if (error != 0) return (error); vp = fp->f_vnode; vn_lock(vp, LK_SHARED | LK_RETRY); #ifdef AUDIT AUDIT_ARG_VNODE1(vp); #endif mp = vp->v_mount; if (mp != NULL) vfs_ref(mp); VOP_UNLOCK(vp, 0); fdrop(fp, td); return (kern_do_statfs(td, mp, buf)); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct getfsstat_args { struct statfs *buf; long bufsize; int mode; }; #endif int sys_getfsstat(struct thread *td, struct getfsstat_args *uap) { size_t count; int error; if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) return (EINVAL); error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, UIO_USERSPACE, uap->mode); if (error == 0) td->td_retval[0] = count; return (error); } /* * If (bufsize > 0 && bufseg == UIO_SYSSPACE) * The caller is responsible for freeing memory which will be allocated * in '*buf'. */ int kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, size_t *countp, enum uio_seg bufseg, int mode) { struct mount *mp, *nmp; struct statfs *sfsp, *sp, *sptmp, *tofree; size_t count, maxcount; int error; switch (mode) { case MNT_WAIT: case MNT_NOWAIT: break; default: if (bufseg == UIO_SYSSPACE) *buf = NULL; return (EINVAL); } restart: maxcount = bufsize / sizeof(struct statfs); if (bufsize == 0) { sfsp = NULL; tofree = NULL; } else if (bufseg == UIO_USERSPACE) { sfsp = *buf; tofree = NULL; } else /* if (bufseg == UIO_SYSSPACE) */ { count = 0; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { count++; } mtx_unlock(&mountlist_mtx); if (maxcount > count) maxcount = count; tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_STATFS, M_WAITOK); } count = 0; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (prison_canseemount(td->td_ucred, mp) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } #ifdef MAC if (mac_mount_check_stat(td->td_ucred, mp) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } #endif if (mode == MNT_WAIT) { if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { /* * If vfs_busy() failed, and MBF_NOWAIT * wasn't passed, then the mp is gone. * Furthermore, because of MBF_MNTLSTLOCK, * the mountlist_mtx was dropped. We have * no other choice than to start over. */ mtx_unlock(&mountlist_mtx); free(tofree, M_STATFS); goto restart; } } else { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } } if (sfsp != NULL && count < maxcount) { sp = &mp->mnt_stat; /* * Set these in case the underlying filesystem * fails to do so. */ sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; /* * If MNT_NOWAIT is specified, do not refresh * the fsstat cache. */ if (mode != MNT_NOWAIT) { error = VFS_STATFS(mp, sp); if (error != 0) { mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp); continue; } } if (priv_check(td, PRIV_VFS_GENERATION)) { sptmp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); *sptmp = *sp; sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; prison_enforce_statfs(td->td_ucred, mp, sptmp); sp = sptmp; } else sptmp = NULL; if (bufseg == UIO_SYSSPACE) { bcopy(sp, sfsp, sizeof(*sp)); free(sptmp, M_STATFS); } else /* if (bufseg == UIO_USERSPACE) */ { error = copyout(sp, sfsp, sizeof(*sp)); free(sptmp, M_STATFS); if (error != 0) { vfs_unbusy(mp); return (error); } } sfsp++; } count++; mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp); } mtx_unlock(&mountlist_mtx); if (sfsp != NULL && count > maxcount) *countp = maxcount; else *countp = count; return (0); } #ifdef COMPAT_FREEBSD4 /* * Get old format filesystem statistics. */ -static void cvtstatfs(struct statfs *, struct ostatfs *); +static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); #ifndef _SYS_SYSPROTO_H_ struct freebsd4_statfs_args { char *path; struct ostatfs *buf; }; #endif int freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) { struct ostatfs osb; struct statfs *sfp; int error; sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); if (error == 0) { - cvtstatfs(sfp, &osb); + freebsd4_cvtstatfs(sfp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); } free(sfp, M_STATFS); return (error); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_fstatfs_args { int fd; struct ostatfs *buf; }; #endif int freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) { struct ostatfs osb; struct statfs *sfp; int error; sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, uap->fd, sfp); if (error == 0) { - cvtstatfs(sfp, &osb); + freebsd4_cvtstatfs(sfp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); } free(sfp, M_STATFS); return (error); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_getfsstat_args { struct ostatfs *buf; long bufsize; int mode; }; #endif int freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) { struct statfs *buf, *sp; struct ostatfs osb; size_t count, size; int error; if (uap->bufsize < 0) return (EINVAL); count = uap->bufsize / sizeof(struct ostatfs); if (count > SIZE_MAX / sizeof(struct statfs)) return (EINVAL); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, uap->mode); - td->td_retval[0] = count; + if (error == 0) + td->td_retval[0] = count; if (size != 0) { sp = buf; while (count != 0 && error == 0) { - cvtstatfs(sp, &osb); + freebsd4_cvtstatfs(sp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); sp++; uap->buf++; count--; } free(buf, M_STATFS); } return (error); } /* * Implement fstatfs() for (NFS) file handles. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_fhstatfs_args { struct fhandle *u_fhp; struct ostatfs *buf; }; #endif int freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) { struct ostatfs osb; struct statfs *sfp; fhandle_t fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fhstatfs(td, fh, sfp); if (error == 0) { - cvtstatfs(sfp, &osb); + freebsd4_cvtstatfs(sfp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); } free(sfp, M_STATFS); return (error); } /* * Convert a new format statfs structure to an old format statfs structure. */ static void -cvtstatfs(struct statfs *nsp, struct ostatfs *osp) +freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) { statfs_scale_blocks(nsp, LONG_MAX); bzero(osp, sizeof(*osp)); osp->f_bsize = nsp->f_bsize; osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); osp->f_blocks = nsp->f_blocks; osp->f_bfree = nsp->f_bfree; osp->f_bavail = nsp->f_bavail; osp->f_files = MIN(nsp->f_files, LONG_MAX); osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); osp->f_owner = nsp->f_owner; osp->f_type = nsp->f_type; osp->f_flags = nsp->f_flags; osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); strlcpy(osp->f_fstypename, nsp->f_fstypename, MIN(MFSNAMELEN, OMFSNAMELEN)); strlcpy(osp->f_mntonname, nsp->f_mntonname, MIN(MNAMELEN, OMNAMELEN)); strlcpy(osp->f_mntfromname, nsp->f_mntfromname, MIN(MNAMELEN, OMNAMELEN)); osp->f_fsid = nsp->f_fsid; } #endif /* COMPAT_FREEBSD4 */ +#if defined(COMPAT_FREEBSD11) /* + * Get old format filesystem statistics. + */ +static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); + +int +freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) +{ + struct freebsd11_statfs osb; + struct statfs *sfp; + int error; + + sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); + error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); + if (error == 0) { + freebsd11_cvtstatfs(sfp, &osb); + error = copyout(&osb, uap->buf, sizeof(osb)); + } + free(sfp, M_STATFS); + return (error); +} + +/* + * Get filesystem statistics. + */ +int +freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) +{ + struct freebsd11_statfs osb; + struct statfs *sfp; + int error; + + sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); + error = kern_fstatfs(td, uap->fd, sfp); + if (error == 0) { + freebsd11_cvtstatfs(sfp, &osb); + error = copyout(&osb, uap->buf, sizeof(osb)); + } + free(sfp, M_STATFS); + return (error); +} + +/* + * Get statistics on all filesystems. + */ +int +freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) +{ + struct freebsd11_statfs osb; + struct statfs *buf, *sp; + size_t count, size; + int error; + + count = uap->bufsize / sizeof(struct ostatfs); + size = count * sizeof(struct statfs); + error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, + uap->mode); + if (error == 0) + td->td_retval[0] = count; + if (size > 0) { + sp = buf; + while (count > 0 && error == 0) { + freebsd11_cvtstatfs(sp, &osb); + error = copyout(&osb, uap->buf, sizeof(osb)); + sp++; + uap->buf++; + count--; + } + free(buf, M_STATFS); + } + return (error); +} + +/* + * Implement fstatfs() for (NFS) file handles. + */ +int +freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) +{ + struct freebsd11_statfs osb; + struct statfs *sfp; + fhandle_t fh; + int error; + + error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); + if (error) + return (error); + sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); + error = kern_fhstatfs(td, fh, sfp); + if (error == 0) { + freebsd11_cvtstatfs(sfp, &osb); + error = copyout(&osb, uap->buf, sizeof(osb)); + } + free(sfp, M_STATFS); + return (error); +} + +/* + * Convert a new format statfs structure to an old format statfs structure. + */ +static void +freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) +{ + + bzero(osp, sizeof(*osp)); + osp->f_version = FREEBSD11_STATFS_VERSION; + osp->f_type = nsp->f_type; + osp->f_flags = nsp->f_flags; + osp->f_bsize = nsp->f_bsize; + osp->f_iosize = nsp->f_iosize; + osp->f_blocks = nsp->f_blocks; + osp->f_bfree = nsp->f_bfree; + osp->f_bavail = nsp->f_bavail; + osp->f_files = nsp->f_files; + osp->f_ffree = nsp->f_ffree; + osp->f_syncwrites = nsp->f_syncwrites; + osp->f_asyncwrites = nsp->f_asyncwrites; + osp->f_syncreads = nsp->f_syncreads; + osp->f_asyncreads = nsp->f_asyncreads; + osp->f_namemax = nsp->f_namemax; + osp->f_owner = nsp->f_owner; + osp->f_fsid = nsp->f_fsid; + strlcpy(osp->f_fstypename, nsp->f_fstypename, + MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); + strlcpy(osp->f_mntonname, nsp->f_mntonname, + MIN(MNAMELEN, sizeof(osp->f_mntonname))); + strlcpy(osp->f_mntfromname, nsp->f_mntfromname, + MIN(MNAMELEN, sizeof(osp->f_mntfromname))); +} +#endif /* COMPAT_FREEBSD11 */ + +/* * Change current working directory to a given file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchdir_args { int fd; }; #endif int sys_fchdir(struct thread *td, struct fchdir_args *uap) { struct vnode *vp, *tdp; struct mount *mp; struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(uap->fd); error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), &fp); if (error != 0) return (error); vp = fp->f_vnode; vrefact(vp); fdrop(fp, td); vn_lock(vp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(vp); error = change_dir(vp, td); while (!error && (mp = vp->v_mountedhere) != NULL) { if (vfs_busy(mp, 0)) continue; error = VFS_ROOT(mp, LK_SHARED, &tdp); vfs_unbusy(mp); if (error != 0) break; vput(vp); vp = tdp; } if (error != 0) { vput(vp); return (error); } VOP_UNLOCK(vp, 0); pwd_chdir(td, vp); return (0); } /* * Change current working directory (``.''). */ #ifndef _SYS_SYSPROTO_H_ struct chdir_args { char *path; }; #endif int sys_chdir(struct thread *td, struct chdir_args *uap) { return (kern_chdir(td, uap->path, UIO_USERSPACE)); } int kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); if ((error = change_dir(nd.ni_vp, td)) != 0) { vput(nd.ni_vp); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } VOP_UNLOCK(nd.ni_vp, 0); NDFREE(&nd, NDF_ONLY_PNBUF); pwd_chdir(td, nd.ni_vp); return (0); } /* * Change notion of root (``/'') directory. */ #ifndef _SYS_SYSPROTO_H_ struct chroot_args { char *path; }; #endif int sys_chroot(struct thread *td, struct chroot_args *uap) { struct nameidata nd; int error; error = priv_check(td, PRIV_VFS_CHROOT); if (error != 0) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error != 0) goto error; error = change_dir(nd.ni_vp, td); if (error != 0) goto e_vunlock; #ifdef MAC error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); if (error != 0) goto e_vunlock; #endif VOP_UNLOCK(nd.ni_vp, 0); error = pwd_chroot(td, nd.ni_vp); vrele(nd.ni_vp); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); e_vunlock: vput(nd.ni_vp); error: NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } /* * Common routine for chroot and chdir. Callers must provide a locked vnode * instance. */ int change_dir(struct vnode *vp, struct thread *td) { #ifdef MAC int error; #endif ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); if (vp->v_type != VDIR) return (ENOTDIR); #ifdef MAC error = mac_vnode_check_chdir(td->td_ucred, vp); if (error != 0) return (error); #endif return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); } static __inline void flags_to_rights(int flags, cap_rights_t *rightsp) { if (flags & O_EXEC) { cap_rights_set(rightsp, CAP_FEXECVE); } else { switch ((flags & O_ACCMODE)) { case O_RDONLY: cap_rights_set(rightsp, CAP_READ); break; case O_RDWR: cap_rights_set(rightsp, CAP_READ); /* FALLTHROUGH */ case O_WRONLY: cap_rights_set(rightsp, CAP_WRITE); if (!(flags & (O_APPEND | O_TRUNC))) cap_rights_set(rightsp, CAP_SEEK); break; } } if (flags & O_CREAT) cap_rights_set(rightsp, CAP_CREATE); if (flags & O_TRUNC) cap_rights_set(rightsp, CAP_FTRUNCATE); if (flags & (O_SYNC | O_FSYNC)) cap_rights_set(rightsp, CAP_FSYNC); if (flags & (O_EXLOCK | O_SHLOCK)) cap_rights_set(rightsp, CAP_FLOCK); } /* * Check permissions, allocate an open file structure, and call the device * open routine if any. */ #ifndef _SYS_SYSPROTO_H_ struct open_args { char *path; int flags; int mode; }; #endif int sys_open(struct thread *td, struct open_args *uap) { return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); } #ifndef _SYS_SYSPROTO_H_ struct openat_args { int fd; char *path; int flag; int mode; }; #endif int sys_openat(struct thread *td, struct openat_args *uap) { AUDIT_ARG_FD(uap->fd); return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, uap->mode)); } int kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int flags, int mode) { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; struct nameidata nd; cap_rights_t rights; int cmode, error, indx; indx = -1; AUDIT_ARG_FFLAGS(flags); AUDIT_ARG_MODE(mode); cap_rights_init(&rights, CAP_LOOKUP); flags_to_rights(flags, &rights); /* * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags * may be specified. */ if (flags & O_EXEC) { if (flags & O_ACCMODE) return (EINVAL); } else if ((flags & O_ACCMODE) == O_ACCMODE) { return (EINVAL); } else { flags = FFLAGS(flags); } /* * Allocate a file structure. The descriptor to reference it * is allocated and set by finstall() below. */ error = falloc_noinstall(td, &fp); if (error != 0) return (error); /* * An extra reference on `fp' has been held for us by * falloc_noinstall(). */ /* Set the flags early so the finit in devfs can pick them up. */ fp->f_flag = flags & FMASK; cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, &rights, td); td->td_dupfd = -1; /* XXX check for fdopen */ error = vn_open(&nd, &flags, cmode, fp); if (error != 0) { /* * If the vn_open replaced the method vector, something * wonderous happened deep below and we just pass it up * pretending we know what we do. */ if (error == ENXIO && fp->f_ops != &badfileops) goto success; /* * Handle special fdopen() case. bleh. * * Don't do this for relative (capability) lookups; we don't * understand exactly what would happen, and we don't think * that it ever should. */ if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && (error == ENODEV || error == ENXIO) && td->td_dupfd >= 0) { error = dupfdopen(td, fdp, td->td_dupfd, flags, error, &indx); if (error == 0) goto success; } goto bad; } td->td_dupfd = 0; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; /* * Store the vnode, for any f_type. Typically, the vnode use * count is decremented by direct call to vn_closefile() for * files that switched type in the cdevsw fdopen() method. */ fp->f_vnode = vp; /* * If the file wasn't claimed by devfs bind it to the normal * vnode operations here. */ if (fp->f_ops == &badfileops) { KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); fp->f_seqcount = 1; finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, &vnops); } VOP_UNLOCK(vp, 0); if (flags & O_TRUNC) { error = fo_truncate(fp, 0, td->td_ucred, td); if (error != 0) goto bad; } success: /* * If we haven't already installed the FD (for dupfdopen), do so now. */ if (indx == -1) { struct filecaps *fcaps; #ifdef CAPABILITIES if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) fcaps = &nd.ni_filecaps; else #endif fcaps = NULL; error = finstall(td, fp, &indx, flags, fcaps); /* On success finstall() consumes fcaps. */ if (error != 0) { filecaps_free(&nd.ni_filecaps); goto bad; } } else { filecaps_free(&nd.ni_filecaps); } /* * Release our private reference, leaving the one associated with * the descriptor table intact. */ fdrop(fp, td); td->td_retval[0] = indx; return (0); bad: KASSERT(indx == -1, ("indx=%d, should be -1", indx)); fdrop(fp, td); return (error); } #ifdef COMPAT_43 /* * Create a file. */ #ifndef _SYS_SYSPROTO_H_ struct ocreat_args { char *path; int mode; }; #endif int ocreat(struct thread *td, struct ocreat_args *uap) { return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); } #endif /* COMPAT_43 */ /* * Create a special file. */ #ifndef _SYS_SYSPROTO_H_ -struct mknod_args { +struct mknodat_args { + int fd; char *path; - int mode; - int dev; + mode_t mode; + dev_t dev; }; #endif int -sys_mknod(struct thread *td, struct mknod_args *uap) +sys_mknodat(struct thread *td, struct mknodat_args *uap) { + return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, + uap->dev)); +} + +#if defined(COMPAT_FREEBSD11) +int +freebsd11_mknod(struct thread *td, + struct freebsd11_mknod_args *uap) +{ + return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); } -#ifndef _SYS_SYSPROTO_H_ -struct mknodat_args { - int fd; - char *path; - mode_t mode; - dev_t dev; -}; -#endif int -sys_mknodat(struct thread *td, struct mknodat_args *uap) +freebsd11_mknodat(struct thread *td, + struct freebsd11_mknodat_args *uap) { return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); } +#endif /* COMPAT_FREEBSD11 */ int kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, - int mode, int dev) + int mode, dev_t dev) { struct vnode *vp; struct mount *mp; struct vattr vattr; struct nameidata nd; cap_rights_t rights; int error, whiteout = 0; AUDIT_ARG_MODE(mode); AUDIT_ARG_DEV(dev); switch (mode & S_IFMT) { case S_IFCHR: case S_IFBLK: error = priv_check(td, PRIV_VFS_MKNOD_DEV); if (error == 0 && dev == VNOVAL) error = EINVAL; break; case S_IFMT: error = priv_check(td, PRIV_VFS_MKNOD_BAD); break; case S_IFWHT: error = priv_check(td, PRIV_VFS_MKNOD_WHT); break; case S_IFIFO: if (dev == 0) return (kern_mkfifoat(td, fd, path, pathseg, mode)); /* FALLTHROUGH */ default: error = EINVAL; break; } if (error != 0) return (error); restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); if (vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); return (EEXIST); } else { VATTR_NULL(&vattr); vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; vattr.va_rdev = dev; whiteout = 0; switch (mode & S_IFMT) { case S_IFMT: /* used by badsect to flag bad sectors */ vattr.va_type = VBAD; break; case S_IFCHR: vattr.va_type = VCHR; break; case S_IFBLK: vattr.va_type = VBLK; break; case S_IFWHT: whiteout = 1; break; default: panic("kern_mknod: invalid mode"); } } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } #ifdef MAC if (error == 0 && !whiteout) error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); #endif if (error == 0) { if (whiteout) error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); else { error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) vput(nd.ni_vp); } } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); return (error); } /* * Create a named pipe. */ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { char *path; int mode; }; #endif int sys_mkfifo(struct thread *td, struct mkfifo_args *uap) { return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode)); } #ifndef _SYS_SYSPROTO_H_ struct mkfifoat_args { int fd; char *path; mode_t mode; }; #endif int sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) { return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); } int kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int mode) { struct mount *mp; struct vattr vattr; struct nameidata nd; cap_rights_t rights; int error; AUDIT_ARG_MODE(mode); restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), td); if ((error = namei(&nd)) != 0) return (error); if (nd.ni_vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VFIFO; vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; #ifdef MAC error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error != 0) goto out; #endif error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) vput(nd.ni_vp); #ifdef MAC out: #endif vput(nd.ni_dvp); vn_finished_write(mp); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } /* * Make a hard file link. */ #ifndef _SYS_SYSPROTO_H_ struct link_args { char *path; char *link; }; #endif int sys_link(struct thread *td, struct link_args *uap) { return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, UIO_USERSPACE, FOLLOW)); } #ifndef _SYS_SYSPROTO_H_ struct linkat_args { int fd1; char *path1; int fd2; char *path2; int flag; }; #endif int sys_linkat(struct thread *td, struct linkat_args *uap) { int flag; flag = uap->flag; if (flag & ~AT_SYMLINK_FOLLOW) return (EINVAL); return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); } int hardlink_check_uid = 0; SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, &hardlink_check_uid, 0, "Unprivileged processes cannot create hard links to files owned by other " "users"); static int hardlink_check_gid = 0; SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, &hardlink_check_gid, 0, "Unprivileged processes cannot create hard links to files owned by other " "groups"); static int can_hardlink(struct vnode *vp, struct ucred *cred) { struct vattr va; int error; if (!hardlink_check_uid && !hardlink_check_gid) return (0); error = VOP_GETATTR(vp, &va, cred); if (error != 0) return (error); if (hardlink_check_uid && cred->cr_uid != va.va_uid) { error = priv_check_cred(cred, PRIV_VFS_LINK, 0); if (error != 0) return (error); } if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { error = priv_check_cred(cred, PRIV_VFS_LINK, 0); if (error != 0) return (error); } return (0); } int kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, enum uio_seg segflg, int follow) { struct vnode *vp; struct mount *mp; struct nameidata nd; cap_rights_t rights; int error; again: bwillwrite(); NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, cap_rights_init(&rights, CAP_LINKAT_SOURCE), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if (vp->v_type == VDIR) { vrele(vp); return (EPERM); /* POSIX */ } NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT_TARGET), td); if ((error = namei(&nd)) == 0) { if (nd.ni_vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); vrele(vp); return (EEXIST); } else if (nd.ni_dvp->v_mount != vp->v_mount) { /* * Cross-device link. No need to recheck * vp->v_type, since it cannot change, except * to VBAD. */ NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vrele(vp); return (EXDEV); } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { error = can_hardlink(vp, td->td_ucred); #ifdef MAC if (error == 0) error = mac_vnode_check_link(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); #endif if (error != 0) { vput(vp); vput(nd.ni_dvp); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } error = vn_start_write(vp, &mp, V_NOWAIT); if (error != 0) { vput(vp); vput(nd.ni_dvp); NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); if (error != 0) return (error); goto again; } error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); VOP_UNLOCK(vp, 0); vput(nd.ni_dvp); vn_finished_write(mp); NDFREE(&nd, NDF_ONLY_PNBUF); } else { vput(nd.ni_dvp); NDFREE(&nd, NDF_ONLY_PNBUF); vrele(vp); goto again; } } vrele(vp); return (error); } /* * Make a symbolic link. */ #ifndef _SYS_SYSPROTO_H_ struct symlink_args { char *path; char *link; }; #endif int sys_symlink(struct thread *td, struct symlink_args *uap) { return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, UIO_USERSPACE)); } #ifndef _SYS_SYSPROTO_H_ struct symlinkat_args { char *path; int fd; char *path2; }; #endif int sys_symlinkat(struct thread *td, struct symlinkat_args *uap) { return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, UIO_USERSPACE)); } int kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, enum uio_seg segflg) { struct mount *mp; struct vattr vattr; char *syspath; struct nameidata nd; int error; cap_rights_t rights; if (segflg == UIO_SYSSPACE) { syspath = path1; } else { syspath = uma_zalloc(namei_zone, M_WAITOK); if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) goto out; } AUDIT_ARG_TEXT(syspath); restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), td); if ((error = namei(&nd)) != 0) goto out; if (nd.ni_vp) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); error = EEXIST; goto out; } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) goto out; goto restart; } VATTR_NULL(&vattr); vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; #ifdef MAC vattr.va_type = VLNK; error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error != 0) goto out2; #endif error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); if (error == 0) vput(nd.ni_vp); #ifdef MAC out2: #endif NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); out: if (segflg != UIO_SYSSPACE) uma_zfree(namei_zone, syspath); return (error); } /* * Delete a whiteout from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct undelete_args { char *path; }; #endif int sys_undelete(struct thread *td, struct undelete_args *uap) { struct mount *mp; struct nameidata nd; int error; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error != 0) return (error); if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); return (error); } /* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct unlink_args { char *path; }; #endif int sys_unlink(struct thread *td, struct unlink_args *uap) { return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); } #ifndef _SYS_SYSPROTO_H_ struct unlinkat_args { int fd; char *path; int flag; }; #endif int sys_unlinkat(struct thread *td, struct unlinkat_args *uap) { int flag = uap->flag; int fd = uap->fd; char *path = uap->path; if (flag & ~AT_REMOVEDIR) return (EINVAL); if (flag & AT_REMOVEDIR) return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); else return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); } int kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, ino_t oldinum) { struct mount *mp; struct vnode *vp; struct nameidata nd; struct stat sb; cap_rights_t rights; int error; restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); if ((error = namei(&nd)) != 0) return (error == EINVAL ? EPERM : error); vp = nd.ni_vp; if (vp->v_type == VDIR && oldinum == 0) { error = EPERM; /* POSIX */ } else if (oldinum != 0 && ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && sb.st_ino != oldinum) { error = EIDRM; /* Identifier removed */ } else { /* * The root of a mounted filesystem cannot be deleted. * * XXX: can this only be a VDIR case? */ if (vp->v_vflag & VV_ROOT) error = EBUSY; } if (error == 0) { if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } #ifdef MAC error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error != 0) goto out; #endif vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); #ifdef MAC out: #endif vn_finished_write(mp); } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); return (error); } /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct lseek_args { int fd; int pad; off_t offset; int whence; }; #endif int sys_lseek(struct thread *td, struct lseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } int kern_lseek(struct thread *td, int fd, off_t offset, int whence) { struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(fd); error = fget(td, fd, cap_rights_init(&rights, CAP_SEEK), &fp); if (error != 0) return (error); error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? fo_seek(fp, offset, whence, td) : ESPIPE; fdrop(fp, td); return (error); } #if defined(COMPAT_43) /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct olseek_args { int fd; long offset; int whence; }; #endif int olseek(struct thread *td, struct olseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } #endif /* COMPAT_43 */ #if defined(COMPAT_FREEBSD6) /* Version with the 'pad' argument */ int freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } #endif /* * Check access permissions using passed credentials. */ static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, struct thread *td) { accmode_t accmode; int error; /* Flags == 0 means only check for existence. */ if (user_flags == 0) return (0); accmode = 0; if (user_flags & R_OK) accmode |= VREAD; if (user_flags & W_OK) accmode |= VWRITE; if (user_flags & X_OK) accmode |= VEXEC; #ifdef MAC error = mac_vnode_check_access(cred, vp, accmode); if (error != 0) return (error); #endif if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, accmode, cred, td); return (error); } /* * Check access permissions using "real" credentials. */ #ifndef _SYS_SYSPROTO_H_ struct access_args { char *path; int amode; }; #endif int sys_access(struct thread *td, struct access_args *uap) { return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0, uap->amode)); } #ifndef _SYS_SYSPROTO_H_ struct faccessat_args { int dirfd; char *path; int amode; int flag; } #endif int sys_faccessat(struct thread *td, struct faccessat_args *uap) { return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, uap->amode)); } int kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int flag, int amode) { struct ucred *cred, *usecred; struct vnode *vp; struct nameidata nd; cap_rights_t rights; int error; if (flag & ~AT_EACCESS) return (EINVAL); if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) return (EINVAL); /* * Create and modify a temporary credential instead of one that * is potentially shared (if we need one). */ cred = td->td_ucred; if ((flag & AT_EACCESS) == 0 && ((cred->cr_uid != cred->cr_ruid || cred->cr_rgid != cred->cr_groups[0]))) { usecred = crdup(cred); usecred->cr_uid = cred->cr_ruid; usecred->cr_groups[0] = cred->cr_rgid; td->td_ucred = usecred; } else usecred = cred; AUDIT_ARG_VALUE(amode); NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), td); if ((error = namei(&nd)) != 0) goto out; vp = nd.ni_vp; error = vn_access(vp, amode, usecred, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); out: if (usecred != cred) { td->td_ucred = cred; crfree(usecred); } return (error); } /* * Check access permissions using "effective" credentials. */ #ifndef _SYS_SYSPROTO_H_ struct eaccess_args { char *path; int amode; }; #endif int sys_eaccess(struct thread *td, struct eaccess_args *uap) { return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, AT_EACCESS, uap->amode)); } #if defined(COMPAT_43) /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct ostat_args { char *path; struct ostat *ub; }; #endif int ostat(struct thread *td, struct ostat_args *uap) { struct stat sb; struct ostat osb; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); cvtstat(&sb, &osb); return (copyout(&osb, uap->ub, sizeof (osb))); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct olstat_args { char *path; struct ostat *ub; }; #endif int olstat(struct thread *td, struct olstat_args *uap) { struct stat sb; struct ostat osb; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); cvtstat(&sb, &osb); return (copyout(&osb, uap->ub, sizeof (osb))); } /* * Convert from an old to a new stat structure. */ void cvtstat(struct stat *st, struct ostat *ost) { bzero(ost, sizeof(*ost)); ost->st_dev = st->st_dev; ost->st_ino = st->st_ino; ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; if (st->st_size < (quad_t)1 << 32) ost->st_size = st->st_size; else ost->st_size = -2; ost->st_atim = st->st_atim; ost->st_mtim = st->st_mtim; ost->st_ctim = st->st_ctim; ost->st_blksize = st->st_blksize; ost->st_blocks = st->st_blocks; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; } #endif /* COMPAT_43 */ -/* - * Get file status; this version follows links. - */ -#ifndef _SYS_SYSPROTO_H_ -struct stat_args { - char *path; - struct stat *ub; -}; -#endif +#if defined(COMPAT_FREEBSD11) +void +freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) +{ + + ost->st_dev = st->st_dev; + ost->st_ino = st->st_ino; /* truncate */ + ost->st_mode = st->st_mode; + ost->st_nlink = st->st_nlink; /* truncate */ + ost->st_uid = st->st_uid; + ost->st_gid = st->st_gid; + ost->st_rdev = st->st_rdev; + ost->st_atim = st->st_atim; + ost->st_mtim = st->st_mtim; + ost->st_ctim = st->st_ctim; + ost->st_size = st->st_size; + ost->st_blocks = st->st_blocks; + ost->st_blksize = st->st_blksize; + ost->st_flags = st->st_flags; + ost->st_gen = st->st_gen; + ost->st_lspare = 0; + ost->st_birthtim = st->st_birthtim; + bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), + sizeof(*ost) - offsetof(struct freebsd11_stat, + st_birthtim) - sizeof(ost->st_birthtim)); +} + int -sys_stat(struct thread *td, struct stat_args *uap) +freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) { struct stat sb; + struct freebsd11_stat osb; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); - if (error == 0) - error = copyout(&sb, uap->ub, sizeof (sb)); + if (error != 0) + return (error); + freebsd11_cvtstat(&sb, &osb); + error = copyout(&osb, uap->ub, sizeof(osb)); return (error); } +int +freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) +{ + struct stat sb; + struct freebsd11_stat osb; + int error; + + error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, + UIO_USERSPACE, &sb, NULL); + if (error != 0) + return (error); + freebsd11_cvtstat(&sb, &osb); + error = copyout(&osb, uap->ub, sizeof(osb)); + return (error); +} + +int +freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) +{ + struct fhandle fh; + struct stat sb; + struct freebsd11_stat osb; + int error; + + error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); + if (error != 0) + return (error); + error = kern_fhstat(td, fh, &sb); + if (error != 0) + return (error); + freebsd11_cvtstat(&sb, &osb); + error = copyout(&osb, uap->sb, sizeof(osb)); + return (error); +} + +int +freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) +{ + struct stat sb; + struct freebsd11_stat osb; + int error; + + error = kern_statat(td, uap->flag, uap->fd, uap->path, + UIO_USERSPACE, &sb, NULL); + if (error != 0) + return (error); + freebsd11_cvtstat(&sb, &osb); + error = copyout(&osb, uap->buf, sizeof(osb)); + return (error); +} +#endif /* COMPAT_FREEBSD11 */ + +/* + * Get file status + */ #ifndef _SYS_SYSPROTO_H_ struct fstatat_args { int fd; char *path; struct stat *buf; int flag; } #endif int sys_fstatat(struct thread *td, struct fstatat_args *uap) { struct stat sb; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &sb, NULL); if (error == 0) error = copyout(&sb, uap->buf, sizeof (sb)); return (error); } int kern_statat(struct thread *td, int flag, int fd, char *path, enum uio_seg pathseg, struct stat *sbp, void (*hook)(struct vnode *vp, struct stat *sbp)) { struct nameidata nd; struct stat sb; cap_rights_t rights; int error; if (flag & ~AT_SYMLINK_NOFOLLOW) return (EINVAL); NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), td); if ((error = namei(&nd)) != 0) return (error); error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); if (error == 0) { SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); if (S_ISREG(sb.st_mode)) SDT_PROBE2(vfs, , stat, reg, path, pathseg); if (__predict_false(hook != NULL)) hook(nd.ni_vp, &sb); } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_vp); if (error != 0) return (error); +#ifdef __STAT_TIME_T_EXT + sb.st_atim_ext = 0; + sb.st_mtim_ext = 0; + sb.st_ctim_ext = 0; + sb.st_btim_ext = 0; +#endif *sbp = sb; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrstat(&sb); #endif return (0); } +#if defined(COMPAT_FREEBSD11) /* - * Get file status; this version does not follow links. - */ -#ifndef _SYS_SYSPROTO_H_ -struct lstat_args { - char *path; - struct stat *ub; -}; -#endif -int -sys_lstat(struct thread *td, struct lstat_args *uap) -{ - struct stat sb; - int error; - - error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, - UIO_USERSPACE, &sb, NULL); - if (error == 0) - error = copyout(&sb, uap->ub, sizeof (sb)); - return (error); -} - -/* * Implementation of the NetBSD [l]stat() functions. */ void -cvtnstat( struct stat *sb, struct nstat *nsb) +freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) { - bzero(nsb, sizeof *nsb); + bzero(nsb, sizeof(*nsb)); nsb->st_dev = sb->st_dev; nsb->st_ino = sb->st_ino; nsb->st_mode = sb->st_mode; nsb->st_nlink = sb->st_nlink; nsb->st_uid = sb->st_uid; nsb->st_gid = sb->st_gid; nsb->st_rdev = sb->st_rdev; nsb->st_atim = sb->st_atim; nsb->st_mtim = sb->st_mtim; nsb->st_ctim = sb->st_ctim; nsb->st_size = sb->st_size; nsb->st_blocks = sb->st_blocks; nsb->st_blksize = sb->st_blksize; nsb->st_flags = sb->st_flags; nsb->st_gen = sb->st_gen; nsb->st_birthtim = sb->st_birthtim; } #ifndef _SYS_SYSPROTO_H_ -struct nstat_args { +struct freebsd11_nstat_args { char *path; struct nstat *ub; }; #endif int -sys_nstat(struct thread *td, struct nstat_args *uap) +freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) { struct stat sb; struct nstat nsb; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); - cvtnstat(&sb, &nsb); + freebsd11_cvtnstat(&sb, &nsb); return (copyout(&nsb, uap->ub, sizeof (nsb))); } /* * NetBSD lstat. Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ -struct lstat_args { +struct freebsd11_nlstat_args { char *path; - struct stat *ub; + struct nstat *ub; }; #endif int -sys_nlstat(struct thread *td, struct nlstat_args *uap) +freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) { struct stat sb; struct nstat nsb; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); - cvtnstat(&sb, &nsb); + freebsd11_cvtnstat(&sb, &nsb); return (copyout(&nsb, uap->ub, sizeof (nsb))); } +#endif /* COMPAT_FREEBSD11 */ /* * Get configurable pathname variables. */ #ifndef _SYS_SYSPROTO_H_ struct pathconf_args { char *path; int name; }; #endif int sys_pathconf(struct thread *td, struct pathconf_args *uap) { return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); } #ifndef _SYS_SYSPROTO_H_ struct lpathconf_args { char *path; int name; }; #endif int sys_lpathconf(struct thread *td, struct lpathconf_args *uap) { return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW)); } int kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, u_long flags) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); vput(nd.ni_vp); return (error); } /* * Return target name of a symbolic link. */ #ifndef _SYS_SYSPROTO_H_ struct readlink_args { char *path; char *buf; size_t count; }; #endif int sys_readlink(struct thread *td, struct readlink_args *uap) { return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->buf, UIO_USERSPACE, uap->count)); } #ifndef _SYS_SYSPROTO_H_ struct readlinkat_args { int fd; char *path; char *buf; size_t bufsize; }; #endif int sys_readlinkat(struct thread *td, struct readlinkat_args *uap) { return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, uap->buf, UIO_USERSPACE, uap->bufsize)); } int kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) { struct vnode *vp; struct iovec aiov; struct uio auio; struct nameidata nd; int error; if (count > IOSIZE_MAX) return (EINVAL); NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; #ifdef MAC error = mac_vnode_check_readlink(td->td_ucred, vp); if (error != 0) { vput(vp); return (error); } #endif if (vp->v_type != VLNK) error = EINVAL; else { aiov.iov_base = buf; aiov.iov_len = count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = bufseg; auio.uio_td = td; auio.uio_resid = count; error = VOP_READLINK(vp, &auio, td->td_ucred); td->td_retval[0] = count - auio.uio_resid; } vput(vp); return (error); } /* * Common implementation code for chflags() and fchflags(). */ static int setfflags(struct thread *td, struct vnode *vp, u_long flags) { struct mount *mp; struct vattr vattr; int error; /* We can't support the value matching VNOVAL. */ if (flags == VNOVAL) return (EOPNOTSUPP); /* * Prevent non-root users from setting flags on devices. When * a device is reused, users can retain ownership of the device * if they are allowed to set flags and programs assume that * chown can't fail when done as root. */ if (vp->v_type == VCHR || vp->v_type == VBLK) { error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); if (error != 0) return (error); } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VATTR_NULL(&vattr); vattr.va_flags = flags; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); #ifdef MAC error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred); VOP_UNLOCK(vp, 0); vn_finished_write(mp); return (error); } /* * Change flags of a file given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chflags_args { const char *path; u_long flags; }; #endif int sys_chflags(struct thread *td, struct chflags_args *uap) { return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->flags, 0)); } #ifndef _SYS_SYSPROTO_H_ struct chflagsat_args { int fd; const char *path; u_long flags; int atflag; } #endif int sys_chflagsat(struct thread *td, struct chflagsat_args *uap) { int fd = uap->fd; const char *path = uap->path; u_long flags = uap->flags; int atflag = uap->atflag; if (atflag & ~AT_SYMLINK_NOFOLLOW) return (EINVAL); return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); } /* * Same as chflags() but doesn't follow symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchflags_args { const char *path; u_long flags; }; #endif int sys_lchflags(struct thread *td, struct lchflags_args *uap) { return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->flags, AT_SYMLINK_NOFOLLOW)); } static int kern_chflagsat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, u_long flags, int atflag) { struct nameidata nd; cap_rights_t rights; int error, follow; AUDIT_ARG_FFLAGS(flags); follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FCHFLAGS), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfflags(td, nd.ni_vp, flags); vrele(nd.ni_vp); return (error); } /* * Change flags of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchflags_args { int fd; u_long flags; }; #endif int sys_fchflags(struct thread *td, struct fchflags_args *uap) { struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(uap->fd); AUDIT_ARG_FFLAGS(uap->flags); error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), &fp); if (error != 0) return (error); #ifdef AUDIT vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(fp->f_vnode); VOP_UNLOCK(fp->f_vnode, 0); #endif error = setfflags(td, fp->f_vnode, uap->flags); fdrop(fp, td); return (error); } /* * Common implementation code for chmod(), lchmod() and fchmod(). */ int setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) { struct mount *mp; struct vattr vattr; int error; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VATTR_NULL(&vattr); vattr.va_mode = mode & ALLPERMS; #ifdef MAC error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, cred); VOP_UNLOCK(vp, 0); vn_finished_write(mp); return (error); } /* * Change mode of a file given path name. */ #ifndef _SYS_SYSPROTO_H_ struct chmod_args { char *path; int mode; }; #endif int sys_chmod(struct thread *td, struct chmod_args *uap) { return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode, 0)); } #ifndef _SYS_SYSPROTO_H_ struct fchmodat_args { int dirfd; char *path; mode_t mode; int flag; } #endif int sys_fchmodat(struct thread *td, struct fchmodat_args *uap) { int flag = uap->flag; int fd = uap->fd; char *path = uap->path; mode_t mode = uap->mode; if (flag & ~AT_SYMLINK_NOFOLLOW) return (EINVAL); return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); } /* * Change mode of a file given path name (don't follow links.) */ #ifndef _SYS_SYSPROTO_H_ struct lchmod_args { char *path; int mode; }; #endif int sys_lchmod(struct thread *td, struct lchmod_args *uap) { return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode, AT_SYMLINK_NOFOLLOW)); } int kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, mode_t mode, int flag) { struct nameidata nd; cap_rights_t rights; int error, follow; AUDIT_ARG_MODE(mode); follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FCHMOD), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfmode(td, td->td_ucred, nd.ni_vp, mode); vrele(nd.ni_vp); return (error); } /* * Change mode of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchmod_args { int fd; int mode; }; #endif int sys_fchmod(struct thread *td, struct fchmod_args *uap) { struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(uap->fd); AUDIT_ARG_MODE(uap->mode); error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); if (error != 0) return (error); error = fo_chmod(fp, uap->mode, td->td_ucred, td); fdrop(fp, td); return (error); } /* * Common implementation for chown(), lchown(), and fchown() */ int setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, gid_t gid) { struct mount *mp; struct vattr vattr; int error; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VATTR_NULL(&vattr); vattr.va_uid = uid; vattr.va_gid = gid; #ifdef MAC error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, vattr.va_gid); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, cred); VOP_UNLOCK(vp, 0); vn_finished_write(mp); return (error); } /* * Set ownership given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chown_args { char *path; int uid; int gid; }; #endif int sys_chown(struct thread *td, struct chown_args *uap) { return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, uap->gid, 0)); } #ifndef _SYS_SYSPROTO_H_ struct fchownat_args { int fd; const char * path; uid_t uid; gid_t gid; int flag; }; #endif int sys_fchownat(struct thread *td, struct fchownat_args *uap) { int flag; flag = uap->flag; if (flag & ~AT_SYMLINK_NOFOLLOW) return (EINVAL); return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, uap->gid, uap->flag)); } int kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int uid, int gid, int flag) { struct nameidata nd; cap_rights_t rights; int error, follow; AUDIT_ARG_OWNER(uid, gid); follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FCHOWN), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); vrele(nd.ni_vp); return (error); } /* * Set ownership given a path name, do not cross symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchown_args { char *path; int uid; int gid; }; #endif int sys_lchown(struct thread *td, struct lchown_args *uap) { return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); } /* * Set ownership given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchown_args { int fd; int uid; int gid; }; #endif int sys_fchown(struct thread *td, struct fchown_args *uap) { struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(uap->fd); AUDIT_ARG_OWNER(uap->uid, uap->gid); error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); if (error != 0) return (error); error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); fdrop(fp, td); return (error); } /* * Common implementation code for utimes(), lutimes(), and futimes(). */ static int getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, struct timespec *tsp) { struct timeval tv[2]; const struct timeval *tvp; int error; if (usrtvp == NULL) { vfs_timestamp(&tsp[0]); tsp[1] = tsp[0]; } else { if (tvpseg == UIO_SYSSPACE) { tvp = usrtvp; } else { if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) return (error); tvp = tv; } if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) return (EINVAL); TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); } return (0); } /* * Common implementation code for futimens(), utimensat(). */ #define UTIMENS_NULL 0x1 #define UTIMENS_EXIT 0x2 static int getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, struct timespec *tsp, int *retflags) { struct timespec tsnow; int error; vfs_timestamp(&tsnow); *retflags = 0; if (usrtsp == NULL) { tsp[0] = tsnow; tsp[1] = tsnow; *retflags |= UTIMENS_NULL; return (0); } if (tspseg == UIO_SYSSPACE) { tsp[0] = usrtsp[0]; tsp[1] = usrtsp[1]; } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) return (error); if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) *retflags |= UTIMENS_EXIT; if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) *retflags |= UTIMENS_NULL; if (tsp[0].tv_nsec == UTIME_OMIT) tsp[0].tv_sec = VNOVAL; else if (tsp[0].tv_nsec == UTIME_NOW) tsp[0] = tsnow; else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) return (EINVAL); if (tsp[1].tv_nsec == UTIME_OMIT) tsp[1].tv_sec = VNOVAL; else if (tsp[1].tv_nsec == UTIME_NOW) tsp[1] = tsnow; else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) return (EINVAL); return (0); } /* * Common implementation code for utimes(), lutimes(), futimes(), futimens(), * and utimensat(). */ static int setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, int numtimes, int nullflag) { struct mount *mp; struct vattr vattr; int error, setbirthtime; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); setbirthtime = 0; if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && timespeccmp(&ts[1], &vattr.va_birthtime, < )) setbirthtime = 1; VATTR_NULL(&vattr); vattr.va_atime = ts[0]; vattr.va_mtime = ts[1]; if (setbirthtime) vattr.va_birthtime = ts[1]; if (numtimes > 2) vattr.va_birthtime = ts[2]; if (nullflag) vattr.va_vaflags |= VA_UTIMES_NULL; #ifdef MAC error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, vattr.va_mtime); #endif if (error == 0) error = VOP_SETATTR(vp, &vattr, td->td_ucred); VOP_UNLOCK(vp, 0); vn_finished_write(mp); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct utimes_args { char *path; struct timeval *tptr; }; #endif int sys_utimes(struct thread *td, struct utimes_args *uap) { return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->tptr, UIO_USERSPACE)); } #ifndef _SYS_SYSPROTO_H_ struct futimesat_args { int fd; const char * path; const struct timeval * times; }; #endif int sys_futimesat(struct thread *td, struct futimesat_args *uap) { return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, uap->times, UIO_USERSPACE)); } int kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) { struct nameidata nd; struct timespec ts[2]; cap_rights_t rights; int error; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FUTIMES), td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); vrele(nd.ni_vp); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct lutimes_args { char *path; struct timeval *tptr; }; #endif int sys_lutimes(struct thread *td, struct lutimes_args *uap) { return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, UIO_USERSPACE)); } int kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; struct nameidata nd; int error; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); vrele(nd.ni_vp); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct futimes_args { int fd; struct timeval *tptr; }; #endif int sys_futimes(struct thread *td, struct futimes_args *uap) { return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); } int kern_futimes(struct thread *td, int fd, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(fd); error = getutimes(tptr, tptrseg, ts); if (error != 0) return (error); error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); if (error != 0) return (error); #ifdef AUDIT vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(fp->f_vnode); VOP_UNLOCK(fp->f_vnode, 0); #endif error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); fdrop(fp, td); return (error); } int sys_futimens(struct thread *td, struct futimens_args *uap) { return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); } int kern_futimens(struct thread *td, int fd, struct timespec *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; struct file *fp; cap_rights_t rights; int error, flags; AUDIT_ARG_FD(fd); error = getutimens(tptr, tptrseg, ts, &flags); if (error != 0) return (error); if (flags & UTIMENS_EXIT) return (0); error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); if (error != 0) return (error); #ifdef AUDIT vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(fp->f_vnode); VOP_UNLOCK(fp->f_vnode, 0); #endif error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); fdrop(fp, td); return (error); } int sys_utimensat(struct thread *td, struct utimensat_args *uap) { return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, uap->times, UIO_USERSPACE, uap->flag)); } int kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg, int flag) { struct nameidata nd; struct timespec ts[2]; cap_rights_t rights; int error, flags; if (flag & ~AT_SYMLINK_NOFOLLOW) return (EINVAL); if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) return (error); NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW) | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FUTIMES), td); if ((error = namei(&nd)) != 0) return (error); /* * We are allowed to call namei() regardless of 2xUTIME_OMIT. * POSIX states: * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." * "Search permission is denied by a component of the path prefix." */ NDFREE(&nd, NDF_ONLY_PNBUF); if ((flags & UTIMENS_EXIT) == 0) error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); vrele(nd.ni_vp); return (error); } /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct truncate_args { char *path; int pad; off_t length; }; #endif int sys_truncate(struct thread *td, struct truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } int kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) { struct mount *mp; struct vnode *vp; void *rl_cookie; struct vattr vattr; struct nameidata nd; int error; if (length < 0) return(EINVAL); NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { vn_rangelock_unlock(vp, rl_cookie); vrele(vp); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_type == VDIR) error = EISDIR; #ifdef MAC else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { } #endif else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { VATTR_NULL(&vattr); vattr.va_size = length; error = VOP_SETATTR(vp, &vattr, td->td_ucred); } VOP_UNLOCK(vp, 0); vn_finished_write(mp); vn_rangelock_unlock(vp, rl_cookie); vrele(vp); return (error); } #if defined(COMPAT_43) /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct otruncate_args { char *path; long length; }; #endif int otruncate(struct thread *td, struct otruncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } #endif /* COMPAT_43 */ #if defined(COMPAT_FREEBSD6) /* Versions with the pad argument */ int freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } int freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, uap->length)); } #endif int kern_fsync(struct thread *td, int fd, bool fullsync) { struct vnode *vp; struct mount *mp; struct file *fp; cap_rights_t rights; int error, lock_flags; AUDIT_ARG_FD(fd); error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp); if (error != 0) return (error); vp = fp->f_vnode; #if 0 if (!fullsync) /* XXXKIB: compete outstanding aio writes */; #endif error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error != 0) goto drop; if (MNT_SHARED_WRITES(mp) || ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { lock_flags = LK_SHARED; } else { lock_flags = LK_EXCLUSIVE; } vn_lock(vp, lock_flags | LK_RETRY); AUDIT_ARG_VNODE1(vp); if (vp->v_object != NULL) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_WUNLOCK(vp->v_object); } error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); VOP_UNLOCK(vp, 0); vn_finished_write(mp); drop: fdrop(fp, td); return (error); } /* * Sync an open file. */ #ifndef _SYS_SYSPROTO_H_ struct fsync_args { int fd; }; #endif int sys_fsync(struct thread *td, struct fsync_args *uap) { return (kern_fsync(td, uap->fd, true)); } int sys_fdatasync(struct thread *td, struct fdatasync_args *uap) { return (kern_fsync(td, uap->fd, false)); } /* * Rename files. Source and destination must either both be directories, or * both not be directories. If target is a directory, it must be empty. */ #ifndef _SYS_SYSPROTO_H_ struct rename_args { char *from; char *to; }; #endif int sys_rename(struct thread *td, struct rename_args *uap) { return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, uap->to, UIO_USERSPACE)); } #ifndef _SYS_SYSPROTO_H_ struct renameat_args { int oldfd; char *old; int newfd; char *new; }; #endif int sys_renameat(struct thread *td, struct renameat_args *uap) { return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, UIO_USERSPACE)); } int kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, enum uio_seg pathseg) { struct mount *mp = NULL; struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; cap_rights_t rights; int error; again: bwillwrite(); #ifdef MAC NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | AUDITVNODE1, pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); #else NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); #endif if ((error = namei(&fromnd)) != 0) return (error); #ifdef MAC error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd); VOP_UNLOCK(fromnd.ni_dvp, 0); if (fromnd.ni_dvp != fromnd.ni_vp) VOP_UNLOCK(fromnd.ni_vp, 0); #endif fvp = fromnd.ni_vp; NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNODE2, pathseg, new, newfd, cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&tond)) != 0) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; error = vn_start_write(fvp, &mp, V_NOWAIT); if (error != 0) { NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); if (tvp != NULL) vput(tvp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); vrele(fromnd.ni_dvp); vrele(fvp); vrele(tond.ni_startdir); if (fromnd.ni_startdir != NULL) vrele(fromnd.ni_startdir); error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); if (error != 0) return (error); goto again; } if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; goto out; } #ifdef CAPABILITIES if (newfd != AT_FDCWD) { /* * If the target already exists we require CAP_UNLINKAT * from 'newfd'. */ error = cap_check(&tond.ni_filecaps.fc_rights, cap_rights_init(&rights, CAP_UNLINKAT)); if (error != 0) goto out; } #endif } if (fvp == tdvp) { error = EINVAL; goto out; } /* * If the source is the same as the destination (that is, if they * are links to the same vnode), then there is nothing to do. */ if (fvp == tvp) error = -1; #ifdef MAC else error = mac_vnode_check_rename_to(td->td_ucred, tdvp, tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); #endif out: if (error == 0) { error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); } else { NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); if (tvp != NULL) vput(tvp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); vrele(fromnd.ni_dvp); vrele(fvp); } vrele(tond.ni_startdir); vn_finished_write(mp); out1: if (fromnd.ni_startdir) vrele(fromnd.ni_startdir); if (error == -1) return (0); return (error); } /* * Make a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct mkdir_args { char *path; int mode; }; #endif int sys_mkdir(struct thread *td, struct mkdir_args *uap) { return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode)); } #ifndef _SYS_SYSPROTO_H_ struct mkdirat_args { int fd; char *path; mode_t mode; }; #endif int sys_mkdirat(struct thread *td, struct mkdirat_args *uap) { return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); } int kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, int mode) { struct mount *mp; struct vnode *vp; struct vattr vattr; struct nameidata nd; cap_rights_t rights; int error; AUDIT_ARG_MODE(mode); restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), td); nd.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); /* * XXX namei called with LOCKPARENT but not LOCKLEAF has * the strange behaviour of leaving the vnode unlocked * if the target is the same vnode as the parent. */ if (vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VDIR; vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; #ifdef MAC error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error != 0) goto out; #endif error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); #ifdef MAC out: #endif NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (error == 0) vput(nd.ni_vp); vn_finished_write(mp); return (error); } /* * Remove a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct rmdir_args { char *path; }; #endif int sys_rmdir(struct thread *td, struct rmdir_args *uap) { return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); } int kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) { struct mount *mp; struct vnode *vp; struct nameidata nd; cap_rights_t rights; int error; restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_vflag & VV_ROOT) { error = EBUSY; goto out; } #ifdef MAC error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error != 0) goto out; #endif if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); vn_finished_write(mp); out: NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); return (error); } +#if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) +int +freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, + long *basep, void (*func)(struct freebsd11_dirent *)) +{ + struct freebsd11_dirent dstdp; + struct dirent *dp, *edp; + char *dirbuf; + off_t base; + ssize_t resid, ucount; + int error; + + /* XXX arbitrary sanity limit on `count'. */ + count = min(count, 64 * 1024); + + dirbuf = malloc(count, M_TEMP, M_WAITOK); + + error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, + UIO_SYSSPACE); + if (error != 0) + goto done; + if (basep != NULL) + *basep = base; + + ucount = 0; + for (dp = (struct dirent *)dirbuf, + edp = (struct dirent *)&dirbuf[count - resid]; + ucount < count && dp < edp; ) { + if (dp->d_reclen == 0) + break; + MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); + if (dp->d_namlen >= sizeof(dstdp.d_name)) + continue; + dstdp.d_type = dp->d_type; + dstdp.d_namlen = dp->d_namlen; + dstdp.d_fileno = dp->d_fileno; /* truncate */ + dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + + ((dp->d_namlen + 1 + 3) &~ 3); + bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); + bzero(dstdp.d_name + dstdp.d_namlen, + dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - + dstdp.d_namlen); + MPASS(dstdp.d_reclen <= dp->d_reclen); + MPASS(ucount + dstdp.d_reclen <= count); + if (func != NULL) + func(&dstdp); + error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); + if (error != 0) + break; + dp = (struct dirent *)((char *)dp + dp->d_reclen); + ucount += dstdp.d_reclen; + } + +done: + free(dirbuf, M_TEMP); + if (error == 0) + td->td_retval[0] = ucount; + return (error); +} +#endif /* COMPAT */ + #ifdef COMPAT_43 +static void +ogetdirentries_cvt(struct freebsd11_dirent *dp) +{ +#if (BYTE_ORDER == LITTLE_ENDIAN) + /* + * The expected low byte of dp->d_namlen is our dp->d_type. + * The high MBZ byte of dp->d_namlen is our dp->d_namlen. + */ + dp->d_type = dp->d_namlen; + dp->d_namlen = 0; +#else + /* + * The dp->d_type is the high byte of the expected dp->d_namlen, + * so must be zero'ed. + */ + dp->d_type = 0; +#endif +} + /* * Read a block of directory entries in a filesystem independent format. */ #ifndef _SYS_SYSPROTO_H_ struct ogetdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) { long loff; int error; error = kern_ogetdirentries(td, uap, &loff); if (error == 0) error = copyout(&loff, uap->basep, sizeof(long)); return (error); } int kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, long *ploff) { - struct vnode *vp; - struct file *fp; - struct uio auio, kuio; - struct iovec aiov, kiov; - struct dirent *dp, *edp; - cap_rights_t rights; - caddr_t dirbuf; - int error, eofflag, readcnt; - long loff; - off_t foffset; + long base; + int error; /* XXX arbitrary sanity limit on `count'. */ if (uap->count > 64 * 1024) return (EINVAL); - error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); - if (error != 0) - return (error); - if ((fp->f_flag & FREAD) == 0) { - fdrop(fp, td); - return (EBADF); - } - vp = fp->f_vnode; - foffset = foffset_lock(fp, 0); -unionread: - if (vp->v_type != VDIR) { - foffset_unlock(fp, foffset, 0); - fdrop(fp, td); - return (EINVAL); - } - aiov.iov_base = uap->buf; - aiov.iov_len = uap->count; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_td = td; - auio.uio_resid = uap->count; - vn_lock(vp, LK_SHARED | LK_RETRY); - loff = auio.uio_offset = foffset; -#ifdef MAC - error = mac_vnode_check_readdir(td->td_ucred, vp); - if (error != 0) { - VOP_UNLOCK(vp, 0); - foffset_unlock(fp, foffset, FOF_NOUPDATE); - fdrop(fp, td); - return (error); - } -#endif -# if (BYTE_ORDER != LITTLE_ENDIAN) - if (vp->v_mount->mnt_maxsymlinklen <= 0) { - error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, - NULL, NULL); - foffset = auio.uio_offset; - } else -# endif - { - kuio = auio; - kuio.uio_iov = &kiov; - kuio.uio_segflg = UIO_SYSSPACE; - kiov.iov_len = uap->count; - dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); - kiov.iov_base = dirbuf; - error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, - NULL, NULL); - foffset = kuio.uio_offset; - if (error == 0) { - readcnt = uap->count - kuio.uio_resid; - edp = (struct dirent *)&dirbuf[readcnt]; - for (dp = (struct dirent *)dirbuf; dp < edp; ) { -# if (BYTE_ORDER == LITTLE_ENDIAN) - /* - * The expected low byte of - * dp->d_namlen is our dp->d_type. - * The high MBZ byte of dp->d_namlen - * is our dp->d_namlen. - */ - dp->d_type = dp->d_namlen; - dp->d_namlen = 0; -# else - /* - * The dp->d_type is the high byte - * of the expected dp->d_namlen, - * so must be zero'ed. - */ - dp->d_type = 0; -# endif - if (dp->d_reclen > 0) { - dp = (struct dirent *) - ((char *)dp + dp->d_reclen); - } else { - error = EIO; - break; - } - } - if (dp >= edp) - error = uiomove(dirbuf, readcnt, &auio); - } - free(dirbuf, M_TEMP); - } - if (error != 0) { - VOP_UNLOCK(vp, 0); - foffset_unlock(fp, foffset, 0); - fdrop(fp, td); - return (error); - } - if (uap->count == auio.uio_resid && - (vp->v_vflag & VV_ROOT) && - (vp->v_mount->mnt_flag & MNT_UNION)) { - struct vnode *tvp = vp; - vp = vp->v_mount->mnt_vnodecovered; - VREF(vp); - fp->f_vnode = vp; - fp->f_data = vp; - foffset = 0; - vput(tvp); - goto unionread; - } - VOP_UNLOCK(vp, 0); - foffset_unlock(fp, foffset, 0); - fdrop(fp, td); - td->td_retval[0] = uap->count - auio.uio_resid; - if (error == 0) - *ploff = loff; + + error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, + &base, ogetdirentries_cvt); + + if (error == 0 && uap->basep != NULL) + error = copyout(&base, uap->basep, sizeof(long)); + return (error); } #endif /* COMPAT_43 */ -/* - * Read a block of directory entries in a filesystem independent format. - */ +#if defined(COMPAT_FREEBSD11) #ifndef _SYS_SYSPROTO_H_ -struct getdirentries_args { +struct freebsd11_getdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int -sys_getdirentries(struct thread *td, struct getdirentries_args *uap) +freebsd11_getdirentries(struct thread *td, + struct freebsd11_getdirentries_args *uap) { long base; int error; + error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, + &base, NULL); + + if (error == 0 && uap->basep != NULL) + error = copyout(&base, uap->basep, sizeof(long)); + return (error); +} + +int +freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) +{ + struct freebsd11_getdirentries_args ap; + + ap.fd = uap->fd; + ap.buf = uap->buf; + ap.count = uap->count; + ap.basep = NULL; + return (freebsd11_getdirentries(td, &ap)); +} +#endif /* COMPAT_FREEBSD11 */ + +/* + * Read a block of directory entries in a filesystem independent format. + */ +int +sys_getdirentries(struct thread *td, struct getdirentries_args *uap) +{ + off_t base; + int error; + error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, NULL, UIO_USERSPACE); if (error != 0) return (error); if (uap->basep != NULL) - error = copyout(&base, uap->basep, sizeof(long)); + error = copyout(&base, uap->basep, sizeof(off_t)); return (error); } int -kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, - long *basep, ssize_t *residp, enum uio_seg bufseg) +kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, + off_t *basep, ssize_t *residp, enum uio_seg bufseg) { struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; cap_rights_t rights; - long loff; + off_t loff; int error, eofflag; off_t foffset; AUDIT_ARG_FD(fd); if (count > IOSIZE_MAX) return (EINVAL); auio.uio_resid = count; error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); if (error != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; foffset = foffset_lock(fp, 0); unionread: if (vp->v_type != VDIR) { error = EINVAL; goto fail; } aiov.iov_base = buf; aiov.iov_len = count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = bufseg; auio.uio_td = td; vn_lock(vp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(vp); loff = auio.uio_offset = foffset; #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error == 0) #endif error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); foffset = auio.uio_offset; if (error != 0) { VOP_UNLOCK(vp, 0); goto fail; } if (count == auio.uio_resid && (vp->v_vflag & VV_ROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_vnode = vp; fp->f_data = vp; foffset = 0; vput(tvp); goto unionread; } VOP_UNLOCK(vp, 0); *basep = loff; if (residp != NULL) *residp = auio.uio_resid; td->td_retval[0] = count - auio.uio_resid; fail: foffset_unlock(fp, foffset, 0); fdrop(fp, td); return (error); -} - -#ifndef _SYS_SYSPROTO_H_ -struct getdents_args { - int fd; - char *buf; - size_t count; -}; -#endif -int -sys_getdents(struct thread *td, struct getdents_args *uap) -{ - struct getdirentries_args ap; - - ap.fd = uap->fd; - ap.buf = uap->buf; - ap.count = uap->count; - ap.basep = NULL; - return (sys_getdirentries(td, &ap)); } /* * Set the mode mask for creation of filesystem nodes. */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { int newmask; }; #endif int sys_umask(struct thread *td, struct umask_args *uap) { struct filedesc *fdp; fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); td->td_retval[0] = fdp->fd_cmask; fdp->fd_cmask = uap->newmask & ALLPERMS; FILEDESC_XUNLOCK(fdp); return (0); } /* * Void all references to file by ripping underlying filesystem away from * vnode. */ #ifndef _SYS_SYSPROTO_H_ struct revoke_args { char *path; }; #endif int sys_revoke(struct thread *td, struct revoke_args *uap) { struct vnode *vp; struct vattr vattr; struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); if (vp->v_type != VCHR || vp->v_rdev == NULL) { error = EINVAL; goto out; } #ifdef MAC error = mac_vnode_check_revoke(td->td_ucred, vp); if (error != 0) goto out; #endif error = VOP_GETATTR(vp, &vattr, td->td_ucred); if (error != 0) goto out; if (td->td_ucred->cr_uid != vattr.va_uid) { error = priv_check(td, PRIV_VFS_ADMIN); if (error != 0) goto out; } if (vcount(vp) > 1) VOP_REVOKE(vp, REVOKEALL); out: vput(vp); return (error); } /* * Convert a user file descriptor to a kernel file entry and check that, if it * is a capability, the correct rights are present. A reference on the file * entry is held upon returning. */ int getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) { struct file *fp; int error; error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); if (error != 0) return (error); /* * The file could be not of the vnode type, or it may be not * yet fully initialized, in which case the f_vnode pointer * may be set, but f_ops is still badfileops. E.g., * devfs_open() transiently create such situation to * facilitate csw d_fdopen(). * * Dupfdopen() handling in kern_openat() installs the * half-baked file into the process descriptor table, allowing * other thread to dereference it. Guard against the race by * checking f_ops. */ if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { fdrop(fp, td); return (EINVAL); } *fpp = fp; return (0); } /* * Get an (NFS) file handle. */ #ifndef _SYS_SYSPROTO_H_ struct lgetfh_args { char *fname; fhandle_t *fhp; }; #endif int sys_lgetfh(struct thread *td, struct lgetfh_args *uap) { struct nameidata nd; fhandle_t fh; struct vnode *vp; int error; error = priv_check(td, PRIV_VFS_GETFH); if (error != 0) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, uap->fname, td); error = namei(&nd); if (error != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; bzero(&fh, sizeof(fh)); fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VOP_VPTOFH(vp, &fh.fh_fid); vput(vp); if (error == 0) error = copyout(&fh, uap->fhp, sizeof (fh)); return (error); } #ifndef _SYS_SYSPROTO_H_ struct getfh_args { char *fname; fhandle_t *fhp; }; #endif int sys_getfh(struct thread *td, struct getfh_args *uap) { struct nameidata nd; fhandle_t fh; struct vnode *vp; int error; error = priv_check(td, PRIV_VFS_GETFH); if (error != 0) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, uap->fname, td); error = namei(&nd); if (error != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; bzero(&fh, sizeof(fh)); fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VOP_VPTOFH(vp, &fh.fh_fid); vput(vp); if (error == 0) error = copyout(&fh, uap->fhp, sizeof (fh)); return (error); } /* * syscall for the rpc.lockd to use to translate a NFS file handle into an * open descriptor. * * warning: do not remove the priv_check() call or this becomes one giant * security hole. */ #ifndef _SYS_SYSPROTO_H_ struct fhopen_args { const struct fhandle *u_fhp; int flags; }; #endif int sys_fhopen(struct thread *td, struct fhopen_args *uap) { struct mount *mp; struct vnode *vp; struct fhandle fhp; struct file *fp; int fmode, error; int indx; error = priv_check(td, PRIV_VFS_FHOPEN); if (error != 0) return (error); indx = -1; fmode = FFLAGS(uap->flags); /* why not allow a non-read/write open for our lockd? */ if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) return (EINVAL); error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); if (error != 0) return(error); /* find the mount point */ mp = vfs_busyfs(&fhp.fh_fsid); if (mp == NULL) return (ESTALE); /* now give me my vnode, it gets returned to me locked */ error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); vfs_unbusy(mp); if (error != 0) return (error); error = falloc_noinstall(td, &fp); if (error != 0) { vput(vp); return (error); } /* * An extra reference on `fp' has been held for us by * falloc_noinstall(). */ #ifdef INVARIANTS td->td_dupfd = -1; #endif error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); if (error != 0) { KASSERT(fp->f_ops == &badfileops, ("VOP_OPEN in fhopen() set f_ops")); KASSERT(td->td_dupfd < 0, ("fhopen() encountered fdopen()")); vput(vp); goto bad; } #ifdef INVARIANTS td->td_dupfd = 0; #endif fp->f_vnode = vp; fp->f_seqcount = 1; finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, &vnops); VOP_UNLOCK(vp, 0); if ((fmode & O_TRUNC) != 0) { error = fo_truncate(fp, 0, td->td_ucred, td); if (error != 0) goto bad; } error = finstall(td, fp, &indx, fmode, NULL); bad: fdrop(fp, td); td->td_retval[0] = indx; return (error); } /* * Stat an (NFS) file handle. */ #ifndef _SYS_SYSPROTO_H_ struct fhstat_args { struct fhandle *u_fhp; struct stat *sb; }; #endif int sys_fhstat(struct thread *td, struct fhstat_args *uap) { struct stat sb; struct fhandle fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fh)); if (error != 0) return (error); error = kern_fhstat(td, fh, &sb); if (error == 0) error = copyout(&sb, uap->sb, sizeof(sb)); return (error); } int kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) { struct mount *mp; struct vnode *vp; int error; error = priv_check(td, PRIV_VFS_FHSTAT); if (error != 0) return (error); if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) return (ESTALE); error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); vfs_unbusy(mp); if (error != 0) return (error); error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); vput(vp); return (error); } /* * Implement fstatfs() for (NFS) file handles. */ #ifndef _SYS_SYSPROTO_H_ struct fhstatfs_args { struct fhandle *u_fhp; struct statfs *buf; }; #endif int sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) { struct statfs *sfp; fhandle_t fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fhstatfs(td, fh, sfp); if (error == 0) error = copyout(sfp, uap->buf, sizeof(*sfp)); free(sfp, M_STATFS); return (error); } int kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) { struct statfs *sp; struct mount *mp; struct vnode *vp; int error; error = priv_check(td, PRIV_VFS_FHSTATFS); if (error != 0) return (error); if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) return (ESTALE); error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); if (error != 0) { vfs_unbusy(mp); return (error); } vput(vp); error = prison_canseemount(td->td_ucred, mp); if (error != 0) goto out; #ifdef MAC error = mac_mount_check_stat(td->td_ucred, mp); if (error != 0) goto out; #endif /* * Set these in case the underlying filesystem fails to do so. */ sp = &mp->mnt_stat; sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = VFS_STATFS(mp, sp); if (error == 0) *buf = *sp; out: vfs_unbusy(mp); return (error); } int kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) { struct file *fp; struct mount *mp; struct vnode *vp; cap_rights_t rights; off_t olen, ooffset; int error; #ifdef AUDIT int audited_vnode1 = 0; #endif AUDIT_ARG_FD(fd); if (offset < 0 || len <= 0) return (EINVAL); /* Check for wrap. */ if (offset > OFF_MAX - len) return (EFBIG); AUDIT_ARG_FD(fd); error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); if (error != 0) return (error); AUDIT_ARG_FILE(td->td_proc, fp); if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { error = ESPIPE; goto out; } if ((fp->f_flag & FWRITE) == 0) { error = EBADF; goto out; } if (fp->f_type != DTYPE_VNODE) { error = ENODEV; goto out; } vp = fp->f_vnode; if (vp->v_type != VREG) { error = ENODEV; goto out; } /* Allocating blocks may take a long time, so iterate. */ for (;;) { olen = len; ooffset = offset; bwillwrite(); mp = NULL; error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error != 0) break; error = vn_lock(vp, LK_EXCLUSIVE); if (error != 0) { vn_finished_write(mp); break; } #ifdef AUDIT if (!audited_vnode1) { AUDIT_ARG_VNODE1(vp); audited_vnode1 = 1; } #endif #ifdef MAC error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); if (error == 0) #endif error = VOP_ALLOCATE(vp, &offset, &len); VOP_UNLOCK(vp, 0); vn_finished_write(mp); if (olen + ooffset != offset + len) { panic("offset + len changed from %jx/%jx to %jx/%jx", ooffset, olen, offset, len); } if (error != 0 || len == 0) break; KASSERT(olen > len, ("Iteration did not make progress?")); maybe_yield(); } out: fdrop(fp, td); return (error); } int sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) { int error; error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); return (kern_posix_error(td, error)); } /* * Unlike madvise(2), we do not make a best effort to remember every * possible caching hint. Instead, we remember the last setting with * the exception that we will allow POSIX_FADV_NORMAL to adjust the * region of any current setting. */ int kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, int advice) { struct fadvise_info *fa, *new; struct file *fp; struct vnode *vp; cap_rights_t rights; off_t end; int error; if (offset < 0 || len < 0 || offset > OFF_MAX - len) return (EINVAL); AUDIT_ARG_VALUE(advice); switch (advice) { case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_RANDOM: case POSIX_FADV_NOREUSE: new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); break; case POSIX_FADV_NORMAL: case POSIX_FADV_WILLNEED: case POSIX_FADV_DONTNEED: new = NULL; break; default: return (EINVAL); } /* XXX: CAP_POSIX_FADVISE? */ AUDIT_ARG_FD(fd); error = fget(td, fd, cap_rights_init(&rights), &fp); if (error != 0) goto out; AUDIT_ARG_FILE(td->td_proc, fp); if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { error = ESPIPE; goto out; } if (fp->f_type != DTYPE_VNODE) { error = ENODEV; goto out; } vp = fp->f_vnode; if (vp->v_type != VREG) { error = ENODEV; goto out; } if (len == 0) end = OFF_MAX; else end = offset + len - 1; switch (advice) { case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_RANDOM: case POSIX_FADV_NOREUSE: /* * Try to merge any existing non-standard region with * this new region if possible, otherwise create a new * non-standard region for this request. */ mtx_pool_lock(mtxpool_sleep, fp); fa = fp->f_advice; if (fa != NULL && fa->fa_advice == advice && ((fa->fa_start <= end && fa->fa_end >= offset) || (end != OFF_MAX && fa->fa_start == end + 1) || (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { if (offset < fa->fa_start) fa->fa_start = offset; if (end > fa->fa_end) fa->fa_end = end; } else { new->fa_advice = advice; new->fa_start = offset; new->fa_end = end; fp->f_advice = new; new = fa; } mtx_pool_unlock(mtxpool_sleep, fp); break; case POSIX_FADV_NORMAL: /* * If a the "normal" region overlaps with an existing * non-standard region, trim or remove the * non-standard region. */ mtx_pool_lock(mtxpool_sleep, fp); fa = fp->f_advice; if (fa != NULL) { if (offset <= fa->fa_start && end >= fa->fa_end) { new = fa; fp->f_advice = NULL; } else if (offset <= fa->fa_start && end >= fa->fa_start) fa->fa_start = end + 1; else if (offset <= fa->fa_end && end >= fa->fa_end) fa->fa_end = offset - 1; else if (offset >= fa->fa_start && end <= fa->fa_end) { /* * If the "normal" region is a middle * portion of the existing * non-standard region, just remove * the whole thing rather than picking * one side or the other to * preserve. */ new = fa; fp->f_advice = NULL; } } mtx_pool_unlock(mtxpool_sleep, fp); break; case POSIX_FADV_WILLNEED: case POSIX_FADV_DONTNEED: error = VOP_ADVISE(vp, offset, end, advice); break; } out: if (fp != NULL) fdrop(fp, td); free(new, M_FADVISE); return (error); } int sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) { int error; error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, uap->advice); return (kern_posix_error(td, error)); } Index: head/sys/kern/vfs_vnops.c =================================================================== --- head/sys/kern/vfs_vnops.c (revision 318735) +++ head/sys/kern/vfs_vnops.c (revision 318736) @@ -1,2491 +1,2495 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Copyright (c) 2012 Konstantin Belousov * Copyright (c) 2013, 2014 The FreeBSD Foundation * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_hwpmc_hooks.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif static fo_rdwr_t vn_read; static fo_rdwr_t vn_write; static fo_rdwr_t vn_io_fault; static fo_truncate_t vn_truncate; static fo_ioctl_t vn_ioctl; static fo_poll_t vn_poll; static fo_kqfilter_t vn_kqfilter; static fo_stat_t vn_statfile; static fo_close_t vn_closefile; static fo_mmap_t vn_mmap; struct fileops vnops = { .fo_read = vn_io_fault, .fo_write = vn_io_fault, .fo_truncate = vn_truncate, .fo_ioctl = vn_ioctl, .fo_poll = vn_poll, .fo_kqfilter = vn_kqfilter, .fo_stat = vn_statfile, .fo_close = vn_closefile, .fo_chmod = vn_chmod, .fo_chown = vn_chown, .fo_sendfile = vn_sendfile, .fo_seek = vn_seek, .fo_fill_kinfo = vn_fill_kinfo, .fo_mmap = vn_mmap, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; static const int io_hold_cnt = 16; static int vn_io_fault_enable = 1; SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_enable, CTLFLAG_RW, &vn_io_fault_enable, 0, "Enable vn_io_fault lock avoidance"); static int vn_io_fault_prefault = 0; SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_prefault, CTLFLAG_RW, &vn_io_fault_prefault, 0, "Enable vn_io_fault prefaulting"); static u_long vn_io_faults_cnt; SYSCTL_ULONG(_debug, OID_AUTO, vn_io_faults, CTLFLAG_RD, &vn_io_faults_cnt, 0, "Count of vn_io_fault lock avoidance triggers"); /* * Returns true if vn_io_fault mode of handling the i/o request should * be used. */ static bool do_vn_io_fault(struct vnode *vp, struct uio *uio) { struct mount *mp; return (uio->uio_segflg == UIO_USERSPACE && vp->v_type == VREG && (mp = vp->v_mount) != NULL && (mp->mnt_kern_flag & MNTK_NO_IOPF) != 0 && vn_io_fault_enable); } /* * Structure used to pass arguments to vn_io_fault1(), to do either * file- or vnode-based I/O calls. */ struct vn_io_fault_args { enum { VN_IO_FAULT_FOP, VN_IO_FAULT_VOP } kind; struct ucred *cred; int flags; union { struct fop_args_tag { struct file *fp; fo_rdwr_t *doio; } fop_args; struct vop_args_tag { struct vnode *vp; } vop_args; } args; }; static int vn_io_fault1(struct vnode *vp, struct uio *uio, struct vn_io_fault_args *args, struct thread *td); int vn_open(ndp, flagp, cmode, fp) struct nameidata *ndp; int *flagp, cmode; struct file *fp; { struct thread *td = ndp->ni_cnd.cn_thread; return (vn_open_cred(ndp, flagp, cmode, 0, td->td_ucred, fp)); } /* * Common code for vnode open operations via a name lookup. * Lookup the vnode and invoke VOP_CREATE if needed. * Check permissions, and call the VOP_OPEN or VOP_CREATE routine. * * Note that this does NOT free nameidata for the successful case, * due to the NDINIT being done elsewhere. */ int vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, u_int vn_open_flags, struct ucred *cred, struct file *fp) { struct vnode *vp; struct mount *mp; struct thread *td = ndp->ni_cnd.cn_thread; struct vattr vat; struct vattr *vap = &vat; int fmode, error; restart: fmode = *flagp; if ((fmode & (O_CREAT | O_EXCL | O_DIRECTORY)) == (O_CREAT | O_EXCL | O_DIRECTORY)) return (EINVAL); else if ((fmode & (O_CREAT | O_DIRECTORY)) == O_CREAT) { ndp->ni_cnd.cn_nameiop = CREATE; /* * Set NOCACHE to avoid flushing the cache when * rolling in many files at once. */ ndp->ni_cnd.cn_flags = ISOPEN | LOCKPARENT | LOCKLEAF | NOCACHE; if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0) ndp->ni_cnd.cn_flags |= FOLLOW; if (!(vn_open_flags & VN_OPEN_NOAUDIT)) ndp->ni_cnd.cn_flags |= AUDITVNODE1; if (vn_open_flags & VN_OPEN_NOCAPCHECK) ndp->ni_cnd.cn_flags |= NOCAPCHECK; bwillwrite(); if ((error = namei(ndp)) != 0) return (error); if (ndp->ni_vp == NULL) { VATTR_NULL(vap); vap->va_type = VREG; vap->va_mode = cmode; if (fmode & O_EXCL) vap->va_vaflags |= VA_EXCLUSIVE; if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(ndp, NDF_ONLY_PNBUF); vput(ndp->ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0) ndp->ni_cnd.cn_flags |= MAKEENTRY; #ifdef MAC error = mac_vnode_check_create(cred, ndp->ni_dvp, &ndp->ni_cnd, vap); if (error == 0) #endif error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, vap); vput(ndp->ni_dvp); vn_finished_write(mp); if (error) { NDFREE(ndp, NDF_ONLY_PNBUF); return (error); } fmode &= ~O_TRUNC; vp = ndp->ni_vp; } else { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); ndp->ni_dvp = NULL; vp = ndp->ni_vp; if (fmode & O_EXCL) { error = EEXIST; goto bad; } fmode &= ~O_CREAT; } } else { ndp->ni_cnd.cn_nameiop = LOOKUP; ndp->ni_cnd.cn_flags = ISOPEN | ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF; if (!(fmode & FWRITE)) ndp->ni_cnd.cn_flags |= LOCKSHARED; if (!(vn_open_flags & VN_OPEN_NOAUDIT)) ndp->ni_cnd.cn_flags |= AUDITVNODE1; if (vn_open_flags & VN_OPEN_NOCAPCHECK) ndp->ni_cnd.cn_flags |= NOCAPCHECK; if ((error = namei(ndp)) != 0) return (error); vp = ndp->ni_vp; } error = vn_open_vnode(vp, fmode, cred, td, fp); if (error) goto bad; *flagp = fmode; return (0); bad: NDFREE(ndp, NDF_ONLY_PNBUF); vput(vp); *flagp = fmode; ndp->ni_vp = NULL; return (error); } /* * Common code for vnode open operations once a vnode is located. * Check permissions, and call the VOP_OPEN routine. */ int vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred, struct thread *td, struct file *fp) { accmode_t accmode; struct flock lf; int error, lock_flags, type; if (vp->v_type == VLNK) return (EMLINK); if (vp->v_type == VSOCK) return (EOPNOTSUPP); if (vp->v_type != VDIR && fmode & O_DIRECTORY) return (ENOTDIR); accmode = 0; if (fmode & (FWRITE | O_TRUNC)) { if (vp->v_type == VDIR) return (EISDIR); accmode |= VWRITE; } if (fmode & FREAD) accmode |= VREAD; if (fmode & FEXEC) accmode |= VEXEC; if ((fmode & O_APPEND) && (fmode & FWRITE)) accmode |= VAPPEND; #ifdef MAC if (fmode & O_CREAT) accmode |= VCREAT; if (fmode & O_VERIFY) accmode |= VVERIFY; error = mac_vnode_check_open(cred, vp, accmode); if (error) return (error); accmode &= ~(VCREAT | VVERIFY); #endif if ((fmode & O_CREAT) == 0) { if (accmode & VWRITE) { error = vn_writechk(vp); if (error) return (error); } if (accmode) { error = VOP_ACCESS(vp, accmode, cred, td); if (error) return (error); } } if (vp->v_type == VFIFO && VOP_ISLOCKED(vp) != LK_EXCLUSIVE) vn_lock(vp, LK_UPGRADE | LK_RETRY); if ((error = VOP_OPEN(vp, fmode, cred, td, fp)) != 0) return (error); while ((fmode & (O_EXLOCK | O_SHLOCK)) != 0) { KASSERT(fp != NULL, ("open with flock requires fp")); if (fp->f_type != DTYPE_NONE && fp->f_type != DTYPE_VNODE) { error = EOPNOTSUPP; break; } lock_flags = VOP_ISLOCKED(vp); VOP_UNLOCK(vp, 0); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (fmode & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((fmode & FNONBLOCK) == 0) type |= F_WAIT; error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type); if (error == 0) fp->f_flag |= FHASLOCK; vn_lock(vp, lock_flags | LK_RETRY); if (error != 0) break; if ((vp->v_iflag & VI_DOOMED) != 0) { error = ENOENT; break; } /* * Another thread might have used this vnode as an * executable while the vnode lock was dropped. * Ensure the vnode is still able to be opened for * writing after the lock has been obtained. */ if ((accmode & VWRITE) != 0) error = vn_writechk(vp); break; } if (error != 0) { fp->f_flag |= FOPENFAILED; fp->f_vnode = vp; if (fp->f_ops == &badfileops) { fp->f_type = DTYPE_VNODE; fp->f_ops = &vnops; } vref(vp); } else if ((fmode & FWRITE) != 0) { VOP_ADD_WRITECOUNT(vp, 1); CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", __func__, vp, vp->v_writecount); } ASSERT_VOP_LOCKED(vp, "vn_open_vnode"); return (error); } /* * Check for write permissions on the specified vnode. * Prototype text segments cannot be written. */ int vn_writechk(struct vnode *vp) { ASSERT_VOP_LOCKED(vp, "vn_writechk"); /* * If there's shared text associated with * the vnode, try to free it up once. If * we fail, we can't allow writing. */ if (VOP_IS_TEXT(vp)) return (ETXTBSY); return (0); } /* * Vnode close call */ static int vn_close1(struct vnode *vp, int flags, struct ucred *file_cred, struct thread *td, bool keep_ref) { struct mount *mp; int error, lock_flags; if (vp->v_type != VFIFO && (flags & FWRITE) == 0 && MNT_EXTENDED_SHARED(vp->v_mount)) lock_flags = LK_SHARED; else lock_flags = LK_EXCLUSIVE; vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, lock_flags | LK_RETRY); AUDIT_ARG_VNODE1(vp); if ((flags & (FWRITE | FOPENFAILED)) == FWRITE) { VNASSERT(vp->v_writecount > 0, vp, ("vn_close: negative writecount")); VOP_ADD_WRITECOUNT(vp, -1); CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, vp, vp->v_writecount); } error = VOP_CLOSE(vp, flags, file_cred, td); if (keep_ref) VOP_UNLOCK(vp, 0); else vput(vp); vn_finished_write(mp); return (error); } int vn_close(struct vnode *vp, int flags, struct ucred *file_cred, struct thread *td) { return (vn_close1(vp, flags, file_cred, td, false)); } /* * Heuristic to detect sequential operation. */ static int sequential_heuristic(struct uio *uio, struct file *fp) { ASSERT_VOP_LOCKED(fp->f_vnode, __func__); if (fp->f_flag & FRDAHEAD) return (fp->f_seqcount << IO_SEQSHIFT); /* * Offset 0 is handled specially. open() sets f_seqcount to 1 so * that the first I/O is normally considered to be slightly * sequential. Seeking to offset 0 doesn't change sequentiality * unless previous seeks have reduced f_seqcount to 0, in which * case offset 0 is not special. */ if ((uio->uio_offset == 0 && fp->f_seqcount > 0) || uio->uio_offset == fp->f_nextoff) { /* * f_seqcount is in units of fixed-size blocks so that it * depends mainly on the amount of sequential I/O and not * much on the number of sequential I/O's. The fixed size * of 16384 is hard-coded here since it is (not quite) just * a magic size that works well here. This size is more * closely related to the best I/O size for real disks than * to any block size used by software. */ fp->f_seqcount += howmany(uio->uio_resid, 16384); if (fp->f_seqcount > IO_SEQMAX) fp->f_seqcount = IO_SEQMAX; return (fp->f_seqcount << IO_SEQSHIFT); } /* Not sequential. Quickly draw-down sequentiality. */ if (fp->f_seqcount > 1) fp->f_seqcount = 1; else fp->f_seqcount = 0; return (0); } /* * Package up an I/O request on a vnode into a uio and do it. */ int vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, ssize_t *aresid, struct thread *td) { struct uio auio; struct iovec aiov; struct mount *mp; struct ucred *cred; void *rl_cookie; struct vn_io_fault_args args; int error, lock_flags; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = base; aiov.iov_len = len; auio.uio_resid = len; auio.uio_offset = offset; auio.uio_segflg = segflg; auio.uio_rw = rw; auio.uio_td = td; error = 0; if ((ioflg & IO_NODELOCKED) == 0) { if ((ioflg & IO_RANGELOCKED) == 0) { if (rw == UIO_READ) { rl_cookie = vn_rangelock_rlock(vp, offset, offset + len); } else { rl_cookie = vn_rangelock_wlock(vp, offset, offset + len); } } else rl_cookie = NULL; mp = NULL; if (rw == UIO_WRITE) { if (vp->v_type != VCHR && (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto out; if (MNT_SHARED_WRITES(mp) || ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) lock_flags = LK_SHARED; else lock_flags = LK_EXCLUSIVE; } else lock_flags = LK_SHARED; vn_lock(vp, lock_flags | LK_RETRY); } else rl_cookie = NULL; ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held"); #ifdef MAC if ((ioflg & IO_NOMACCHECK) == 0) { if (rw == UIO_READ) error = mac_vnode_check_read(active_cred, file_cred, vp); else error = mac_vnode_check_write(active_cred, file_cred, vp); } #endif if (error == 0) { if (file_cred != NULL) cred = file_cred; else cred = active_cred; if (do_vn_io_fault(vp, &auio)) { args.kind = VN_IO_FAULT_VOP; args.cred = cred; args.flags = ioflg; args.args.vop_args.vp = vp; error = vn_io_fault1(vp, &auio, &args, td); } else if (rw == UIO_READ) { error = VOP_READ(vp, &auio, ioflg, cred); } else /* if (rw == UIO_WRITE) */ { error = VOP_WRITE(vp, &auio, ioflg, cred); } } if (aresid) *aresid = auio.uio_resid; else if (auio.uio_resid && error == 0) error = EIO; if ((ioflg & IO_NODELOCKED) == 0) { VOP_UNLOCK(vp, 0); if (mp != NULL) vn_finished_write(mp); } out: if (rl_cookie != NULL) vn_rangelock_unlock(vp, rl_cookie); return (error); } /* * Package up an I/O request on a vnode into a uio and do it. The I/O * request is split up into smaller chunks and we try to avoid saturating * the buffer cache while potentially holding a vnode locked, so we * check bwillwrite() before calling vn_rdwr(). We also call kern_yield() * to give other processes a chance to lock the vnode (either other processes * core'ing the same binary, or unrelated processes scanning the directory). */ int vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred, aresid, td) enum uio_rw rw; struct vnode *vp; void *base; size_t len; off_t offset; enum uio_seg segflg; int ioflg; struct ucred *active_cred; struct ucred *file_cred; size_t *aresid; struct thread *td; { int error = 0; ssize_t iaresid; do { int chunk; /* * Force `offset' to a multiple of MAXBSIZE except possibly * for the first chunk, so that filesystems only need to * write full blocks except possibly for the first and last * chunks. */ chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE; if (chunk > len) chunk = len; if (rw != UIO_READ && vp->v_type == VREG) bwillwrite(); iaresid = 0; error = vn_rdwr(rw, vp, base, chunk, offset, segflg, ioflg, active_cred, file_cred, &iaresid, td); len -= chunk; /* aresid calc already includes length */ if (error) break; offset += chunk; base = (char *)base + chunk; kern_yield(PRI_USER); } while (len); if (aresid) *aresid = len + iaresid; return (error); } off_t foffset_lock(struct file *fp, int flags) { struct mtx *mtxp; off_t res; KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed")); #if OFF_MAX <= LONG_MAX /* * Caller only wants the current f_offset value. Assume that * the long and shorter integer types reads are atomic. */ if ((flags & FOF_NOLOCK) != 0) return (fp->f_offset); #endif /* * According to McKusick the vn lock was protecting f_offset here. * It is now protected by the FOFFSET_LOCKED flag. */ mtxp = mtx_pool_find(mtxpool_sleep, fp); mtx_lock(mtxp); if ((flags & FOF_NOLOCK) == 0) { while (fp->f_vnread_flags & FOFFSET_LOCKED) { fp->f_vnread_flags |= FOFFSET_LOCK_WAITING; msleep(&fp->f_vnread_flags, mtxp, PUSER -1, "vofflock", 0); } fp->f_vnread_flags |= FOFFSET_LOCKED; } res = fp->f_offset; mtx_unlock(mtxp); return (res); } void foffset_unlock(struct file *fp, off_t val, int flags) { struct mtx *mtxp; KASSERT((flags & FOF_OFFSET) == 0, ("FOF_OFFSET passed")); #if OFF_MAX <= LONG_MAX if ((flags & FOF_NOLOCK) != 0) { if ((flags & FOF_NOUPDATE) == 0) fp->f_offset = val; if ((flags & FOF_NEXTOFF) != 0) fp->f_nextoff = val; return; } #endif mtxp = mtx_pool_find(mtxpool_sleep, fp); mtx_lock(mtxp); if ((flags & FOF_NOUPDATE) == 0) fp->f_offset = val; if ((flags & FOF_NEXTOFF) != 0) fp->f_nextoff = val; if ((flags & FOF_NOLOCK) == 0) { KASSERT((fp->f_vnread_flags & FOFFSET_LOCKED) != 0, ("Lost FOFFSET_LOCKED")); if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING) wakeup(&fp->f_vnread_flags); fp->f_vnread_flags = 0; } mtx_unlock(mtxp); } void foffset_lock_uio(struct file *fp, struct uio *uio, int flags) { if ((flags & FOF_OFFSET) == 0) uio->uio_offset = foffset_lock(fp, flags); } void foffset_unlock_uio(struct file *fp, struct uio *uio, int flags) { if ((flags & FOF_OFFSET) == 0) foffset_unlock(fp, uio->uio_offset, flags); } static int get_advice(struct file *fp, struct uio *uio) { struct mtx *mtxp; int ret; ret = POSIX_FADV_NORMAL; if (fp->f_advice == NULL || fp->f_vnode->v_type != VREG) return (ret); mtxp = mtx_pool_find(mtxpool_sleep, fp); mtx_lock(mtxp); if (fp->f_advice != NULL && uio->uio_offset >= fp->f_advice->fa_start && uio->uio_offset + uio->uio_resid <= fp->f_advice->fa_end) ret = fp->f_advice->fa_advice; mtx_unlock(mtxp); return (ret); } /* * File table vnode read routine. */ static int vn_read(fp, uio, active_cred, flags, td) struct file *fp; struct uio *uio; struct ucred *active_cred; int flags; struct thread *td; { struct vnode *vp; off_t orig_offset; int error, ioflag; int advice; KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); KASSERT(flags & FOF_OFFSET, ("No FOF_OFFSET")); vp = fp->f_vnode; ioflag = 0; if (fp->f_flag & FNONBLOCK) ioflag |= IO_NDELAY; if (fp->f_flag & O_DIRECT) ioflag |= IO_DIRECT; advice = get_advice(fp, uio); vn_lock(vp, LK_SHARED | LK_RETRY); switch (advice) { case POSIX_FADV_NORMAL: case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_NOREUSE: ioflag |= sequential_heuristic(uio, fp); break; case POSIX_FADV_RANDOM: /* Disable read-ahead for random I/O. */ break; } orig_offset = uio->uio_offset; #ifdef MAC error = mac_vnode_check_read(active_cred, fp->f_cred, vp); if (error == 0) #endif error = VOP_READ(vp, uio, ioflag, fp->f_cred); fp->f_nextoff = uio->uio_offset; VOP_UNLOCK(vp, 0); if (error == 0 && advice == POSIX_FADV_NOREUSE && orig_offset != uio->uio_offset) /* * Use POSIX_FADV_DONTNEED to flush pages and buffers * for the backing file after a POSIX_FADV_NOREUSE * read(2). */ error = VOP_ADVISE(vp, orig_offset, uio->uio_offset - 1, POSIX_FADV_DONTNEED); return (error); } /* * File table vnode write routine. */ static int vn_write(fp, uio, active_cred, flags, td) struct file *fp; struct uio *uio; struct ucred *active_cred; int flags; struct thread *td; { struct vnode *vp; struct mount *mp; off_t orig_offset; int error, ioflag, lock_flags; int advice; KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); KASSERT(flags & FOF_OFFSET, ("No FOF_OFFSET")); vp = fp->f_vnode; if (vp->v_type == VREG) bwillwrite(); ioflag = IO_UNIT; if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) ioflag |= IO_APPEND; if (fp->f_flag & FNONBLOCK) ioflag |= IO_NDELAY; if (fp->f_flag & O_DIRECT) ioflag |= IO_DIRECT; if ((fp->f_flag & O_FSYNC) || (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) ioflag |= IO_SYNC; mp = NULL; if (vp->v_type != VCHR && (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto unlock; advice = get_advice(fp, uio); if (MNT_SHARED_WRITES(mp) || (mp == NULL && MNT_SHARED_WRITES(vp->v_mount))) { lock_flags = LK_SHARED; } else { lock_flags = LK_EXCLUSIVE; } vn_lock(vp, lock_flags | LK_RETRY); switch (advice) { case POSIX_FADV_NORMAL: case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_NOREUSE: ioflag |= sequential_heuristic(uio, fp); break; case POSIX_FADV_RANDOM: /* XXX: Is this correct? */ break; } orig_offset = uio->uio_offset; #ifdef MAC error = mac_vnode_check_write(active_cred, fp->f_cred, vp); if (error == 0) #endif error = VOP_WRITE(vp, uio, ioflag, fp->f_cred); fp->f_nextoff = uio->uio_offset; VOP_UNLOCK(vp, 0); if (vp->v_type != VCHR) vn_finished_write(mp); if (error == 0 && advice == POSIX_FADV_NOREUSE && orig_offset != uio->uio_offset) /* * Use POSIX_FADV_DONTNEED to flush pages and buffers * for the backing file after a POSIX_FADV_NOREUSE * write(2). */ error = VOP_ADVISE(vp, orig_offset, uio->uio_offset - 1, POSIX_FADV_DONTNEED); unlock: return (error); } /* * The vn_io_fault() is a wrapper around vn_read() and vn_write() to * prevent the following deadlock: * * Assume that the thread A reads from the vnode vp1 into userspace * buffer buf1 backed by the pages of vnode vp2. If a page in buf1 is * currently not resident, then system ends up with the call chain * vn_read() -> VOP_READ(vp1) -> uiomove() -> [Page Fault] -> * vm_fault(buf1) -> vnode_pager_getpages(vp2) -> VOP_GETPAGES(vp2) * which establishes lock order vp1->vn_lock, then vp2->vn_lock. * If, at the same time, thread B reads from vnode vp2 into buffer buf2 * backed by the pages of vnode vp1, and some page in buf2 is not * resident, we get a reversed order vp2->vn_lock, then vp1->vn_lock. * * To prevent the lock order reversal and deadlock, vn_io_fault() does * not allow page faults to happen during VOP_READ() or VOP_WRITE(). * Instead, it first tries to do the whole range i/o with pagefaults * disabled. If all pages in the i/o buffer are resident and mapped, * VOP will succeed (ignoring the genuine filesystem errors). * Otherwise, we get back EFAULT, and vn_io_fault() falls back to do * i/o in chunks, with all pages in the chunk prefaulted and held * using vm_fault_quick_hold_pages(). * * Filesystems using this deadlock avoidance scheme should use the * array of the held pages from uio, saved in the curthread->td_ma, * instead of doing uiomove(). A helper function * vn_io_fault_uiomove() converts uiomove request into * uiomove_fromphys() over td_ma array. * * Since vnode locks do not cover the whole i/o anymore, rangelocks * make the current i/o request atomic with respect to other i/os and * truncations. */ /* * Decode vn_io_fault_args and perform the corresponding i/o. */ static int vn_io_fault_doio(struct vn_io_fault_args *args, struct uio *uio, struct thread *td) { switch (args->kind) { case VN_IO_FAULT_FOP: return ((args->args.fop_args.doio)(args->args.fop_args.fp, uio, args->cred, args->flags, td)); case VN_IO_FAULT_VOP: if (uio->uio_rw == UIO_READ) { return (VOP_READ(args->args.vop_args.vp, uio, args->flags, args->cred)); } else if (uio->uio_rw == UIO_WRITE) { return (VOP_WRITE(args->args.vop_args.vp, uio, args->flags, args->cred)); } break; } panic("vn_io_fault_doio: unknown kind of io %d %d", args->kind, uio->uio_rw); } static int vn_io_fault_touch(char *base, const struct uio *uio) { int r; r = fubyte(base); if (r == -1 || (uio->uio_rw == UIO_READ && subyte(base, r) == -1)) return (EFAULT); return (0); } static int vn_io_fault_prefault_user(const struct uio *uio) { char *base; const struct iovec *iov; size_t len; ssize_t resid; int error, i; KASSERT(uio->uio_segflg == UIO_USERSPACE, ("vn_io_fault_prefault userspace")); error = i = 0; iov = uio->uio_iov; resid = uio->uio_resid; base = iov->iov_base; len = iov->iov_len; while (resid > 0) { error = vn_io_fault_touch(base, uio); if (error != 0) break; if (len < PAGE_SIZE) { if (len != 0) { error = vn_io_fault_touch(base + len - 1, uio); if (error != 0) break; resid -= len; } if (++i >= uio->uio_iovcnt) break; iov = uio->uio_iov + i; base = iov->iov_base; len = iov->iov_len; } else { len -= PAGE_SIZE; base += PAGE_SIZE; resid -= PAGE_SIZE; } } return (error); } /* * Common code for vn_io_fault(), agnostic to the kind of i/o request. * Uses vn_io_fault_doio() to make the call to an actual i/o function. * Used from vn_rdwr() and vn_io_fault(), which encode the i/o request * into args and call vn_io_fault1() to handle faults during the user * mode buffer accesses. */ static int vn_io_fault1(struct vnode *vp, struct uio *uio, struct vn_io_fault_args *args, struct thread *td) { vm_page_t ma[io_hold_cnt + 2]; struct uio *uio_clone, short_uio; struct iovec short_iovec[1]; vm_page_t *prev_td_ma; vm_prot_t prot; vm_offset_t addr, end; size_t len, resid; ssize_t adv; int error, cnt, save, saveheld, prev_td_ma_cnt; if (vn_io_fault_prefault) { error = vn_io_fault_prefault_user(uio); if (error != 0) return (error); /* Or ignore ? */ } prot = uio->uio_rw == UIO_READ ? VM_PROT_WRITE : VM_PROT_READ; /* * The UFS follows IO_UNIT directive and replays back both * uio_offset and uio_resid if an error is encountered during the * operation. But, since the iovec may be already advanced, * uio is still in an inconsistent state. * * Cache a copy of the original uio, which is advanced to the redo * point using UIO_NOCOPY below. */ uio_clone = cloneuio(uio); resid = uio->uio_resid; short_uio.uio_segflg = UIO_USERSPACE; short_uio.uio_rw = uio->uio_rw; short_uio.uio_td = uio->uio_td; save = vm_fault_disable_pagefaults(); error = vn_io_fault_doio(args, uio, td); if (error != EFAULT) goto out; atomic_add_long(&vn_io_faults_cnt, 1); uio_clone->uio_segflg = UIO_NOCOPY; uiomove(NULL, resid - uio->uio_resid, uio_clone); uio_clone->uio_segflg = uio->uio_segflg; saveheld = curthread_pflags_set(TDP_UIOHELD); prev_td_ma = td->td_ma; prev_td_ma_cnt = td->td_ma_cnt; while (uio_clone->uio_resid != 0) { len = uio_clone->uio_iov->iov_len; if (len == 0) { KASSERT(uio_clone->uio_iovcnt >= 1, ("iovcnt underflow")); uio_clone->uio_iov++; uio_clone->uio_iovcnt--; continue; } if (len > io_hold_cnt * PAGE_SIZE) len = io_hold_cnt * PAGE_SIZE; addr = (uintptr_t)uio_clone->uio_iov->iov_base; end = round_page(addr + len); if (end < addr) { error = EFAULT; break; } cnt = atop(end - trunc_page(addr)); /* * A perfectly misaligned address and length could cause * both the start and the end of the chunk to use partial * page. +2 accounts for such a situation. */ cnt = vm_fault_quick_hold_pages(&td->td_proc->p_vmspace->vm_map, addr, len, prot, ma, io_hold_cnt + 2); if (cnt == -1) { error = EFAULT; break; } short_uio.uio_iov = &short_iovec[0]; short_iovec[0].iov_base = (void *)addr; short_uio.uio_iovcnt = 1; short_uio.uio_resid = short_iovec[0].iov_len = len; short_uio.uio_offset = uio_clone->uio_offset; td->td_ma = ma; td->td_ma_cnt = cnt; error = vn_io_fault_doio(args, &short_uio, td); vm_page_unhold_pages(ma, cnt); adv = len - short_uio.uio_resid; uio_clone->uio_iov->iov_base = (char *)uio_clone->uio_iov->iov_base + adv; uio_clone->uio_iov->iov_len -= adv; uio_clone->uio_resid -= adv; uio_clone->uio_offset += adv; uio->uio_resid -= adv; uio->uio_offset += adv; if (error != 0 || adv == 0) break; } td->td_ma = prev_td_ma; td->td_ma_cnt = prev_td_ma_cnt; curthread_pflags_restore(saveheld); out: vm_fault_enable_pagefaults(save); free(uio_clone, M_IOV); return (error); } static int vn_io_fault(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { fo_rdwr_t *doio; struct vnode *vp; void *rl_cookie; struct vn_io_fault_args args; int error; doio = uio->uio_rw == UIO_READ ? vn_read : vn_write; vp = fp->f_vnode; foffset_lock_uio(fp, uio, flags); if (do_vn_io_fault(vp, uio)) { args.kind = VN_IO_FAULT_FOP; args.args.fop_args.fp = fp; args.args.fop_args.doio = doio; args.cred = active_cred; args.flags = flags | FOF_OFFSET; if (uio->uio_rw == UIO_READ) { rl_cookie = vn_rangelock_rlock(vp, uio->uio_offset, uio->uio_offset + uio->uio_resid); } else if ((fp->f_flag & O_APPEND) != 0 || (flags & FOF_OFFSET) == 0) { /* For appenders, punt and lock the whole range. */ rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); } else { rl_cookie = vn_rangelock_wlock(vp, uio->uio_offset, uio->uio_offset + uio->uio_resid); } error = vn_io_fault1(vp, uio, &args, td); vn_rangelock_unlock(vp, rl_cookie); } else { error = doio(fp, uio, active_cred, flags | FOF_OFFSET, td); } foffset_unlock_uio(fp, uio, flags); return (error); } /* * Helper function to perform the requested uiomove operation using * the held pages for io->uio_iov[0].iov_base buffer instead of * copyin/copyout. Access to the pages with uiomove_fromphys() * instead of iov_base prevents page faults that could occur due to * pmap_collect() invalidating the mapping created by * vm_fault_quick_hold_pages(), or pageout daemon, page laundry or * object cleanup revoking the write access from page mappings. * * Filesystems specified MNTK_NO_IOPF shall use vn_io_fault_uiomove() * instead of plain uiomove(). */ int vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio) { struct uio transp_uio; struct iovec transp_iov[1]; struct thread *td; size_t adv; int error, pgadv; td = curthread; if ((td->td_pflags & TDP_UIOHELD) == 0 || uio->uio_segflg != UIO_USERSPACE) return (uiomove(data, xfersize, uio)); KASSERT(uio->uio_iovcnt == 1, ("uio_iovcnt %d", uio->uio_iovcnt)); transp_iov[0].iov_base = data; transp_uio.uio_iov = &transp_iov[0]; transp_uio.uio_iovcnt = 1; if (xfersize > uio->uio_resid) xfersize = uio->uio_resid; transp_uio.uio_resid = transp_iov[0].iov_len = xfersize; transp_uio.uio_offset = 0; transp_uio.uio_segflg = UIO_SYSSPACE; /* * Since transp_iov points to data, and td_ma page array * corresponds to original uio->uio_iov, we need to invert the * direction of the i/o operation as passed to * uiomove_fromphys(). */ switch (uio->uio_rw) { case UIO_WRITE: transp_uio.uio_rw = UIO_READ; break; case UIO_READ: transp_uio.uio_rw = UIO_WRITE; break; } transp_uio.uio_td = uio->uio_td; error = uiomove_fromphys(td->td_ma, ((vm_offset_t)uio->uio_iov->iov_base) & PAGE_MASK, xfersize, &transp_uio); adv = xfersize - transp_uio.uio_resid; pgadv = (((vm_offset_t)uio->uio_iov->iov_base + adv) >> PAGE_SHIFT) - (((vm_offset_t)uio->uio_iov->iov_base) >> PAGE_SHIFT); td->td_ma += pgadv; KASSERT(td->td_ma_cnt >= pgadv, ("consumed pages %d %d", td->td_ma_cnt, pgadv)); td->td_ma_cnt -= pgadv; uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + adv; uio->uio_iov->iov_len -= adv; uio->uio_resid -= adv; uio->uio_offset += adv; return (error); } int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize, struct uio *uio) { struct thread *td; vm_offset_t iov_base; int cnt, pgadv; td = curthread; if ((td->td_pflags & TDP_UIOHELD) == 0 || uio->uio_segflg != UIO_USERSPACE) return (uiomove_fromphys(ma, offset, xfersize, uio)); KASSERT(uio->uio_iovcnt == 1, ("uio_iovcnt %d", uio->uio_iovcnt)); cnt = xfersize > uio->uio_resid ? uio->uio_resid : xfersize; iov_base = (vm_offset_t)uio->uio_iov->iov_base; switch (uio->uio_rw) { case UIO_WRITE: pmap_copy_pages(td->td_ma, iov_base & PAGE_MASK, ma, offset, cnt); break; case UIO_READ: pmap_copy_pages(ma, offset, td->td_ma, iov_base & PAGE_MASK, cnt); break; } pgadv = ((iov_base + cnt) >> PAGE_SHIFT) - (iov_base >> PAGE_SHIFT); td->td_ma += pgadv; KASSERT(td->td_ma_cnt >= pgadv, ("consumed pages %d %d", td->td_ma_cnt, pgadv)); td->td_ma_cnt -= pgadv; uio->uio_iov->iov_base = (char *)(iov_base + cnt); uio->uio_iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; return (0); } /* * File table truncate routine. */ static int vn_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) { struct vattr vattr; struct mount *mp; struct vnode *vp; void *rl_cookie; int error; vp = fp->f_vnode; /* * Lock the whole range for truncation. Otherwise split i/o * might happen partly before and partly after the truncation. */ rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error) goto out1; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); AUDIT_ARG_VNODE1(vp); if (vp->v_type == VDIR) { error = EISDIR; goto out; } #ifdef MAC error = mac_vnode_check_write(active_cred, fp->f_cred, vp); if (error) goto out; #endif error = vn_writechk(vp); if (error == 0) { VATTR_NULL(&vattr); vattr.va_size = length; if ((fp->f_flag & O_FSYNC) != 0) vattr.va_vaflags |= VA_SYNC; error = VOP_SETATTR(vp, &vattr, fp->f_cred); } out: VOP_UNLOCK(vp, 0); vn_finished_write(mp); out1: vn_rangelock_unlock(vp, rl_cookie); return (error); } /* * File table vnode stat routine. */ static int vn_statfile(fp, sb, active_cred, td) struct file *fp; struct stat *sb; struct ucred *active_cred; struct thread *td; { struct vnode *vp = fp->f_vnode; int error; vn_lock(vp, LK_SHARED | LK_RETRY); error = vn_stat(vp, sb, active_cred, fp->f_cred, td); VOP_UNLOCK(vp, 0); return (error); } /* * Stat a vnode; implementation for the stat syscall */ int vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred, struct ucred *file_cred, struct thread *td) { struct vattr vattr; struct vattr *vap; int error; u_short mode; AUDIT_ARG_VNODE1(vp); #ifdef MAC error = mac_vnode_check_stat(active_cred, file_cred, vp); if (error) return (error); #endif vap = &vattr; /* * Initialize defaults for new and unusual fields, so that file * systems which don't support these fields don't need to know * about them. */ vap->va_birthtime.tv_sec = -1; vap->va_birthtime.tv_nsec = 0; vap->va_fsid = VNOVAL; vap->va_rdev = NODEV; error = VOP_GETATTR(vp, vap, active_cred); if (error) return (error); /* * Zero the spare stat fields */ bzero(sb, sizeof *sb); /* * Copy from vattr table */ if (vap->va_fsid != VNOVAL) sb->st_dev = vap->va_fsid; else sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0]; sb->st_ino = vap->va_fileid; mode = vap->va_mode; switch (vap->va_type) { case VREG: mode |= S_IFREG; break; case VDIR: mode |= S_IFDIR; break; case VBLK: mode |= S_IFBLK; break; case VCHR: mode |= S_IFCHR; break; case VLNK: mode |= S_IFLNK; break; case VSOCK: mode |= S_IFSOCK; break; case VFIFO: mode |= S_IFIFO; break; default: return (EBADF); } sb->st_mode = mode; sb->st_nlink = vap->va_nlink; sb->st_uid = vap->va_uid; sb->st_gid = vap->va_gid; sb->st_rdev = vap->va_rdev; if (vap->va_size > OFF_MAX) return (EOVERFLOW); sb->st_size = vap->va_size; sb->st_atim = vap->va_atime; sb->st_mtim = vap->va_mtime; sb->st_ctim = vap->va_ctime; sb->st_birthtim = vap->va_birthtime; /* * According to www.opengroup.org, the meaning of st_blksize is * "a filesystem-specific preferred I/O block size for this * object. In some filesystem types, this may vary from file * to file" * Use miminum/default of PAGE_SIZE (e.g. for VCHR). */ sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize); sb->st_flags = vap->va_flags; if (priv_check(td, PRIV_VFS_GENERATION)) sb->st_gen = 0; else sb->st_gen = vap->va_gen; sb->st_blocks = vap->va_bytes / S_BLKSIZE; return (0); } /* * File table vnode ioctl routine. */ static int vn_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) { struct vattr vattr; struct vnode *vp; int error; vp = fp->f_vnode; switch (vp->v_type) { case VDIR: case VREG: switch (com) { case FIONREAD: vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, active_cred); VOP_UNLOCK(vp, 0); if (error == 0) *(int *)data = vattr.va_size - fp->f_offset; return (error); case FIONBIO: case FIOASYNC: return (0); default: return (VOP_IOCTL(vp, com, data, fp->f_flag, active_cred, td)); } break; case VCHR: return (VOP_IOCTL(vp, com, data, fp->f_flag, active_cred, td)); default: return (ENOTTY); } } /* * File table vnode poll routine. */ static int vn_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { struct vnode *vp; int error; vp = fp->f_vnode; #ifdef MAC vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); AUDIT_ARG_VNODE1(vp); error = mac_vnode_check_poll(active_cred, fp->f_cred, vp); VOP_UNLOCK(vp, 0); if (!error) #endif error = VOP_POLL(vp, events, fp->f_cred, td); return (error); } /* * Acquire the requested lock and then check for validity. LK_RETRY * permits vn_lock to return doomed vnodes. */ int _vn_lock(struct vnode *vp, int flags, char *file, int line) { int error; VNASSERT((flags & LK_TYPE_MASK) != 0, vp, ("vn_lock: no locktype")); VNASSERT(vp->v_holdcnt != 0, vp, ("vn_lock: zero hold count")); retry: error = VOP_LOCK1(vp, flags, file, line); flags &= ~LK_INTERLOCK; /* Interlock is always dropped. */ KASSERT((flags & LK_RETRY) == 0 || error == 0, ("vn_lock: error %d incompatible with flags %#x", error, flags)); if ((flags & LK_RETRY) == 0) { if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) { VOP_UNLOCK(vp, 0); error = ENOENT; } } else if (error != 0) goto retry; return (error); } /* * File table vnode close routine. */ static int vn_closefile(struct file *fp, struct thread *td) { struct vnode *vp; struct flock lf; int error; bool ref; vp = fp->f_vnode; fp->f_ops = &badfileops; ref= (fp->f_flag & FHASLOCK) != 0 && fp->f_type == DTYPE_VNODE; error = vn_close1(vp, fp->f_flag, fp->f_cred, td, ref); if (__predict_false(ref)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); vrele(vp); } return (error); } static bool vn_suspendable(struct mount *mp) { return (mp->mnt_op->vfs_susp_clean != NULL); } /* * Preparing to start a filesystem write operation. If the operation is * permitted, then we bump the count of operations in progress and * proceed. If a suspend request is in progress, we wait until the * suspension is over, and then proceed. */ static int vn_start_write_locked(struct mount *mp, int flags) { int error, mflags; mtx_assert(MNT_MTX(mp), MA_OWNED); error = 0; /* * Check on status of suspension. */ if ((curthread->td_pflags & TDP_IGNSUSP) == 0 || mp->mnt_susp_owner != curthread) { mflags = ((mp->mnt_vfc->vfc_flags & VFCF_SBDRY) != 0 ? (flags & PCATCH) : 0) | (PUSER - 1); while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) { if (flags & V_NOWAIT) { error = EWOULDBLOCK; goto unlock; } error = msleep(&mp->mnt_flag, MNT_MTX(mp), mflags, "suspfs", 0); if (error) goto unlock; } } if (flags & V_XSLEEP) goto unlock; mp->mnt_writeopcount++; unlock: if (error != 0 || (flags & V_XSLEEP) != 0) MNT_REL(mp); MNT_IUNLOCK(mp); return (error); } int vn_start_write(struct vnode *vp, struct mount **mpp, int flags) { struct mount *mp; int error; KASSERT((flags & V_MNTREF) == 0 || (*mpp != NULL && vp == NULL), ("V_MNTREF requires mp")); error = 0; /* * If a vnode is provided, get and return the mount point that * to which it will write. */ if (vp != NULL) { if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) { *mpp = NULL; if (error != EOPNOTSUPP) return (error); return (0); } } if ((mp = *mpp) == NULL) return (0); if (!vn_suspendable(mp)) { if (vp != NULL || (flags & V_MNTREF) != 0) vfs_rel(mp); return (0); } /* * VOP_GETWRITEMOUNT() returns with the mp refcount held through * a vfs_ref(). * As long as a vnode is not provided we need to acquire a * refcount for the provided mountpoint too, in order to * emulate a vfs_ref(). */ MNT_ILOCK(mp); if (vp == NULL && (flags & V_MNTREF) == 0) MNT_REF(mp); return (vn_start_write_locked(mp, flags)); } /* * Secondary suspension. Used by operations such as vop_inactive * routines that are needed by the higher level functions. These * are allowed to proceed until all the higher level functions have * completed (indicated by mnt_writeopcount dropping to zero). At that * time, these operations are halted until the suspension is over. */ int vn_start_secondary_write(struct vnode *vp, struct mount **mpp, int flags) { struct mount *mp; int error; KASSERT((flags & V_MNTREF) == 0 || (*mpp != NULL && vp == NULL), ("V_MNTREF requires mp")); retry: if (vp != NULL) { if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) { *mpp = NULL; if (error != EOPNOTSUPP) return (error); return (0); } } /* * If we are not suspended or have not yet reached suspended * mode, then let the operation proceed. */ if ((mp = *mpp) == NULL) return (0); if (!vn_suspendable(mp)) { if (vp != NULL || (flags & V_MNTREF) != 0) vfs_rel(mp); return (0); } /* * VOP_GETWRITEMOUNT() returns with the mp refcount held through * a vfs_ref(). * As long as a vnode is not provided we need to acquire a * refcount for the provided mountpoint too, in order to * emulate a vfs_ref(). */ MNT_ILOCK(mp); if (vp == NULL && (flags & V_MNTREF) == 0) MNT_REF(mp); if ((mp->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND2)) == 0) { mp->mnt_secondary_writes++; mp->mnt_secondary_accwrites++; MNT_IUNLOCK(mp); return (0); } if (flags & V_NOWAIT) { MNT_REL(mp); MNT_IUNLOCK(mp); return (EWOULDBLOCK); } /* * Wait for the suspension to finish. */ error = msleep(&mp->mnt_flag, MNT_MTX(mp), (PUSER - 1) | PDROP | ((mp->mnt_vfc->vfc_flags & VFCF_SBDRY) != 0 ? (flags & PCATCH) : 0), "suspfs", 0); vfs_rel(mp); if (error == 0) goto retry; return (error); } /* * Filesystem write operation has completed. If we are suspending and this * operation is the last one, notify the suspender that the suspension is * now in effect. */ void vn_finished_write(struct mount *mp) { if (mp == NULL || !vn_suspendable(mp)) return; MNT_ILOCK(mp); MNT_REL(mp); mp->mnt_writeopcount--; if (mp->mnt_writeopcount < 0) panic("vn_finished_write: neg cnt"); if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 && mp->mnt_writeopcount <= 0) wakeup(&mp->mnt_writeopcount); MNT_IUNLOCK(mp); } /* * Filesystem secondary write operation has completed. If we are * suspending and this operation is the last one, notify the suspender * that the suspension is now in effect. */ void vn_finished_secondary_write(struct mount *mp) { if (mp == NULL || !vn_suspendable(mp)) return; MNT_ILOCK(mp); MNT_REL(mp); mp->mnt_secondary_writes--; if (mp->mnt_secondary_writes < 0) panic("vn_finished_secondary_write: neg cnt"); if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 && mp->mnt_secondary_writes <= 0) wakeup(&mp->mnt_secondary_writes); MNT_IUNLOCK(mp); } /* * Request a filesystem to suspend write operations. */ int vfs_write_suspend(struct mount *mp, int flags) { int error; MPASS(vn_suspendable(mp)); MNT_ILOCK(mp); if (mp->mnt_susp_owner == curthread) { MNT_IUNLOCK(mp); return (EALREADY); } while (mp->mnt_kern_flag & MNTK_SUSPEND) msleep(&mp->mnt_flag, MNT_MTX(mp), PUSER - 1, "wsuspfs", 0); /* * Unmount holds a write reference on the mount point. If we * own busy reference and drain for writers, we deadlock with * the reference draining in the unmount path. Callers of * vfs_write_suspend() must specify VS_SKIP_UNMOUNT if * vfs_busy() reference is owned and caller is not in the * unmount context. */ if ((flags & VS_SKIP_UNMOUNT) != 0 && (mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { MNT_IUNLOCK(mp); return (EBUSY); } mp->mnt_kern_flag |= MNTK_SUSPEND; mp->mnt_susp_owner = curthread; if (mp->mnt_writeopcount > 0) (void) msleep(&mp->mnt_writeopcount, MNT_MTX(mp), (PUSER - 1)|PDROP, "suspwt", 0); else MNT_IUNLOCK(mp); if ((error = VFS_SYNC(mp, MNT_SUSPEND)) != 0) vfs_write_resume(mp, 0); return (error); } /* * Request a filesystem to resume write operations. */ void vfs_write_resume(struct mount *mp, int flags) { MPASS(vn_suspendable(mp)); MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) { KASSERT(mp->mnt_susp_owner == curthread, ("mnt_susp_owner")); mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPEND2 | MNTK_SUSPENDED); mp->mnt_susp_owner = NULL; wakeup(&mp->mnt_writeopcount); wakeup(&mp->mnt_flag); curthread->td_pflags &= ~TDP_IGNSUSP; if ((flags & VR_START_WRITE) != 0) { MNT_REF(mp); mp->mnt_writeopcount++; } MNT_IUNLOCK(mp); if ((flags & VR_NO_SUSPCLR) == 0) VFS_SUSP_CLEAN(mp); } else if ((flags & VR_START_WRITE) != 0) { MNT_REF(mp); vn_start_write_locked(mp, 0); } else { MNT_IUNLOCK(mp); } } /* * Helper loop around vfs_write_suspend() for filesystem unmount VFS * methods. */ int vfs_write_suspend_umnt(struct mount *mp) { int error; MPASS(vn_suspendable(mp)); KASSERT((curthread->td_pflags & TDP_IGNSUSP) == 0, ("vfs_write_suspend_umnt: recursed")); /* dounmount() already called vn_start_write(). */ for (;;) { vn_finished_write(mp); error = vfs_write_suspend(mp, 0); if (error != 0) { vn_start_write(NULL, &mp, V_WAIT); return (error); } MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_SUSPENDED) != 0) break; MNT_IUNLOCK(mp); vn_start_write(NULL, &mp, V_WAIT); } mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2); wakeup(&mp->mnt_flag); MNT_IUNLOCK(mp); curthread->td_pflags |= TDP_IGNSUSP; return (0); } /* * Implement kqueues for files by translating it to vnode operation. */ static int vn_kqfilter(struct file *fp, struct knote *kn) { return (VOP_KQFILTER(fp->f_vnode, kn)); } /* * Simplified in-kernel wrapper calls for extended attribute access. * Both calls pass in a NULL credential, authorizing as "kernel" access. * Set IO_NODELOCKED in ioflg if the vnode is already locked. */ int vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, int *buflen, char *buf, struct thread *td) { struct uio auio; struct iovec iov; int error; iov.iov_len = *buflen; iov.iov_base = buf; auio.uio_iov = &iov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_offset = 0; auio.uio_resid = *buflen; if ((ioflg & IO_NODELOCKED) == 0) vn_lock(vp, LK_SHARED | LK_RETRY); ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held"); /* authorize attribute retrieval as kernel */ error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL, td); if ((ioflg & IO_NODELOCKED) == 0) VOP_UNLOCK(vp, 0); if (error == 0) { *buflen = *buflen - auio.uio_resid; } return (error); } /* * XXX failure mode if partially written? */ int vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, int buflen, char *buf, struct thread *td) { struct uio auio; struct iovec iov; struct mount *mp; int error; iov.iov_len = buflen; iov.iov_base = buf; auio.uio_iov = &iov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_offset = 0; auio.uio_resid = buflen; if ((ioflg & IO_NODELOCKED) == 0) { if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held"); /* authorize attribute setting as kernel */ error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td); if ((ioflg & IO_NODELOCKED) == 0) { vn_finished_write(mp); VOP_UNLOCK(vp, 0); } return (error); } int vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, struct thread *td) { struct mount *mp; int error; if ((ioflg & IO_NODELOCKED) == 0) { if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held"); /* authorize attribute removal as kernel */ error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL, td); if (error == EOPNOTSUPP) error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, NULL, td); if ((ioflg & IO_NODELOCKED) == 0) { vn_finished_write(mp); VOP_UNLOCK(vp, 0); } return (error); } static int vn_get_ino_alloc_vget(struct mount *mp, void *arg, int lkflags, struct vnode **rvp) { return (VFS_VGET(mp, *(ino_t *)arg, lkflags, rvp)); } int vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags, struct vnode **rvp) { return (vn_vget_ino_gen(vp, vn_get_ino_alloc_vget, &ino, lkflags, rvp)); } int vn_vget_ino_gen(struct vnode *vp, vn_get_ino_t alloc, void *alloc_arg, int lkflags, struct vnode **rvp) { struct mount *mp; int ltype, error; ASSERT_VOP_LOCKED(vp, "vn_vget_ino_get"); mp = vp->v_mount; ltype = VOP_ISLOCKED(vp); KASSERT(ltype == LK_EXCLUSIVE || ltype == LK_SHARED, ("vn_vget_ino: vp not locked")); error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) { vfs_ref(mp); VOP_UNLOCK(vp, 0); error = vfs_busy(mp, 0); vn_lock(vp, ltype | LK_RETRY); vfs_rel(mp); if (error != 0) return (ENOENT); if (vp->v_iflag & VI_DOOMED) { vfs_unbusy(mp); return (ENOENT); } } VOP_UNLOCK(vp, 0); error = alloc(mp, alloc_arg, lkflags, rvp); vfs_unbusy(mp); if (*rvp != vp) vn_lock(vp, ltype | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { if (error == 0) { if (*rvp == vp) vunref(vp); else vput(*rvp); } error = ENOENT; } return (error); } int vn_rlimit_fsize(const struct vnode *vp, const struct uio *uio, struct thread *td) { if (vp->v_type != VREG || td == NULL) return (0); if ((uoff_t)uio->uio_offset + uio->uio_resid > lim_cur(td, RLIMIT_FSIZE)) { PROC_LOCK(td->td_proc); kern_psignal(td->td_proc, SIGXFSZ); PROC_UNLOCK(td->td_proc); return (EFBIG); } return (0); } int vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) { struct vnode *vp; vp = fp->f_vnode; #ifdef AUDIT vn_lock(vp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(vp); VOP_UNLOCK(vp, 0); #endif return (setfmode(td, active_cred, vp, mode)); } int vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td) { struct vnode *vp; vp = fp->f_vnode; #ifdef AUDIT vn_lock(vp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(vp); VOP_UNLOCK(vp, 0); #endif return (setfown(td, active_cred, vp, uid, gid)); } void vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end) { vm_object_t object; if ((object = vp->v_object) == NULL) return; VM_OBJECT_WLOCK(object); vm_object_page_remove(object, start, end, 0); VM_OBJECT_WUNLOCK(object); } int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) { struct vattr va; daddr_t bn, bnp; uint64_t bsize; off_t noff; int error; KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, ("Wrong command %lu", cmd)); if (vn_lock(vp, LK_SHARED) != 0) return (EBADF); if (vp->v_type != VREG) { error = ENOTTY; goto unlock; } error = VOP_GETATTR(vp, &va, cred); if (error != 0) goto unlock; noff = *off; if (noff >= va.va_size) { error = ENXIO; goto unlock; } bsize = vp->v_mount->mnt_stat.f_iosize; for (bn = noff / bsize; noff < va.va_size; bn++, noff += bsize) { error = VOP_BMAP(vp, bn, NULL, &bnp, NULL, NULL); if (error == EOPNOTSUPP) { error = ENOTTY; goto unlock; } if ((bnp == -1 && cmd == FIOSEEKHOLE) || (bnp != -1 && cmd == FIOSEEKDATA)) { noff = bn * bsize; if (noff < *off) noff = *off; goto unlock; } } if (noff > va.va_size) noff = va.va_size; /* noff == va.va_size. There is an implicit hole at the end of file. */ if (cmd == FIOSEEKDATA) error = ENXIO; unlock: VOP_UNLOCK(vp, 0); if (error == 0) *off = noff; return (error); } int vn_seek(struct file *fp, off_t offset, int whence, struct thread *td) { struct ucred *cred; struct vnode *vp; struct vattr vattr; off_t foffset, size; int error, noneg; cred = td->td_ucred; vp = fp->f_vnode; foffset = foffset_lock(fp, 0); noneg = (vp->v_type != VCHR); error = 0; switch (whence) { case L_INCR: if (noneg && (foffset < 0 || (offset > 0 && foffset > OFF_MAX - offset))) { error = EOVERFLOW; break; } offset += foffset; break; case L_XTND: vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, cred); VOP_UNLOCK(vp, 0); if (error) break; /* * If the file references a disk device, then fetch * the media size and use that to determine the ending * offset. */ if (vattr.va_size == 0 && vp->v_type == VCHR && fo_ioctl(fp, DIOCGMEDIASIZE, &size, cred, td) == 0) vattr.va_size = size; if (noneg && (vattr.va_size > OFF_MAX || (offset > 0 && vattr.va_size > OFF_MAX - offset))) { error = EOVERFLOW; break; } offset += vattr.va_size; break; case L_SET: break; case SEEK_DATA: error = fo_ioctl(fp, FIOSEEKDATA, &offset, cred, td); break; case SEEK_HOLE: error = fo_ioctl(fp, FIOSEEKHOLE, &offset, cred, td); break; default: error = EINVAL; } if (error == 0 && noneg && offset < 0) error = EINVAL; if (error != 0) goto drop; VFS_KNOTE_UNLOCKED(vp, 0); td->td_uretoff.tdu_off = offset; drop: foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0); return (error); } int vn_utimes_perm(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { int error; /* * Grant permission if the caller is the owner of the file, or * the super-user, or has ACL_WRITE_ATTRIBUTES permission on * on the file. If the time pointer is null, then write * permission on the file is also sufficient. * * From NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes: * A user having ACL_WRITE_DATA or ACL_WRITE_ATTRIBUTES * will be allowed to set the times [..] to the current * server time. */ error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td); if (error != 0 && (vap->va_vaflags & VA_UTIMES_NULL) != 0) error = VOP_ACCESS(vp, VWRITE, cred, td); return (error); } int vn_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { struct vnode *vp; int error; if (fp->f_type == DTYPE_FIFO) kif->kf_type = KF_TYPE_FIFO; else kif->kf_type = KF_TYPE_VNODE; vp = fp->f_vnode; vref(vp); FILEDESC_SUNLOCK(fdp); error = vn_fill_kinfo_vnode(vp, kif); vrele(vp); FILEDESC_SLOCK(fdp); return (error); } static inline void vn_fill_junk(struct kinfo_file *kif) { size_t len, olen; /* * Simulate vn_fullpath returning changing values for a given * vp during e.g. coredump. */ len = (arc4random() % (sizeof(kif->kf_path) - 2)) + 1; olen = strlen(kif->kf_path); if (len < olen) strcpy(&kif->kf_path[len - 1], "$"); else for (; olen < len; olen++) strcpy(&kif->kf_path[olen], "A"); } int vn_fill_kinfo_vnode(struct vnode *vp, struct kinfo_file *kif) { struct vattr va; char *fullpath, *freepath; int error; - kif->kf_vnode_type = vntype_to_kinfo(vp->v_type); + kif->kf_un.kf_file.kf_file_type = vntype_to_kinfo(vp->v_type); freepath = NULL; fullpath = "-"; error = vn_fullpath(curthread, vp, &fullpath, &freepath); if (error == 0) { strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path)); } if (freepath != NULL) free(freepath, M_TEMP); KFAIL_POINT_CODE(DEBUG_FP, fill_kinfo_vnode__random_path, vn_fill_junk(kif); ); /* * Retrieve vnode attributes. */ va.va_fsid = VNOVAL; va.va_rdev = NODEV; vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &va, curthread->td_ucred); VOP_UNLOCK(vp, 0); if (error != 0) return (error); if (va.va_fsid != VNOVAL) kif->kf_un.kf_file.kf_file_fsid = va.va_fsid; else kif->kf_un.kf_file.kf_file_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; + kif->kf_un.kf_file.kf_file_fsid_freebsd11 = + kif->kf_un.kf_file.kf_file_fsid; /* truncate */ kif->kf_un.kf_file.kf_file_fileid = va.va_fileid; kif->kf_un.kf_file.kf_file_mode = MAKEIMODE(va.va_type, va.va_mode); kif->kf_un.kf_file.kf_file_size = va.va_size; kif->kf_un.kf_file.kf_file_rdev = va.va_rdev; + kif->kf_un.kf_file.kf_file_rdev_freebsd11 = + kif->kf_un.kf_file.kf_file_rdev; /* truncate */ return (0); } int vn_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, struct thread *td) { #ifdef HWPMC_HOOKS struct pmckern_map_in pkm; #endif struct mount *mp; struct vnode *vp; vm_object_t object; vm_prot_t maxprot; boolean_t writecounted; int error; #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \ defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) /* * POSIX shared-memory objects are defined to have * kernel persistence, and are not defined to support * read(2)/write(2) -- or even open(2). Thus, we can * use MAP_ASYNC to trade on-disk coherence for speed. * The shm_open(3) library routine turns on the FPOSIXSHM * flag to request this behavior. */ if ((fp->f_flag & FPOSIXSHM) != 0) flags |= MAP_NOSYNC; #endif vp = fp->f_vnode; /* * Ensure that file and memory protections are * compatible. Note that we only worry about * writability if mapping is shared; in this case, * current and max prot are dictated by the open file. * XXX use the vnode instead? Problem is: what * credentials do we use for determination? What if * proc does a setuid? */ mp = vp->v_mount; if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) { maxprot = VM_PROT_NONE; if ((prot & VM_PROT_EXECUTE) != 0) return (EACCES); } else maxprot = VM_PROT_EXECUTE; if ((fp->f_flag & FREAD) != 0) maxprot |= VM_PROT_READ; else if ((prot & VM_PROT_READ) != 0) return (EACCES); /* * If we are sharing potential changes via MAP_SHARED and we * are trying to get write permission although we opened it * without asking for it, bail out. */ if ((flags & MAP_SHARED) != 0) { if ((fp->f_flag & FWRITE) != 0) maxprot |= VM_PROT_WRITE; else if ((prot & VM_PROT_WRITE) != 0) return (EACCES); } else { maxprot |= VM_PROT_WRITE; cap_maxprot |= VM_PROT_WRITE; } maxprot &= cap_maxprot; /* * For regular files and shared memory, POSIX requires that * the value of foff be a legitimate offset within the data * object. In particular, negative offsets are invalid. * Blocking negative offsets and overflows here avoids * possible wraparound or user-level access into reserved * ranges of the data object later. In contrast, POSIX does * not dictate how offsets are used by device drivers, so in * the case of a device mapping a negative offset is passed * on. */ if ( #ifdef _LP64 size > OFF_MAX || #endif foff < 0 || foff > OFF_MAX - size) return (EINVAL); writecounted = FALSE; error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, vp, &foff, &object, &writecounted); if (error != 0) return (error); error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, foff, writecounted, td); if (error != 0) { /* * If this mapping was accounted for in the vnode's * writecount, then undo that now. */ if (writecounted) vnode_pager_release_writecount(object, 0, size); vm_object_deallocate(object); } #ifdef HWPMC_HOOKS /* Inform hwpmc(4) if an executable is being mapped. */ if (PMC_HOOK_INSTALLED(PMC_FN_MMAP)) { if ((prot & VM_PROT_EXECUTE) != 0 && error == 0) { pkm.pm_file = vp; pkm.pm_address = (uintptr_t) *addr; PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm); } } #endif return (error); } Index: head/sys/nlm/nlm_advlock.c =================================================================== --- head/sys/nlm/nlm_advlock.c (revision 318735) +++ head/sys/nlm/nlm_advlock.c (revision 318736) @@ -1,1273 +1,1274 @@ /*- * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ * Authors: Doug Rabson * Developed with Red Inc: Alfred Perlstein * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * We need to keep track of the svid values used for F_FLOCK locks. */ struct nlm_file_svid { int ns_refs; /* thread count + 1 if active */ int ns_svid; /* on-the-wire SVID for this file */ struct ucred *ns_ucred; /* creds to use for lock recovery */ void *ns_id; /* local struct file pointer */ bool_t ns_active; /* TRUE if we own a lock */ LIST_ENTRY(nlm_file_svid) ns_link; }; LIST_HEAD(nlm_file_svid_list, nlm_file_svid); #define NLM_SVID_HASH_SIZE 256 struct nlm_file_svid_list nlm_file_svids[NLM_SVID_HASH_SIZE]; struct mtx nlm_svid_lock; static struct unrhdr *nlm_svid_allocator; static volatile u_int nlm_xid = 1; static int nlm_setlock(struct nlm_host *host, struct rpc_callextra *ext, rpcvers_t vers, struct timeval *timo, int retries, struct vnode *vp, int op, struct flock *fl, int flags, int svid, size_t fhlen, void *fh, off_t size, bool_t reclaim); static int nlm_clearlock(struct nlm_host *host, struct rpc_callextra *ext, rpcvers_t vers, struct timeval *timo, int retries, struct vnode *vp, int op, struct flock *fl, int flags, int svid, size_t fhlen, void *fh, off_t size); static int nlm_getlock(struct nlm_host *host, struct rpc_callextra *ext, rpcvers_t vers, struct timeval *timo, int retries, struct vnode *vp, int op, struct flock *fl, int flags, int svid, size_t fhlen, void *fh, off_t size); static int nlm_map_status(nlm4_stats stat); static struct nlm_file_svid *nlm_find_svid(void *id); static void nlm_free_svid(struct nlm_file_svid *nf); static int nlm_init_lock(struct flock *fl, int flags, int svid, rpcvers_t vers, size_t fhlen, void *fh, off_t size, struct nlm4_lock *lock, char oh_space[32]); static void nlm_client_init(void *dummy) { int i; mtx_init(&nlm_svid_lock, "NLM svid lock", NULL, MTX_DEF); /* pid_max cannot be greater than PID_MAX */ nlm_svid_allocator = new_unrhdr(PID_MAX + 2, INT_MAX, &nlm_svid_lock); for (i = 0; i < NLM_SVID_HASH_SIZE; i++) LIST_INIT(&nlm_file_svids[i]); } SYSINIT(nlm_client_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_client_init, NULL); static int nlm_msg(struct thread *td, const char *server, const char *msg, int error) { struct proc *p; p = td ? td->td_proc : NULL; if (error) { tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server, msg, error); } else { tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg); } return (0); } struct nlm_feedback_arg { bool_t nf_printed; struct nfsmount *nf_nmp; }; static void nlm_down(struct nlm_feedback_arg *nf, struct thread *td, const char *msg, int error) { struct nfsmount *nmp = nf->nf_nmp; if (nmp == NULL) return; mtx_lock(&nmp->nm_mtx); if (!(nmp->nm_state & NFSSTA_LOCKTIMEO)) { nmp->nm_state |= NFSSTA_LOCKTIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 0); } else { mtx_unlock(&nmp->nm_mtx); } nf->nf_printed = TRUE; nlm_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); } static void nlm_up(struct nlm_feedback_arg *nf, struct thread *td, const char *msg) { struct nfsmount *nmp = nf->nf_nmp; if (!nf->nf_printed) return; nlm_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); mtx_lock(&nmp->nm_mtx); if (nmp->nm_state & NFSSTA_LOCKTIMEO) { nmp->nm_state &= ~NFSSTA_LOCKTIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 1); } else { mtx_unlock(&nmp->nm_mtx); } } static void nlm_feedback(int type, int proc, void *arg) { struct thread *td = curthread; struct nlm_feedback_arg *nf = (struct nlm_feedback_arg *) arg; switch (type) { case FEEDBACK_REXMIT2: case FEEDBACK_RECONNECT: nlm_down(nf, td, "lockd not responding", 0); break; case FEEDBACK_OK: nlm_up(nf, td, "lockd is alive again"); break; } } /* * nlm_advlock -- * NFS advisory byte-level locks. */ static int nlm_advlock_internal(struct vnode *vp, void *id, int op, struct flock *fl, int flags, bool_t reclaim, bool_t unlock_vp) { struct thread *td = curthread; struct nfsmount *nmp; off_t size; size_t fhlen; union nfsfh fh; struct sockaddr *sa; struct sockaddr_storage ss; - char servername[MNAMELEN]; + char *servername; struct timeval timo; int retries; rpcvers_t vers; struct nlm_host *host; struct rpc_callextra ext; struct nlm_feedback_arg nf; AUTH *auth; struct ucred *cred, *cred1; struct nlm_file_svid *ns; int svid; int error; int is_v3; ASSERT_VOP_LOCKED(vp, "nlm_advlock_1"); + servername = malloc(MNAMELEN, M_TEMP, M_WAITOK); /* XXXKIB vp locked */ nmp = VFSTONFS(vp->v_mount); /* * Push any pending writes to the server and flush our cache * so that if we are contending with another machine for a * file, we get whatever they wrote and vice-versa. */ if (op == F_SETLK || op == F_UNLCK) nmp->nm_vinvalbuf(vp, V_SAVE, td, 1); strcpy(servername, nmp->nm_hostname); nmp->nm_getinfo(vp, fh.fh_bytes, &fhlen, &ss, &is_v3, &size, &timo); sa = (struct sockaddr *) &ss; if (is_v3 != 0) vers = NLM_VERS4; else vers = NLM_VERS; if (nmp->nm_flag & NFSMNT_SOFT) retries = nmp->nm_retry; else retries = INT_MAX; /* * We need to switch to mount-point creds so that we can send * packets from a privileged port. Reference mnt_cred and * switch to them before unlocking the vnode, since mount * point could be unmounted right after unlock. */ cred = td->td_ucred; td->td_ucred = vp->v_mount->mnt_cred; crhold(td->td_ucred); if (unlock_vp) VOP_UNLOCK(vp, 0); host = nlm_find_host_by_name(servername, sa, vers); auth = authunix_create(cred); memset(&ext, 0, sizeof(ext)); nf.nf_printed = FALSE; nf.nf_nmp = nmp; ext.rc_auth = auth; ext.rc_feedback = nlm_feedback; ext.rc_feedback_arg = &nf; ext.rc_timers = NULL; ns = NULL; if (flags & F_FLOCK) { ns = nlm_find_svid(id); KASSERT(fl->l_start == 0 && fl->l_len == 0, ("F_FLOCK lock requests must be whole-file locks")); if (!ns->ns_ucred) { /* * Remember the creds used for locking in case * we need to recover the lock later. */ ns->ns_ucred = crdup(cred); } svid = ns->ns_svid; } else if (flags & F_REMOTE) { /* * If we are recovering after a server restart or * trashing locks on a force unmount, use the same * svid as last time. */ svid = fl->l_pid; } else { svid = ((struct proc *) id)->p_pid; } switch(op) { case F_SETLK: if ((flags & (F_FLOCK|F_WAIT)) == (F_FLOCK|F_WAIT) && fl->l_type == F_WRLCK) { /* * The semantics for flock(2) require that any * shared lock on the file must be released * before an exclusive lock is granted. The * local locking code interprets this by * unlocking the file before sleeping on a * blocked exclusive lock request. We * approximate this by first attempting * non-blocking and if that fails, we unlock * the file and block. */ error = nlm_setlock(host, &ext, vers, &timo, retries, vp, F_SETLK, fl, flags & ~F_WAIT, svid, fhlen, &fh.fh_bytes, size, reclaim); if (error == EAGAIN) { fl->l_type = F_UNLCK; error = nlm_clearlock(host, &ext, vers, &timo, retries, vp, F_UNLCK, fl, flags, svid, fhlen, &fh.fh_bytes, size); fl->l_type = F_WRLCK; if (!error) { mtx_lock(&nlm_svid_lock); if (ns->ns_active) { ns->ns_refs--; ns->ns_active = FALSE; } mtx_unlock(&nlm_svid_lock); flags |= F_WAIT; error = nlm_setlock(host, &ext, vers, &timo, retries, vp, F_SETLK, fl, flags, svid, fhlen, &fh.fh_bytes, size, reclaim); } } } else { error = nlm_setlock(host, &ext, vers, &timo, retries, vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size, reclaim); } if (!error && ns) { mtx_lock(&nlm_svid_lock); if (!ns->ns_active) { /* * Add one to the reference count to * hold onto the SVID for the lifetime * of the lock. Note that since * F_FLOCK only supports whole-file * locks, there can only be one active * lock for this SVID. */ ns->ns_refs++; ns->ns_active = TRUE; } mtx_unlock(&nlm_svid_lock); } break; case F_UNLCK: error = nlm_clearlock(host, &ext, vers, &timo, retries, vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size); if (!error && ns) { mtx_lock(&nlm_svid_lock); if (ns->ns_active) { ns->ns_refs--; ns->ns_active = FALSE; } mtx_unlock(&nlm_svid_lock); } break; case F_GETLK: error = nlm_getlock(host, &ext, vers, &timo, retries, vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size); break; default: error = EINVAL; break; } if (ns) nlm_free_svid(ns); cred1 = td->td_ucred; td->td_ucred = cred; crfree(cred1); AUTH_DESTROY(auth); nlm_host_release(host); - + free(servername, M_TEMP); return (error); } int nlm_advlock(struct vop_advlock_args *ap) { return (nlm_advlock_internal(ap->a_vp, ap->a_id, ap->a_op, ap->a_fl, ap->a_flags, FALSE, TRUE)); } /* * Set the creds of td to the creds of the given lock's owner. The new * creds reference count will be incremented via crhold. The caller is * responsible for calling crfree and restoring td's original creds. */ static void nlm_set_creds_for_lock(struct thread *td, struct flock *fl) { int i; struct nlm_file_svid *ns; struct proc *p; struct ucred *cred; cred = NULL; if (fl->l_pid > PID_MAX) { /* * If this was originally a F_FLOCK-style lock, we * recorded the creds used when it was originally * locked in the nlm_file_svid structure. */ mtx_lock(&nlm_svid_lock); for (i = 0; i < NLM_SVID_HASH_SIZE; i++) { for (ns = LIST_FIRST(&nlm_file_svids[i]); ns; ns = LIST_NEXT(ns, ns_link)) { if (ns->ns_svid == fl->l_pid) { cred = crhold(ns->ns_ucred); break; } } } mtx_unlock(&nlm_svid_lock); } else { /* * This lock is owned by a process. Get a reference to * the process creds. */ p = pfind(fl->l_pid); if (p) { cred = crhold(p->p_ucred); PROC_UNLOCK(p); } } /* * If we can't find a cred, fall back on the recovery * thread's cred. */ if (!cred) { cred = crhold(td->td_ucred); } td->td_ucred = cred; } static int nlm_reclaim_free_lock(struct vnode *vp, struct flock *fl, void *arg) { struct flock newfl; struct thread *td = curthread; struct ucred *oldcred; int error; newfl = *fl; newfl.l_type = F_UNLCK; oldcred = td->td_ucred; nlm_set_creds_for_lock(td, &newfl); error = nlm_advlock_internal(vp, NULL, F_UNLCK, &newfl, F_REMOTE, FALSE, FALSE); crfree(td->td_ucred); td->td_ucred = oldcred; return (error); } int nlm_reclaim(struct vop_reclaim_args *ap) { nlm_cancel_wait(ap->a_vp); lf_iteratelocks_vnode(ap->a_vp, nlm_reclaim_free_lock, NULL); return (0); } struct nlm_recovery_context { struct nlm_host *nr_host; /* host we are recovering */ int nr_state; /* remote NSM state for recovery */ }; static int nlm_client_recover_lock(struct vnode *vp, struct flock *fl, void *arg) { struct nlm_recovery_context *nr = (struct nlm_recovery_context *) arg; struct thread *td = curthread; struct ucred *oldcred; int state, error; /* * If the remote NSM state changes during recovery, the host * must have rebooted a second time. In that case, we must * restart the recovery. */ state = nlm_host_get_state(nr->nr_host); if (nr->nr_state != state) return (ERESTART); error = vn_lock(vp, LK_SHARED); if (error) return (error); oldcred = td->td_ucred; nlm_set_creds_for_lock(td, fl); error = nlm_advlock_internal(vp, NULL, F_SETLK, fl, F_REMOTE, TRUE, TRUE); crfree(td->td_ucred); td->td_ucred = oldcred; return (error); } void nlm_client_recovery(struct nlm_host *host) { struct nlm_recovery_context nr; int sysid, error; sysid = NLM_SYSID_CLIENT | nlm_host_get_sysid(host); do { nr.nr_host = host; nr.nr_state = nlm_host_get_state(host); error = lf_iteratelocks_sysid(sysid, nlm_client_recover_lock, &nr); } while (error == ERESTART); } static void nlm_convert_to_nlm_lock(struct nlm_lock *dst, struct nlm4_lock *src) { dst->caller_name = src->caller_name; dst->fh = src->fh; dst->oh = src->oh; dst->svid = src->svid; dst->l_offset = src->l_offset; dst->l_len = src->l_len; } static void nlm_convert_to_nlm4_holder(struct nlm4_holder *dst, struct nlm_holder *src) { dst->exclusive = src->exclusive; dst->svid = src->svid; dst->oh = src->oh; dst->l_offset = src->l_offset; dst->l_len = src->l_len; } static void nlm_convert_to_nlm4_res(struct nlm4_res *dst, struct nlm_res *src) { dst->cookie = src->cookie; dst->stat.stat = (enum nlm4_stats) src->stat.stat; } static enum clnt_stat nlm_test_rpc(rpcvers_t vers, nlm4_testargs *args, nlm4_testres *res, CLIENT *client, struct rpc_callextra *ext, struct timeval timo) { if (vers == NLM_VERS4) { return nlm4_test_4(args, res, client, ext, timo); } else { nlm_testargs args1; nlm_testres res1; enum clnt_stat stat; args1.cookie = args->cookie; args1.exclusive = args->exclusive; nlm_convert_to_nlm_lock(&args1.alock, &args->alock); memset(&res1, 0, sizeof(res1)); stat = nlm_test_1(&args1, &res1, client, ext, timo); if (stat == RPC_SUCCESS) { res->cookie = res1.cookie; res->stat.stat = (enum nlm4_stats) res1.stat.stat; if (res1.stat.stat == nlm_denied) nlm_convert_to_nlm4_holder( &res->stat.nlm4_testrply_u.holder, &res1.stat.nlm_testrply_u.holder); } return (stat); } } static enum clnt_stat nlm_lock_rpc(rpcvers_t vers, nlm4_lockargs *args, nlm4_res *res, CLIENT *client, struct rpc_callextra *ext, struct timeval timo) { if (vers == NLM_VERS4) { return nlm4_lock_4(args, res, client, ext, timo); } else { nlm_lockargs args1; nlm_res res1; enum clnt_stat stat; args1.cookie = args->cookie; args1.block = args->block; args1.exclusive = args->exclusive; nlm_convert_to_nlm_lock(&args1.alock, &args->alock); args1.reclaim = args->reclaim; args1.state = args->state; memset(&res1, 0, sizeof(res1)); stat = nlm_lock_1(&args1, &res1, client, ext, timo); if (stat == RPC_SUCCESS) { nlm_convert_to_nlm4_res(res, &res1); } return (stat); } } static enum clnt_stat nlm_cancel_rpc(rpcvers_t vers, nlm4_cancargs *args, nlm4_res *res, CLIENT *client, struct rpc_callextra *ext, struct timeval timo) { if (vers == NLM_VERS4) { return nlm4_cancel_4(args, res, client, ext, timo); } else { nlm_cancargs args1; nlm_res res1; enum clnt_stat stat; args1.cookie = args->cookie; args1.block = args->block; args1.exclusive = args->exclusive; nlm_convert_to_nlm_lock(&args1.alock, &args->alock); memset(&res1, 0, sizeof(res1)); stat = nlm_cancel_1(&args1, &res1, client, ext, timo); if (stat == RPC_SUCCESS) { nlm_convert_to_nlm4_res(res, &res1); } return (stat); } } static enum clnt_stat nlm_unlock_rpc(rpcvers_t vers, nlm4_unlockargs *args, nlm4_res *res, CLIENT *client, struct rpc_callextra *ext, struct timeval timo) { if (vers == NLM_VERS4) { return nlm4_unlock_4(args, res, client, ext, timo); } else { nlm_unlockargs args1; nlm_res res1; enum clnt_stat stat; args1.cookie = args->cookie; nlm_convert_to_nlm_lock(&args1.alock, &args->alock); memset(&res1, 0, sizeof(res1)); stat = nlm_unlock_1(&args1, &res1, client, ext, timo); if (stat == RPC_SUCCESS) { nlm_convert_to_nlm4_res(res, &res1); } return (stat); } } /* * Called after a lock request (set or clear) succeeded. We record the * details in the local lock manager. Note that since the remote * server has granted the lock, we can be sure that it doesn't * conflict with any other locks we have in the local lock manager. * * Since it is possible that host may also make NLM client requests to * our NLM server, we use a different sysid value to record our own * client locks. * * Note that since it is possible for us to receive replies from the * server in a different order than the locks were granted (e.g. if * many local threads are contending for the same lock), we must use a * blocking operation when registering with the local lock manager. * We expect that any actual wait will be rare and short hence we * ignore signals for this. */ static void nlm_record_lock(struct vnode *vp, int op, struct flock *fl, int svid, int sysid, off_t size) { struct vop_advlockasync_args a; struct flock newfl; struct proc *p; int error, stops_deferred; a.a_vp = vp; a.a_id = NULL; a.a_op = op; a.a_fl = &newfl; a.a_flags = F_REMOTE|F_WAIT|F_NOINTR; a.a_task = NULL; a.a_cookiep = NULL; newfl.l_start = fl->l_start; newfl.l_len = fl->l_len; newfl.l_type = fl->l_type; newfl.l_whence = fl->l_whence; newfl.l_pid = svid; newfl.l_sysid = NLM_SYSID_CLIENT | sysid; for (;;) { error = lf_advlockasync(&a, &vp->v_lockf, size); if (error == EDEADLK) { /* * Locks are associated with the processes and * not with threads. Suppose we have two * threads A1 A2 in one process, A1 locked * file f1, A2 is locking file f2, and A1 is * unlocking f1. Then remote server may * already unlocked f1, while local still not * yet scheduled A1 to make the call to local * advlock manager. The process B owns lock on * f2 and issued the lock on f1. Remote would * grant B the request on f1, but local would * return EDEADLK. */ pause("nlmdlk", 1); p = curproc; stops_deferred = sigdeferstop(SIGDEFERSTOP_OFF); PROC_LOCK(p); thread_suspend_check(0); PROC_UNLOCK(p); sigallowstop(stops_deferred); } else if (error == EINTR) { /* * lf_purgelocks() might wake up the lock * waiter and removed our lock graph edges. * There is no sense in re-trying recording * the lock to the local manager after * reclaim. */ error = 0; break; } else break; } KASSERT(error == 0 || error == ENOENT, ("Failed to register NFS lock locally - error=%d", error)); } static int nlm_setlock(struct nlm_host *host, struct rpc_callextra *ext, rpcvers_t vers, struct timeval *timo, int retries, struct vnode *vp, int op, struct flock *fl, int flags, int svid, size_t fhlen, void *fh, off_t size, bool_t reclaim) { struct nlm4_lockargs args; char oh_space[32]; struct nlm4_res res; u_int xid; CLIENT *client; enum clnt_stat stat; int retry, block, exclusive; void *wait_handle = NULL; int error; memset(&args, 0, sizeof(args)); memset(&res, 0, sizeof(res)); block = (flags & F_WAIT) ? TRUE : FALSE; exclusive = (fl->l_type == F_WRLCK); error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size, &args.alock, oh_space); if (error) return (error); args.block = block; args.exclusive = exclusive; args.reclaim = reclaim; args.state = nlm_nsm_state; retry = 5*hz; for (;;) { client = nlm_host_get_rpc(host, FALSE); if (!client) return (ENOLCK); /* XXX retry? */ if (block) wait_handle = nlm_register_wait_lock(&args.alock, vp); xid = atomic_fetchadd_int(&nlm_xid, 1); args.cookie.n_len = sizeof(xid); args.cookie.n_bytes = (char*) &xid; stat = nlm_lock_rpc(vers, &args, &res, client, ext, *timo); CLNT_RELEASE(client); if (stat != RPC_SUCCESS) { if (block) nlm_deregister_wait_lock(wait_handle); if (retries) { retries--; continue; } return (EINVAL); } /* * Free res.cookie. */ xdr_free((xdrproc_t) xdr_nlm4_res, &res); if (block && res.stat.stat != nlm4_blocked) nlm_deregister_wait_lock(wait_handle); if (res.stat.stat == nlm4_denied_grace_period) { /* * The server has recently rebooted and is * giving old clients a change to reclaim * their locks. Wait for a few seconds and try * again. */ error = tsleep(&args, PCATCH, "nlmgrace", retry); if (error && error != EWOULDBLOCK) return (error); retry = 2*retry; if (retry > 30*hz) retry = 30*hz; continue; } if (block && res.stat.stat == nlm4_blocked) { /* * The server should call us back with a * granted message when the lock succeeds. In * order to deal with broken servers, lost * granted messages and server reboots, we * will also re-try every few seconds. */ error = nlm_wait_lock(wait_handle, retry); if (error == EWOULDBLOCK) { retry = 2*retry; if (retry > 30*hz) retry = 30*hz; continue; } if (error) { /* * We need to call the server to * cancel our lock request. */ nlm4_cancargs cancel; memset(&cancel, 0, sizeof(cancel)); xid = atomic_fetchadd_int(&nlm_xid, 1); cancel.cookie.n_len = sizeof(xid); cancel.cookie.n_bytes = (char*) &xid; cancel.block = block; cancel.exclusive = exclusive; cancel.alock = args.alock; do { client = nlm_host_get_rpc(host, FALSE); if (!client) /* XXX retry? */ return (ENOLCK); stat = nlm_cancel_rpc(vers, &cancel, &res, client, ext, *timo); CLNT_RELEASE(client); if (stat != RPC_SUCCESS) { /* * We need to cope * with temporary * network partitions * as well as server * reboots. This means * we have to keep * trying to cancel * until the server * wakes up again. */ pause("nlmcancel", 10*hz); } } while (stat != RPC_SUCCESS); /* * Free res.cookie. */ xdr_free((xdrproc_t) xdr_nlm4_res, &res); switch (res.stat.stat) { case nlm_denied: /* * There was nothing * to cancel. We are * going to go ahead * and assume we got * the lock. */ error = 0; break; case nlm4_denied_grace_period: /* * The server has * recently rebooted - * treat this as a * successful * cancellation. */ break; case nlm4_granted: /* * We managed to * cancel. */ break; default: /* * Broken server * implementation - * can't really do * anything here. */ break; } } } else { error = nlm_map_status(res.stat.stat); } if (!error && !reclaim) { nlm_record_lock(vp, op, fl, args.alock.svid, nlm_host_get_sysid(host), size); nlm_host_monitor(host, 0); } return (error); } } static int nlm_clearlock(struct nlm_host *host, struct rpc_callextra *ext, rpcvers_t vers, struct timeval *timo, int retries, struct vnode *vp, int op, struct flock *fl, int flags, int svid, size_t fhlen, void *fh, off_t size) { struct nlm4_unlockargs args; char oh_space[32]; struct nlm4_res res; u_int xid; CLIENT *client; enum clnt_stat stat; int error; memset(&args, 0, sizeof(args)); memset(&res, 0, sizeof(res)); error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size, &args.alock, oh_space); if (error) return (error); for (;;) { client = nlm_host_get_rpc(host, FALSE); if (!client) return (ENOLCK); /* XXX retry? */ xid = atomic_fetchadd_int(&nlm_xid, 1); args.cookie.n_len = sizeof(xid); args.cookie.n_bytes = (char*) &xid; stat = nlm_unlock_rpc(vers, &args, &res, client, ext, *timo); CLNT_RELEASE(client); if (stat != RPC_SUCCESS) { if (retries) { retries--; continue; } return (EINVAL); } /* * Free res.cookie. */ xdr_free((xdrproc_t) xdr_nlm4_res, &res); if (res.stat.stat == nlm4_denied_grace_period) { /* * The server has recently rebooted and is * giving old clients a change to reclaim * their locks. Wait for a few seconds and try * again. */ error = tsleep(&args, PCATCH, "nlmgrace", 5*hz); if (error && error != EWOULDBLOCK) return (error); continue; } /* * If we are being called via nlm_reclaim (which will * use the F_REMOTE flag), don't record the lock * operation in the local lock manager since the vnode * is going away. */ if (!(flags & F_REMOTE)) nlm_record_lock(vp, op, fl, args.alock.svid, nlm_host_get_sysid(host), size); return (0); } } static int nlm_getlock(struct nlm_host *host, struct rpc_callextra *ext, rpcvers_t vers, struct timeval *timo, int retries, struct vnode *vp, int op, struct flock *fl, int flags, int svid, size_t fhlen, void *fh, off_t size) { struct nlm4_testargs args; char oh_space[32]; struct nlm4_testres res; u_int xid; CLIENT *client; enum clnt_stat stat; int exclusive; int error; KASSERT(!(flags & F_FLOCK), ("unexpected F_FLOCK for F_GETLK")); memset(&args, 0, sizeof(args)); memset(&res, 0, sizeof(res)); exclusive = (fl->l_type == F_WRLCK); error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size, &args.alock, oh_space); if (error) return (error); args.exclusive = exclusive; for (;;) { client = nlm_host_get_rpc(host, FALSE); if (!client) return (ENOLCK); /* XXX retry? */ xid = atomic_fetchadd_int(&nlm_xid, 1); args.cookie.n_len = sizeof(xid); args.cookie.n_bytes = (char*) &xid; stat = nlm_test_rpc(vers, &args, &res, client, ext, *timo); CLNT_RELEASE(client); if (stat != RPC_SUCCESS) { if (retries) { retries--; continue; } return (EINVAL); } if (res.stat.stat == nlm4_denied_grace_period) { /* * The server has recently rebooted and is * giving old clients a change to reclaim * their locks. Wait for a few seconds and try * again. */ xdr_free((xdrproc_t) xdr_nlm4_testres, &res); error = tsleep(&args, PCATCH, "nlmgrace", 5*hz); if (error && error != EWOULDBLOCK) return (error); continue; } if (res.stat.stat == nlm4_denied) { struct nlm4_holder *h = &res.stat.nlm4_testrply_u.holder; fl->l_start = h->l_offset; fl->l_len = h->l_len; fl->l_pid = h->svid; if (h->exclusive) fl->l_type = F_WRLCK; else fl->l_type = F_RDLCK; fl->l_whence = SEEK_SET; fl->l_sysid = 0; } else { fl->l_type = F_UNLCK; } xdr_free((xdrproc_t) xdr_nlm4_testres, &res); return (0); } } static int nlm_map_status(nlm4_stats stat) { switch (stat) { case nlm4_granted: return (0); case nlm4_denied: return (EAGAIN); case nlm4_denied_nolocks: return (ENOLCK); case nlm4_deadlck: return (EDEADLK); case nlm4_rofs: return (EROFS); case nlm4_stale_fh: return (ESTALE); case nlm4_fbig: return (EFBIG); case nlm4_failed: return (EACCES); default: return (EINVAL); } } static struct nlm_file_svid * nlm_find_svid(void *id) { struct nlm_file_svid *ns, *newns; int h; h = (((uintptr_t) id) >> 7) % NLM_SVID_HASH_SIZE; mtx_lock(&nlm_svid_lock); LIST_FOREACH(ns, &nlm_file_svids[h], ns_link) { if (ns->ns_id == id) { ns->ns_refs++; break; } } mtx_unlock(&nlm_svid_lock); if (!ns) { int svid = alloc_unr(nlm_svid_allocator); newns = malloc(sizeof(struct nlm_file_svid), M_NLM, M_WAITOK); newns->ns_refs = 1; newns->ns_id = id; newns->ns_svid = svid; newns->ns_ucred = NULL; newns->ns_active = FALSE; /* * We need to check for a race with some other * thread allocating a svid for this file. */ mtx_lock(&nlm_svid_lock); LIST_FOREACH(ns, &nlm_file_svids[h], ns_link) { if (ns->ns_id == id) { ns->ns_refs++; break; } } if (ns) { mtx_unlock(&nlm_svid_lock); free_unr(nlm_svid_allocator, newns->ns_svid); free(newns, M_NLM); } else { LIST_INSERT_HEAD(&nlm_file_svids[h], newns, ns_link); ns = newns; mtx_unlock(&nlm_svid_lock); } } return (ns); } static void nlm_free_svid(struct nlm_file_svid *ns) { mtx_lock(&nlm_svid_lock); ns->ns_refs--; if (!ns->ns_refs) { KASSERT(!ns->ns_active, ("Freeing active SVID")); LIST_REMOVE(ns, ns_link); mtx_unlock(&nlm_svid_lock); free_unr(nlm_svid_allocator, ns->ns_svid); if (ns->ns_ucred) crfree(ns->ns_ucred); free(ns, M_NLM); } else { mtx_unlock(&nlm_svid_lock); } } static int nlm_init_lock(struct flock *fl, int flags, int svid, rpcvers_t vers, size_t fhlen, void *fh, off_t size, struct nlm4_lock *lock, char oh_space[32]) { size_t oh_len; off_t start, len; if (fl->l_whence == SEEK_END) { if (size > OFF_MAX || (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) return (EOVERFLOW); start = size + fl->l_start; } else if (fl->l_whence == SEEK_SET || fl->l_whence == SEEK_CUR) { start = fl->l_start; } else { return (EINVAL); } if (start < 0) return (EINVAL); if (fl->l_len < 0) { len = -fl->l_len; start -= len; if (start < 0) return (EINVAL); } else { len = fl->l_len; } if (vers == NLM_VERS) { /* * Enforce range limits on V1 locks */ if (start > 0xffffffffLL || len > 0xffffffffLL) return (EOVERFLOW); } snprintf(oh_space, 32, "%d@", svid); oh_len = strlen(oh_space); getcredhostname(NULL, oh_space + oh_len, 32 - oh_len); oh_len = strlen(oh_space); memset(lock, 0, sizeof(*lock)); lock->caller_name = prison0.pr_hostname; lock->fh.n_len = fhlen; lock->fh.n_bytes = fh; lock->oh.n_len = oh_len; lock->oh.n_bytes = oh_space; lock->svid = svid; lock->l_offset = start; lock->l_len = len; return (0); } Index: head/sys/security/audit/audit_private.h =================================================================== --- head/sys/security/audit/audit_private.h (revision 318735) +++ head/sys/security/audit/audit_private.h (revision 318736) @@ -1,500 +1,500 @@ /*- * Copyright (c) 1999-2009 Apple Inc. * Copyright (c) 2016-2017 Robert N. M. Watson * All rights reserved. * * Portions of this software were developed by BAE Systems, the University of * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent * Computing (TC) research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Apple Inc. ("Apple") nor the names of * its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * This include file contains function prototypes and type definitions used * within the audit implementation. */ #ifndef _SECURITY_AUDIT_PRIVATE_H_ #define _SECURITY_AUDIT_PRIVATE_H_ #ifndef _KERNEL #error "no user-serviceable parts inside" #endif #include #include #include #include #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_AUDITBSM); MALLOC_DECLARE(M_AUDITDATA); MALLOC_DECLARE(M_AUDITPATH); MALLOC_DECLARE(M_AUDITTEXT); MALLOC_DECLARE(M_AUDITGIDSET); #endif /* * Audit control variables that are usually set/read via system calls and * used to control various aspects of auditing. */ extern struct au_qctrl audit_qctrl; extern struct audit_fstat audit_fstat; extern struct au_mask audit_nae_mask; extern int audit_panic_on_write_fail; extern int audit_fail_stop; extern int audit_argv; extern int audit_arge; /* * Success/failure conditions for the conversion of a kernel audit record to * BSM format. */ #define BSM_SUCCESS 0 #define BSM_FAILURE 1 #define BSM_NOAUDIT 2 /* * Defines for the kernel audit record k_ar_commit field. Flags are set to * indicate what sort of record it is, and which preselection mechanism * selected it. */ #define AR_COMMIT_KERNEL 0x00000001U #define AR_COMMIT_USER 0x00000010U #define AR_PRESELECT_TRAIL 0x00001000U #define AR_PRESELECT_PIPE 0x00002000U #define AR_PRESELECT_USER_TRAIL 0x00004000U #define AR_PRESELECT_USER_PIPE 0x00008000U #define AR_PRESELECT_DTRACE 0x00010000U /* * Audit data is generated as a stream of struct audit_record structures, * linked by struct kaudit_record, and contain storage for possible audit so * that it will not need to be allocated during the processing of a system * call, both improving efficiency and avoiding sleeping at untimely moments. * This structure is converted to BSM format before being written to disk. */ struct vnode_au_info { mode_t vn_mode; uid_t vn_uid; gid_t vn_gid; - dev_t vn_dev; - long vn_fsid; - long vn_fileid; + u_int32_t vn_dev; /* XXX dev_t compatibility */ + long vn_fsid; /* XXX uint64_t compatibility */ + long vn_fileid; /* XXX ino_t compatibility */ long vn_gen; }; struct groupset { gid_t *gidset; u_int gidset_size; }; struct socket_au_info { int so_domain; int so_type; int so_protocol; in_addr_t so_raddr; /* Remote address if INET socket. */ in_addr_t so_laddr; /* Local address if INET socket. */ u_short so_rport; /* Remote port. */ u_short so_lport; /* Local port. */ }; /* * The following is used for A_OLDSETQCTRL and AU_OLDGETQCTRL and a 64-bit * userland. */ struct au_qctrl64 { u_int64_t aq64_hiwater; u_int64_t aq64_lowater; u_int64_t aq64_bufsz; u_int64_t aq64_delay; u_int64_t aq64_minfree; }; typedef struct au_qctrl64 au_qctrl64_t; union auditon_udata { char *au_path; int au_cond; int au_flags; int au_policy; int au_trigger; int64_t au_cond64; int64_t au_policy64; au_evclass_map_t au_evclass; au_mask_t au_mask; auditinfo_t au_auinfo; auditpinfo_t au_aupinfo; auditpinfo_addr_t au_aupinfo_addr; au_qctrl_t au_qctrl; au_qctrl64_t au_qctrl64; au_stat_t au_stat; au_fstat_t au_fstat; auditinfo_addr_t au_kau_info; au_evname_map_t au_evname; }; struct posix_ipc_perm { uid_t pipc_uid; gid_t pipc_gid; mode_t pipc_mode; }; struct audit_record { /* Audit record header. */ u_int32_t ar_magic; int ar_event; int ar_retval; /* value returned to the process */ int ar_errno; /* return status of system call */ struct timespec ar_starttime; struct timespec ar_endtime; u_int64_t ar_valid_arg; /* Bitmask of valid arguments */ /* Audit subject information. */ struct xucred ar_subj_cred; uid_t ar_subj_ruid; gid_t ar_subj_rgid; gid_t ar_subj_egid; uid_t ar_subj_auid; /* Audit user ID */ pid_t ar_subj_asid; /* Audit session ID */ pid_t ar_subj_pid; struct au_tid ar_subj_term; struct au_tid_addr ar_subj_term_addr; struct au_mask ar_subj_amask; /* Operation arguments. */ uid_t ar_arg_euid; uid_t ar_arg_ruid; uid_t ar_arg_suid; gid_t ar_arg_egid; gid_t ar_arg_rgid; gid_t ar_arg_sgid; pid_t ar_arg_pid; pid_t ar_arg_asid; struct au_tid ar_arg_termid; struct au_tid_addr ar_arg_termid_addr; uid_t ar_arg_uid; uid_t ar_arg_auid; gid_t ar_arg_gid; struct groupset ar_arg_groups; int ar_arg_fd; int ar_arg_atfd1; int ar_arg_atfd2; int ar_arg_fflags; mode_t ar_arg_mode; - int ar_arg_dev; + int ar_arg_dev; /* XXX dev_t compatibility */ long ar_arg_value; void *ar_arg_addr; int ar_arg_len; int ar_arg_mask; u_int ar_arg_signum; char ar_arg_login[MAXLOGNAME]; int ar_arg_ctlname[CTL_MAXNAME]; struct socket_au_info ar_arg_sockinfo; char *ar_arg_upath1; char *ar_arg_upath2; char *ar_arg_text; struct au_mask ar_arg_amask; struct vnode_au_info ar_arg_vnode1; struct vnode_au_info ar_arg_vnode2; int ar_arg_cmd; int ar_arg_svipc_which; int ar_arg_svipc_cmd; struct ipc_perm ar_arg_svipc_perm; int ar_arg_svipc_id; void *ar_arg_svipc_addr; struct posix_ipc_perm ar_arg_pipc_perm; union auditon_udata ar_arg_auditon; char *ar_arg_argv; int ar_arg_argc; char *ar_arg_envv; int ar_arg_envc; int ar_arg_exitstatus; int ar_arg_exitretval; struct sockaddr_storage ar_arg_sockaddr; cap_rights_t ar_arg_rights; uint32_t ar_arg_fcntl_rights; char ar_jailname[MAXHOSTNAMELEN]; }; /* * Arguments in the audit record are initially not defined; flags are set to * indicate if they are present so they can be included in the audit log * stream only if defined. */ #define ARG_EUID 0x0000000000000001ULL #define ARG_RUID 0x0000000000000002ULL #define ARG_SUID 0x0000000000000004ULL #define ARG_EGID 0x0000000000000008ULL #define ARG_RGID 0x0000000000000010ULL #define ARG_SGID 0x0000000000000020ULL #define ARG_PID 0x0000000000000040ULL #define ARG_UID 0x0000000000000080ULL #define ARG_AUID 0x0000000000000100ULL #define ARG_GID 0x0000000000000200ULL #define ARG_FD 0x0000000000000400ULL #define ARG_POSIX_IPC_PERM 0x0000000000000800ULL #define ARG_FFLAGS 0x0000000000001000ULL #define ARG_MODE 0x0000000000002000ULL #define ARG_DEV 0x0000000000004000ULL #define ARG_ADDR 0x0000000000008000ULL #define ARG_LEN 0x0000000000010000ULL #define ARG_MASK 0x0000000000020000ULL #define ARG_SIGNUM 0x0000000000040000ULL #define ARG_LOGIN 0x0000000000080000ULL #define ARG_SADDRINET 0x0000000000100000ULL #define ARG_SADDRINET6 0x0000000000200000ULL #define ARG_SADDRUNIX 0x0000000000400000ULL #define ARG_TERMID_ADDR 0x0000000000400000ULL #define ARG_UNUSED2 0x0000000001000000ULL #define ARG_UPATH1 0x0000000002000000ULL #define ARG_UPATH2 0x0000000004000000ULL #define ARG_TEXT 0x0000000008000000ULL #define ARG_VNODE1 0x0000000010000000ULL #define ARG_VNODE2 0x0000000020000000ULL #define ARG_SVIPC_CMD 0x0000000040000000ULL #define ARG_SVIPC_PERM 0x0000000080000000ULL #define ARG_SVIPC_ID 0x0000000100000000ULL #define ARG_SVIPC_ADDR 0x0000000200000000ULL #define ARG_GROUPSET 0x0000000400000000ULL #define ARG_CMD 0x0000000800000000ULL #define ARG_SOCKINFO 0x0000001000000000ULL #define ARG_ASID 0x0000002000000000ULL #define ARG_TERMID 0x0000004000000000ULL #define ARG_AUDITON 0x0000008000000000ULL #define ARG_VALUE 0x0000010000000000ULL #define ARG_AMASK 0x0000020000000000ULL #define ARG_CTLNAME 0x0000040000000000ULL #define ARG_PROCESS 0x0000080000000000ULL #define ARG_MACHPORT1 0x0000100000000000ULL #define ARG_MACHPORT2 0x0000200000000000ULL #define ARG_EXIT 0x0000400000000000ULL #define ARG_IOVECSTR 0x0000800000000000ULL #define ARG_ARGV 0x0001000000000000ULL #define ARG_ENVV 0x0002000000000000ULL #define ARG_ATFD1 0x0004000000000000ULL #define ARG_ATFD2 0x0008000000000000ULL #define ARG_RIGHTS 0x0010000000000000ULL #define ARG_FCNTL_RIGHTS 0x0020000000000000ULL #define ARG_SVIPC_WHICH 0x0200000000000000ULL #define ARG_NONE 0x0000000000000000ULL #define ARG_ALL 0xFFFFFFFFFFFFFFFFULL #define ARG_IS_VALID(kar, arg) ((kar)->k_ar.ar_valid_arg & (arg)) #define ARG_SET_VALID(kar, arg) do { \ (kar)->k_ar.ar_valid_arg |= (arg); \ } while (0) #define ARG_CLEAR_VALID(kar, arg) do { \ (kar)->k_ar.ar_valid_arg &= ~(arg); \ } while (0) /* * In-kernel version of audit record; the basic record plus queue meta-data. * This record can also have a pointer set to some opaque data that will be * passed through to the audit writing mechanism. */ struct kaudit_record { struct audit_record k_ar; u_int32_t k_ar_commit; void *k_udata; /* User data. */ u_int k_ulen; /* User data length. */ struct uthread *k_uthread; /* Audited thread. */ void *k_dtaudit_state; TAILQ_ENTRY(kaudit_record) k_q; }; TAILQ_HEAD(kaudit_queue, kaudit_record); /* * Functions to manage the allocation, release, and commit of kernel audit * records. */ void audit_abort(struct kaudit_record *ar); void audit_commit(struct kaudit_record *ar, int error, int retval); struct kaudit_record *audit_new(int event, struct thread *td); /* * Functions relating to the conversion of internal kernel audit records to * the BSM file format. */ struct au_record; int kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau); int bsm_rec_verify(void *rec); /* * Kernel versions of the libbsm audit record functions. */ void kau_free(struct au_record *rec); void kau_init(void); /* * Return values for pre-selection and post-selection decisions. */ #define AU_PRS_SUCCESS 1 #define AU_PRS_FAILURE 2 #define AU_PRS_BOTH (AU_PRS_SUCCESS|AU_PRS_FAILURE) /* * Data structures relating to the kernel audit queue. Ideally, these might * be abstracted so that only accessor methods are exposed. */ extern struct mtx audit_mtx; extern struct cv audit_watermark_cv; extern struct cv audit_worker_cv; extern struct kaudit_queue audit_q; extern int audit_q_len; extern int audit_pre_q_len; extern int audit_in_failure; /* * Flags to use on audit files when opening and closing. */ #define AUDIT_OPEN_FLAGS (FWRITE | O_APPEND) #define AUDIT_CLOSE_FLAGS (FWRITE | O_APPEND) /* * Audit event-to-name mapping structure, maintained in audit_bsm_klib.c. It * appears in this header so that the DTrace audit provider can dereference * instances passed back in the au_evname_foreach() callbacks. Safe access to * its fields requires holding ene_lock (after it is visible in the global * table). * * Locking: * (c) - Constant after inserted in the global table * (l) - Protected by ene_lock * (m) - Protected by evnamemap_lock (audit_bsm_klib.c) * (M) - Writes protected by evnamemap_lock; reads unprotected. */ struct evname_elem { au_event_t ene_event; /* (c) */ char ene_name[EVNAMEMAP_NAME_SIZE]; /* (l) */ LIST_ENTRY(evname_elem) ene_entry; /* (m) */ struct mtx ene_lock; /* DTrace probe IDs; 0 if not yet registered. */ uint32_t ene_commit_probe_id; /* (M) */ uint32_t ene_bsm_probe_id; /* (M) */ /* Flags indicating if the probes enabled or not. */ int ene_commit_probe_enabled; /* (M) */ int ene_bsm_probe_enabled; /* (M) */ }; #define EVNAME_LOCK(ene) mtx_lock(&(ene)->ene_lock) #define EVNAME_UNLOCK(ene) mtx_unlock(&(ene)->ene_lock) /* * Callback function typedef for the same. */ typedef void (*au_evnamemap_callback_t)(struct evname_elem *ene); /* * DTrace audit provider (dtaudit) hooks -- to be set non-NULL when the audit * provider is loaded and ready to be called into. */ extern void *(*dtaudit_hook_preselect)(au_id_t auid, au_event_t event, au_class_t class); extern int (*dtaudit_hook_commit)(struct kaudit_record *kar, au_id_t auid, au_event_t event, au_class_t class, int sorf); extern void (*dtaudit_hook_bsm)(struct kaudit_record *kar, au_id_t auid, au_event_t event, au_class_t class, int sorf, void *bsm_data, size_t bsm_len); #include #include #include /* * Some of the BSM tokenizer functions take different parameters in the * kernel implementations in order to save the copying of large kernel data * structures. The prototypes of these functions are declared here. */ token_t *kau_to_socket(struct socket_au_info *soi); /* * audit_klib prototypes */ int au_preselect(au_event_t event, au_class_t class, au_mask_t *mask_p, int sorf); void au_evclassmap_init(void); void au_evclassmap_insert(au_event_t event, au_class_t class); au_class_t au_event_class(au_event_t event); void au_evnamemap_init(void); void au_evnamemap_insert(au_event_t event, const char *name); void au_evnamemap_foreach(au_evnamemap_callback_t callback); struct evname_elem *au_evnamemap_lookup(au_event_t event); int au_event_name(au_event_t event, char *name); au_event_t audit_ctlname_to_sysctlevent(int name[], uint64_t valid_arg); au_event_t audit_flags_and_error_to_openevent(int oflags, int error); au_event_t audit_flags_and_error_to_openatevent(int oflags, int error); au_event_t audit_msgctl_to_event(int cmd); au_event_t audit_msgsys_to_event(int which); au_event_t audit_semctl_to_event(int cmd); au_event_t audit_semsys_to_event(int which); au_event_t audit_shmsys_to_event(int which); void audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath); au_event_t auditon_command_event(int cmd); /* * Audit trigger events notify user space of kernel audit conditions * asynchronously. */ void audit_trigger_init(void); int audit_send_trigger(unsigned int trigger); /* * Accessor functions to manage global audit state. */ void audit_set_kinfo(struct auditinfo_addr *); void audit_get_kinfo(struct auditinfo_addr *); /* * General audit related functions. */ struct kaudit_record *currecord(void); void audit_free(struct kaudit_record *ar); void audit_shutdown(void *arg, int howto); void audit_rotate_vnode(struct ucred *cred, struct vnode *vp); void audit_worker_init(void); /* * Audit pipe functions. */ int audit_pipe_preselect(au_id_t auid, au_event_t event, au_class_t class, int sorf, int trail_select); void audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class, int sorf, int trail_select, void *record, u_int record_len); void audit_pipe_submit_user(void *record, u_int record_len); #endif /* ! _SECURITY_AUDIT_PRIVATE_H_ */ Index: head/sys/sys/_types.h =================================================================== --- head/sys/sys/_types.h (revision 318735) +++ head/sys/sys/_types.h (revision 318736) @@ -1,123 +1,123 @@ /*- * Copyright (c) 2002 Mike Barcroft * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS__TYPES_H_ #define _SYS__TYPES_H_ #include #include /* * Standard type definitions. */ typedef __int32_t __blksize_t; /* file block size */ typedef __int64_t __blkcnt_t; /* file block count */ typedef __int32_t __clockid_t; /* clock_gettime()... */ typedef __uint32_t __fflags_t; /* file flags */ typedef __uint64_t __fsblkcnt_t; typedef __uint64_t __fsfilcnt_t; typedef __uint32_t __gid_t; typedef __int64_t __id_t; /* can hold a gid_t, pid_t, or uid_t */ -typedef __uint32_t __ino_t; /* inode number */ +typedef __uint64_t __ino_t; /* inode number */ typedef long __key_t; /* IPC key (for Sys V IPC) */ typedef __int32_t __lwpid_t; /* Thread ID (a.k.a. LWP) */ typedef __uint16_t __mode_t; /* permissions */ typedef int __accmode_t; /* access permissions */ typedef int __nl_item; -typedef __uint16_t __nlink_t; /* link count */ +typedef __uint64_t __nlink_t; /* link count */ typedef __int64_t __off_t; /* file offset */ typedef __int64_t __off64_t; /* file offset (alias) */ typedef __int32_t __pid_t; /* process [group] */ typedef __int64_t __rlim_t; /* resource limit - intentionally */ /* signed, because of legacy code */ /* that uses -1 for RLIM_INFINITY */ typedef __uint8_t __sa_family_t; typedef __uint32_t __socklen_t; typedef long __suseconds_t; /* microseconds (signed) */ typedef struct __timer *__timer_t; /* timer_gettime()... */ typedef struct __mq *__mqd_t; /* mq_open()... */ typedef __uint32_t __uid_t; typedef unsigned int __useconds_t; /* microseconds (unsigned) */ typedef int __cpuwhich_t; /* which parameter for cpuset. */ typedef int __cpulevel_t; /* level parameter for cpuset. */ typedef int __cpusetid_t; /* cpuset identifier. */ /* * Unusual type definitions. */ /* * rune_t is declared to be an ``int'' instead of the more natural * ``unsigned long'' or ``long''. Two things are happening here. It is not * unsigned so that EOF (-1) can be naturally assigned to it and used. Also, * it looks like 10646 will be a 31 bit standard. This means that if your * ints cannot hold 32 bits, you will be in trouble. The reason an int was * chosen over a long is that the is*() and to*() routines take ints (says * ANSI C), but they use __ct_rune_t instead of int. * * NOTE: rune_t is not covered by ANSI nor other standards, and should not * be instantiated outside of lib/libc/locale. Use wchar_t. wint_t and * rune_t must be the same type. Also, wint_t should be able to hold all * members of the largest character set plus one extra value (WEOF), and * must be at least 16 bits. */ typedef int __ct_rune_t; /* arg type for ctype funcs */ typedef __ct_rune_t __rune_t; /* rune_t (see above) */ typedef __ct_rune_t __wint_t; /* wint_t (see above) */ /* Clang already provides these types as built-ins, but only in C++ mode. */ #if !defined(__clang__) || !defined(__cplusplus) typedef __uint_least16_t __char16_t; typedef __uint_least32_t __char32_t; #endif /* In C++11, char16_t and char32_t are built-in types. */ #if defined(__cplusplus) && __cplusplus >= 201103L #define _CHAR16_T_DECLARED #define _CHAR32_T_DECLARED #endif typedef struct { long long __max_align1 __aligned(_Alignof(long long)); long double __max_align2 __aligned(_Alignof(long double)); } __max_align_t; -typedef __uint32_t __dev_t; /* device number */ +typedef __uint64_t __dev_t; /* device number */ typedef __uint32_t __fixpt_t; /* fixed point number */ /* * mbstate_t is an opaque object to keep conversion state during multibyte * stream conversions. */ typedef union { char __mbstate8[128]; __int64_t _mbstateL; /* for alignment */ } __mbstate_t; typedef __uintmax_t __rman_res_t; #endif /* !_SYS__TYPES_H_ */ Index: head/sys/sys/acct.h =================================================================== --- head/sys/sys/acct.h (revision 318735) +++ head/sys/sys/acct.h (revision 318736) @@ -1,125 +1,150 @@ /*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)acct.h 8.4 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _SYS_ACCT_H_ #define _SYS_ACCT_H_ #ifdef _KERNEL #define float uint32_t #endif #define AC_COMM_LEN 16 /* - * Accounting structure version 2 (current). + * Accounting structure version 3 (current). * The first byte is always zero. * Time units are microseconds. */ -struct acctv2 { +struct acctv3 { uint8_t ac_zero; /* zero identifies new version */ uint8_t ac_version; /* record version number */ uint16_t ac_len; /* record length */ char ac_comm[AC_COMM_LEN]; /* command name */ float ac_utime; /* user time */ float ac_stime; /* system time */ float ac_etime; /* elapsed time */ time_t ac_btime; /* starting time */ uid_t ac_uid; /* user id */ gid_t ac_gid; /* group id */ float ac_mem; /* average memory usage */ float ac_io; /* count of IO blocks */ __dev_t ac_tty; /* controlling tty */ - + uint32_t ac_pad0; +#if defined(__powerpc__) && !defined(_LP64) + uint32_t ac_pad1; +#endif uint16_t ac_len2; /* record length */ union { - __dev_t ac_align; /* force v1 compatible alignment */ + uint32_t ac_align; /* force v1 compatible alignment */ #define AFORK 0x01 /* forked but not exec'ed */ /* ASU is no longer supported */ #define ASU 0x02 /* used super-user permissions */ #define ACOMPAT 0x04 /* used compatibility mode */ #define ACORE 0x08 /* dumped core */ #define AXSIG 0x10 /* killed by a signal */ #define ANVER 0x20 /* new record version */ uint8_t ac_flag; /* accounting flags */ } ac_trailer; #define ac_flagx ac_trailer.ac_flag }; +struct acctv2 { + uint8_t ac_zero; /* zero identifies new version */ + uint8_t ac_version; /* record version number */ + uint16_t ac_len; /* record length */ + char ac_comm[AC_COMM_LEN]; /* command name */ + float ac_utime; /* user time */ + float ac_stime; /* system time */ + float ac_etime; /* elapsed time */ + time_t ac_btime; /* starting time */ + uid_t ac_uid; /* user id */ + gid_t ac_gid; /* group id */ + float ac_mem; /* average memory usage */ + float ac_io; /* count of IO blocks */ + uint32_t ac_tty; /* controlling tty */ + + uint16_t ac_len2; /* record length */ + union { + uint32_t ac_align; /* force v1 compatible alignment */ + uint8_t ac_flag; /* accounting flags */ + } ac_trailer; +}; + /* * Legacy accounting structure (rev. 1.5-1.18). * The first byte is always non-zero. * Some fields use a comp_t type which is a 3 bits base 8 * exponent, 13 bit fraction ``floating point'' number. * Units are 1/AHZV1 seconds. */ typedef uint16_t comp_t; struct acctv1 { char ac_comm[AC_COMM_LEN]; /* command name */ comp_t ac_utime; /* user time */ comp_t ac_stime; /* system time */ comp_t ac_etime; /* elapsed time */ time_t ac_btime; /* starting time */ uid_t ac_uid; /* user id */ gid_t ac_gid; /* group id */ uint16_t ac_mem; /* average memory usage */ comp_t ac_io; /* count of IO blocks */ - __dev_t ac_tty; /* controlling tty */ + uint32_t ac_tty; /* controlling tty */ uint8_t ac_flag; /* accounting flags */ }; /* * 1/AHZV1 is the granularity of the data encoded in the comp_t fields. * This is not necessarily equal to hz. */ #define AHZV1 64 #ifdef _KERNEL struct thread; int acct_process(struct thread *td); #undef float #endif #endif /* !_SYS_ACCT_H_ */ Index: head/sys/sys/dirent.h =================================================================== --- head/sys/sys/dirent.h (revision 318735) +++ head/sys/sys/dirent.h (revision 318736) @@ -1,100 +1,129 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)dirent.h 8.3 (Berkeley) 8/10/94 * $FreeBSD$ */ #ifndef _SYS_DIRENT_H_ #define _SYS_DIRENT_H_ #include #include +#ifndef _INO_T_DECLARED +typedef __ino_t ino_t; +#define _INO_T_DECLARED +#endif + +#ifndef _OFF_T_DECLARED +typedef __off_t off_t; +#define _OFF_T_DECLARED +#endif + /* * The dirent structure defines the format of directory entries returned by * the getdirentries(2) system call. * * A directory entry has a struct dirent at the front of it, containing its * inode number, the length of the entry, and the length of the name - * contained in the entry. These are followed by the name padded to a 4 + * contained in the entry. These are followed by the name padded to an 8 * byte boundary with null bytes. All names are guaranteed null terminated. * The maximum length of a name in a directory is MAXNAMLEN. + * + * Explicit padding between the last member of the header (d_namelen) and + * d_name avoids ABI padding at the end of dirent on LP64 architectures. + * There is code depending on d_name being last. */ struct dirent { - __uint32_t d_fileno; /* file number of entry */ + ino_t d_fileno; /* file number of entry */ + off_t d_off; /* directory offset of entry */ __uint16_t d_reclen; /* length of this record */ - __uint8_t d_type; /* file type, see below */ - __uint8_t d_namlen; /* length of string in d_name */ + __uint8_t d_type; /* file type, see below */ + __uint8_t d_pad0; + __uint16_t d_namlen; /* length of string in d_name */ + __uint16_t d_pad1; #if __BSD_VISIBLE #define MAXNAMLEN 255 char d_name[MAXNAMLEN + 1]; /* name must be no longer than this */ #else char d_name[255 + 1]; /* name must be no longer than this */ #endif }; +#if defined(_WANT_FREEBSD11_DIRENT) || defined(_KERNEL) +struct freebsd11_dirent { + __uint32_t d_fileno; /* file number of entry */ + __uint16_t d_reclen; /* length of this record */ + __uint8_t d_type; /* file type, see below */ + __uint8_t d_namlen; /* length of string in d_name */ + char d_name[255 + 1]; /* name must be no longer than this */ +}; +#endif /* _WANT_FREEBSD11_DIRENT || _KERNEL */ + #if __BSD_VISIBLE + /* * File types */ #define DT_UNKNOWN 0 #define DT_FIFO 1 #define DT_CHR 2 #define DT_DIR 4 #define DT_BLK 6 #define DT_REG 8 #define DT_LNK 10 #define DT_SOCK 12 #define DT_WHT 14 /* * Convert between stat structure types and directory types. */ #define IFTODT(mode) (((mode) & 0170000) >> 12) #define DTTOIF(dirtype) ((dirtype) << 12) /* * The _GENERIC_DIRSIZ macro gives the minimum record length which will hold * the directory entry. This returns the amount of space in struct direct * without the d_name field, plus enough space for the name with a terminating - * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary. + * null byte (dp->d_namlen+1), rounded up to a 8 byte boundary. * * XXX although this macro is in the implementation namespace, it requires * a manifest constant that is not. */ -#define _GENERIC_DIRSIZ(dp) \ - ((sizeof (struct dirent) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3)) +#define _GENERIC_DIRLEN(namlen) \ + ((__offsetof(struct dirent, d_name) + (namlen) + 1 + 7) & ~7) +#define _GENERIC_DIRSIZ(dp) _GENERIC_DIRLEN((dp)->d_namlen) #endif /* __BSD_VISIBLE */ #ifdef _KERNEL #define GENERIC_DIRSIZ(dp) _GENERIC_DIRSIZ(dp) #endif #endif /* !_SYS_DIRENT_H_ */ Index: head/sys/sys/mount.h =================================================================== --- head/sys/sys/mount.h (revision 318735) +++ head/sys/sys/mount.h (revision 318736) @@ -1,959 +1,987 @@ /*- * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mount.h 8.21 (Berkeley) 5/20/95 * $FreeBSD$ */ #ifndef _SYS_MOUNT_H_ #define _SYS_MOUNT_H_ #include #include #ifdef _KERNEL #include #include #include #include #endif /* * NOTE: When changing statfs structure, mount structure, MNT_* flags or * MNTK_* flags also update DDB show mount command in vfs_subr.c. */ typedef struct fsid { int32_t val[2]; } fsid_t; /* filesystem id type */ /* * File identifier. * These are unique per filesystem on a single machine. */ #define MAXFIDSZ 16 struct fid { u_short fid_len; /* length of data in bytes */ u_short fid_data0; /* force longword alignment */ char fid_data[MAXFIDSZ]; /* data (variable length) */ }; /* * filesystem statistics */ #define MFSNAMELEN 16 /* length of type name including null */ -#define MNAMELEN 88 /* size of on/from name bufs */ -#define STATFS_VERSION 0x20030518 /* current version number */ +#define MNAMELEN 1024 /* size of on/from name bufs */ +#define STATFS_VERSION 0x20140518 /* current version number */ struct statfs { uint32_t f_version; /* structure version number */ uint32_t f_type; /* type of filesystem */ uint64_t f_flags; /* copy of mount exported flags */ uint64_t f_bsize; /* filesystem fragment size */ uint64_t f_iosize; /* optimal transfer block size */ uint64_t f_blocks; /* total data blocks in filesystem */ uint64_t f_bfree; /* free blocks in filesystem */ int64_t f_bavail; /* free blocks avail to non-superuser */ uint64_t f_files; /* total file nodes in filesystem */ int64_t f_ffree; /* free nodes avail to non-superuser */ uint64_t f_syncwrites; /* count of sync writes since mount */ uint64_t f_asyncwrites; /* count of async writes since mount */ uint64_t f_syncreads; /* count of sync reads since mount */ uint64_t f_asyncreads; /* count of async reads since mount */ uint64_t f_spare[10]; /* unused spare */ uint32_t f_namemax; /* maximum filename length */ uid_t f_owner; /* user that mounted the filesystem */ fsid_t f_fsid; /* filesystem id */ char f_charspare[80]; /* spare string space */ char f_fstypename[MFSNAMELEN]; /* filesystem type name */ char f_mntfromname[MNAMELEN]; /* mounted filesystem */ char f_mntonname[MNAMELEN]; /* directory on which mounted */ }; + +#if defined(_WANT_FREEBSD11_STATFS) || defined(_KERNEL) +#define FREEBSD11_STATFS_VERSION 0x20030518 /* current version number */ +struct freebsd11_statfs { + uint32_t f_version; /* structure version number */ + uint32_t f_type; /* type of filesystem */ + uint64_t f_flags; /* copy of mount exported flags */ + uint64_t f_bsize; /* filesystem fragment size */ + uint64_t f_iosize; /* optimal transfer block size */ + uint64_t f_blocks; /* total data blocks in filesystem */ + uint64_t f_bfree; /* free blocks in filesystem */ + int64_t f_bavail; /* free blocks avail to non-superuser */ + uint64_t f_files; /* total file nodes in filesystem */ + int64_t f_ffree; /* free nodes avail to non-superuser */ + uint64_t f_syncwrites; /* count of sync writes since mount */ + uint64_t f_asyncwrites; /* count of async writes since mount */ + uint64_t f_syncreads; /* count of sync reads since mount */ + uint64_t f_asyncreads; /* count of async reads since mount */ + uint64_t f_spare[10]; /* unused spare */ + uint32_t f_namemax; /* maximum filename length */ + uid_t f_owner; /* user that mounted the filesystem */ + fsid_t f_fsid; /* filesystem id */ + char f_charspare[80]; /* spare string space */ + char f_fstypename[16]; /* filesystem type name */ + char f_mntfromname[88]; /* mounted filesystem */ + char f_mntonname[88]; /* directory on which mounted */ +}; +#endif /* _WANT_FREEBSD11_STATFS || _KERNEL */ #ifdef _KERNEL #define OMFSNAMELEN 16 /* length of fs type name, including null */ #define OMNAMELEN (88 - 2 * sizeof(long)) /* size of on/from name bufs */ /* XXX getfsstat.2 is out of date with write and read counter changes here. */ /* XXX statfs.2 is out of date with read counter changes here. */ struct ostatfs { long f_spare2; /* placeholder */ long f_bsize; /* fundamental filesystem block size */ long f_iosize; /* optimal transfer block size */ long f_blocks; /* total data blocks in filesystem */ long f_bfree; /* free blocks in fs */ long f_bavail; /* free blocks avail to non-superuser */ long f_files; /* total file nodes in filesystem */ long f_ffree; /* free file nodes in fs */ fsid_t f_fsid; /* filesystem id */ uid_t f_owner; /* user that mounted the filesystem */ int f_type; /* type of filesystem */ int f_flags; /* copy of mount exported flags */ long f_syncwrites; /* count of sync writes since mount */ long f_asyncwrites; /* count of async writes since mount */ char f_fstypename[OMFSNAMELEN]; /* fs type name */ char f_mntonname[OMNAMELEN]; /* directory on which mounted */ long f_syncreads; /* count of sync reads since mount */ long f_asyncreads; /* count of async reads since mount */ short f_spares1; /* unused spare */ char f_mntfromname[OMNAMELEN];/* mounted filesystem */ short f_spares2; /* unused spare */ /* * XXX on machines where longs are aligned to 8-byte boundaries, there * is an unnamed int32_t here. This spare was after the apparent end * of the struct until we bit off the read counters from f_mntonname. */ long f_spare[2]; /* unused spare */ }; TAILQ_HEAD(vnodelst, vnode); /* Mount options list */ TAILQ_HEAD(vfsoptlist, vfsopt); struct vfsopt { TAILQ_ENTRY(vfsopt) link; char *name; void *value; int len; int pos; int seen; }; /* * Structure per mounted filesystem. Each mounted filesystem has an * array of operations and an instance record. The filesystems are * put on a doubly linked list. * * Lock reference: * l - mnt_listmtx * m - mountlist_mtx * i - interlock * v - vnode freelist mutex * * Unmarked fields are considered stable as long as a ref is held. * */ struct mount { struct mtx mnt_mtx; /* mount structure interlock */ int mnt_gen; /* struct mount generation */ #define mnt_startzero mnt_list TAILQ_ENTRY(mount) mnt_list; /* (m) mount list */ struct vfsops *mnt_op; /* operations on fs */ struct vfsconf *mnt_vfc; /* configuration info */ struct vnode *mnt_vnodecovered; /* vnode we mounted on */ struct vnode *mnt_syncer; /* syncer vnode */ int mnt_ref; /* (i) Reference count */ struct vnodelst mnt_nvnodelist; /* (i) list of vnodes */ int mnt_nvnodelistsize; /* (i) # of vnodes */ int mnt_writeopcount; /* (i) write syscalls pending */ int mnt_kern_flag; /* (i) kernel only flags */ uint64_t mnt_flag; /* (i) flags shared with user */ struct vfsoptlist *mnt_opt; /* current mount options */ struct vfsoptlist *mnt_optnew; /* new options passed to fs */ int mnt_maxsymlinklen; /* max size of short symlink */ struct statfs mnt_stat; /* cache of filesystem stats */ struct ucred *mnt_cred; /* credentials of mounter */ void * mnt_data; /* private data */ time_t mnt_time; /* last time written*/ int mnt_iosize_max; /* max size for clusters, etc */ struct netexport *mnt_export; /* export list */ struct label *mnt_label; /* MAC label for the fs */ u_int mnt_hashseed; /* Random seed for vfs_hash */ int mnt_lockref; /* (i) Lock reference count */ int mnt_secondary_writes; /* (i) # of secondary writes */ int mnt_secondary_accwrites;/* (i) secondary wr. starts */ struct thread *mnt_susp_owner; /* (i) thread owning suspension */ #define mnt_endzero mnt_gjprovider char *mnt_gjprovider; /* gjournal provider name */ struct mtx mnt_listmtx; struct vnodelst mnt_activevnodelist; /* (l) list of active vnodes */ int mnt_activevnodelistsize;/* (l) # of active vnodes */ struct vnodelst mnt_tmpfreevnodelist; /* (l) list of free vnodes */ int mnt_tmpfreevnodelistsize;/* (l) # of free vnodes */ struct lock mnt_explock; /* vfs_export walkers lock */ TAILQ_ENTRY(mount) mnt_upper_link; /* (m) we in the all uppers */ TAILQ_HEAD(, mount) mnt_uppers; /* (m) upper mounts over us*/ }; /* * Definitions for MNT_VNODE_FOREACH_ALL. */ struct vnode *__mnt_vnode_next_all(struct vnode **mvp, struct mount *mp); struct vnode *__mnt_vnode_first_all(struct vnode **mvp, struct mount *mp); void __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp); #define MNT_VNODE_FOREACH_ALL(vp, mp, mvp) \ for (vp = __mnt_vnode_first_all(&(mvp), (mp)); \ (vp) != NULL; vp = __mnt_vnode_next_all(&(mvp), (mp))) #define MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp) \ do { \ MNT_ILOCK(mp); \ __mnt_vnode_markerfree_all(&(mvp), (mp)); \ /* MNT_IUNLOCK(mp); -- done in above function */ \ mtx_assert(MNT_MTX(mp), MA_NOTOWNED); \ } while (0) /* * Definitions for MNT_VNODE_FOREACH_ACTIVE. */ struct vnode *__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp); struct vnode *__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp); void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *); #define MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) \ for (vp = __mnt_vnode_first_active(&(mvp), (mp)); \ (vp) != NULL; vp = __mnt_vnode_next_active(&(mvp), (mp))) #define MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp) \ __mnt_vnode_markerfree_active(&(mvp), (mp)) #define MNT_ILOCK(mp) mtx_lock(&(mp)->mnt_mtx) #define MNT_ITRYLOCK(mp) mtx_trylock(&(mp)->mnt_mtx) #define MNT_IUNLOCK(mp) mtx_unlock(&(mp)->mnt_mtx) #define MNT_MTX(mp) (&(mp)->mnt_mtx) #define MNT_REF(mp) (mp)->mnt_ref++ #define MNT_REL(mp) do { \ KASSERT((mp)->mnt_ref > 0, ("negative mnt_ref")); \ (mp)->mnt_ref--; \ if ((mp)->mnt_ref == 0) \ wakeup((mp)); \ } while (0) #endif /* _KERNEL */ /* * User specifiable flags, stored in mnt_flag. */ #define MNT_RDONLY 0x0000000000000001ULL /* read only filesystem */ #define MNT_SYNCHRONOUS 0x0000000000000002ULL /* fs written synchronously */ #define MNT_NOEXEC 0x0000000000000004ULL /* can't exec from filesystem */ #define MNT_NOSUID 0x0000000000000008ULL /* don't honor setuid fs bits */ #define MNT_NFS4ACLS 0x0000000000000010ULL /* enable NFS version 4 ACLs */ #define MNT_UNION 0x0000000000000020ULL /* union with underlying fs */ #define MNT_ASYNC 0x0000000000000040ULL /* fs written asynchronously */ #define MNT_SUIDDIR 0x0000000000100000ULL /* special SUID dir handling */ #define MNT_SOFTDEP 0x0000000000200000ULL /* using soft updates */ #define MNT_NOSYMFOLLOW 0x0000000000400000ULL /* do not follow symlinks */ #define MNT_GJOURNAL 0x0000000002000000ULL /* GEOM journal support enabled */ #define MNT_MULTILABEL 0x0000000004000000ULL /* MAC support for objects */ #define MNT_ACLS 0x0000000008000000ULL /* ACL support enabled */ #define MNT_NOATIME 0x0000000010000000ULL /* dont update file access time */ #define MNT_NOCLUSTERR 0x0000000040000000ULL /* disable cluster read */ #define MNT_NOCLUSTERW 0x0000000080000000ULL /* disable cluster write */ #define MNT_SUJ 0x0000000100000000ULL /* using journaled soft updates */ #define MNT_AUTOMOUNTED 0x0000000200000000ULL /* mounted by automountd(8) */ /* * NFS export related mount flags. */ #define MNT_EXRDONLY 0x0000000000000080ULL /* exported read only */ #define MNT_EXPORTED 0x0000000000000100ULL /* filesystem is exported */ #define MNT_DEFEXPORTED 0x0000000000000200ULL /* exported to the world */ #define MNT_EXPORTANON 0x0000000000000400ULL /* anon uid mapping for all */ #define MNT_EXKERB 0x0000000000000800ULL /* exported with Kerberos */ #define MNT_EXPUBLIC 0x0000000020000000ULL /* public export (WebNFS) */ /* * Flags set by internal operations, * but visible to the user. * XXX some of these are not quite right.. (I've never seen the root flag set) */ #define MNT_LOCAL 0x0000000000001000ULL /* filesystem is stored locally */ #define MNT_QUOTA 0x0000000000002000ULL /* quotas are enabled on fs */ #define MNT_ROOTFS 0x0000000000004000ULL /* identifies the root fs */ #define MNT_USER 0x0000000000008000ULL /* mounted by a user */ #define MNT_IGNORE 0x0000000000800000ULL /* do not show entry in df */ /* * Mask of flags that are visible to statfs(). * XXX I think that this could now become (~(MNT_CMDFLAGS)) * but the 'mount' program may need changing to handle this. */ #define MNT_VISFLAGMASK (MNT_RDONLY | MNT_SYNCHRONOUS | MNT_NOEXEC | \ MNT_NOSUID | MNT_UNION | MNT_SUJ | \ MNT_ASYNC | MNT_EXRDONLY | MNT_EXPORTED | \ MNT_DEFEXPORTED | MNT_EXPORTANON| MNT_EXKERB | \ MNT_LOCAL | MNT_USER | MNT_QUOTA | \ MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \ MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \ MNT_IGNORE | MNT_EXPUBLIC | MNT_NOSYMFOLLOW | \ MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS | \ MNT_NFS4ACLS | MNT_AUTOMOUNTED) /* Mask of flags that can be updated. */ #define MNT_UPDATEMASK (MNT_NOSUID | MNT_NOEXEC | \ MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | \ MNT_NOATIME | \ MNT_NOSYMFOLLOW | MNT_IGNORE | \ MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | \ MNT_ACLS | MNT_USER | MNT_NFS4ACLS | \ MNT_AUTOMOUNTED) /* * External filesystem command modifier flags. * Unmount can use the MNT_FORCE flag. * XXX: These are not STATES and really should be somewhere else. * XXX: MNT_BYFSID and MNT_NONBUSY collide with MNT_ACLS and MNT_MULTILABEL, * but because MNT_ACLS and MNT_MULTILABEL are only used for mount(2), * and MNT_BYFSID and MNT_NONBUSY are only used for unmount(2), * it's harmless. */ #define MNT_UPDATE 0x0000000000010000ULL /* not real mount, just update */ #define MNT_DELEXPORT 0x0000000000020000ULL /* delete export host lists */ #define MNT_RELOAD 0x0000000000040000ULL /* reload filesystem data */ #define MNT_FORCE 0x0000000000080000ULL /* force unmount or readonly */ #define MNT_SNAPSHOT 0x0000000001000000ULL /* snapshot the filesystem */ #define MNT_NONBUSY 0x0000000004000000ULL /* check vnode use counts. */ #define MNT_BYFSID 0x0000000008000000ULL /* specify filesystem by ID. */ #define MNT_CMDFLAGS (MNT_UPDATE | MNT_DELEXPORT | MNT_RELOAD | \ MNT_FORCE | MNT_SNAPSHOT | MNT_NONBUSY | \ MNT_BYFSID) /* * Internal filesystem control flags stored in mnt_kern_flag. * * MNTK_UNMOUNT locks the mount entry so that name lookup cannot proceed * past the mount point. This keeps the subtree stable during mounts * and unmounts. * * MNTK_UNMOUNTF permits filesystems to detect a forced unmount while * dounmount() is still waiting to lock the mountpoint. This allows * the filesystem to cancel operations that might otherwise deadlock * with the unmount attempt (used by NFS). * * MNTK_NOINSMNTQ is strict subset of MNTK_UNMOUNT. They are separated * to allow for failed unmount attempt to restore the syncer vnode for * the mount. */ #define MNTK_UNMOUNTF 0x00000001 /* forced unmount in progress */ #define MNTK_ASYNC 0x00000002 /* filtered async flag */ #define MNTK_SOFTDEP 0x00000004 /* async disabled by softdep */ #define MNTK_NOINSMNTQ 0x00000008 /* insmntque is not allowed */ #define MNTK_DRAINING 0x00000010 /* lock draining is happening */ #define MNTK_REFEXPIRE 0x00000020 /* refcount expiring is happening */ #define MNTK_EXTENDED_SHARED 0x00000040 /* Allow shared locking for more ops */ #define MNTK_SHARED_WRITES 0x00000080 /* Allow shared locking for writes */ #define MNTK_NO_IOPF 0x00000100 /* Disallow page faults during reads and writes. Filesystem shall properly handle i/o state on EFAULT. */ #define MNTK_VGONE_UPPER 0x00000200 #define MNTK_VGONE_WAITER 0x00000400 #define MNTK_LOOKUP_EXCL_DOTDOT 0x00000800 #define MNTK_MARKER 0x00001000 #define MNTK_UNMAPPED_BUFS 0x00002000 #define MNTK_USES_BCACHE 0x00004000 /* FS uses the buffer cache. */ #define MNTK_NOASYNC 0x00800000 /* disable async */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ #define MNTK_SUSPEND 0x08000000 /* request write suspension */ #define MNTK_SUSPEND2 0x04000000 /* block secondary writes */ #define MNTK_SUSPENDED 0x10000000 /* write operations are suspended */ #define MNTK_NULL_NOCACHE 0x20000000 /* auto disable cache for nullfs mounts over this fs */ #define MNTK_LOOKUP_SHARED 0x40000000 /* FS supports shared lock lookups */ #define MNTK_NOKNOTE 0x80000000 /* Don't send KNOTEs from VOP hooks */ #ifdef _KERNEL static inline int MNT_SHARED_WRITES(struct mount *mp) { return (mp != NULL && (mp->mnt_kern_flag & MNTK_SHARED_WRITES) != 0); } static inline int MNT_EXTENDED_SHARED(struct mount *mp) { return (mp != NULL && (mp->mnt_kern_flag & MNTK_EXTENDED_SHARED) != 0); } #endif /* * Sysctl CTL_VFS definitions. * * Second level identifier specifies which filesystem. Second level * identifier VFS_VFSCONF returns information about all filesystems. * Second level identifier VFS_GENERIC is non-terminal. */ #define VFS_VFSCONF 0 /* get configured filesystems */ #define VFS_GENERIC 0 /* generic filesystem information */ /* * Third level identifiers for VFS_GENERIC are given below; third * level identifiers for specific filesystems are given in their * mount specific header files. */ #define VFS_MAXTYPENUM 1 /* int: highest defined filesystem type */ #define VFS_CONF 2 /* struct: vfsconf for filesystem given as next argument */ /* * Flags for various system call interfaces. * * waitfor flags to vfs_sync() and getfsstat() */ #define MNT_WAIT 1 /* synchronously wait for I/O to complete */ #define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ #define MNT_LAZY 3 /* push data not written by filesystem syncer */ #define MNT_SUSPEND 4 /* Suspend file system after sync */ /* * Generic file handle */ struct fhandle { fsid_t fh_fsid; /* Filesystem id of mount point */ struct fid fh_fid; /* Filesys specific id */ }; typedef struct fhandle fhandle_t; /* * Old export arguments without security flavor list */ struct oexport_args { int ex_flags; /* export related flags */ uid_t ex_root; /* mapping for root uid */ struct xucred ex_anon; /* mapping for anonymous user */ struct sockaddr *ex_addr; /* net address to which exported */ u_char ex_addrlen; /* and the net address length */ struct sockaddr *ex_mask; /* mask of valid bits in saddr */ u_char ex_masklen; /* and the smask length */ char *ex_indexfile; /* index file for WebNFS URLs */ }; /* * Export arguments for local filesystem mount calls. */ #define MAXSECFLAVORS 5 struct export_args { int ex_flags; /* export related flags */ uid_t ex_root; /* mapping for root uid */ struct xucred ex_anon; /* mapping for anonymous user */ struct sockaddr *ex_addr; /* net address to which exported */ u_char ex_addrlen; /* and the net address length */ struct sockaddr *ex_mask; /* mask of valid bits in saddr */ u_char ex_masklen; /* and the smask length */ char *ex_indexfile; /* index file for WebNFS URLs */ int ex_numsecflavors; /* security flavor count */ int ex_secflavors[MAXSECFLAVORS]; /* list of security flavors */ }; /* * Structure holding information for a publicly exported filesystem * (WebNFS). Currently the specs allow just for one such filesystem. */ struct nfs_public { int np_valid; /* Do we hold valid information */ fhandle_t np_handle; /* Filehandle for pub fs (internal) */ struct mount *np_mount; /* Mountpoint of exported fs */ char *np_index; /* Index file */ }; /* * Filesystem configuration information. One of these exists for each * type of filesystem supported by the kernel. These are searched at * mount time to identify the requested filesystem. * * XXX: Never change the first two arguments! */ struct vfsconf { u_int vfc_version; /* ABI version number */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ struct vfsops *vfc_vfsops; /* filesystem operations vector */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ struct vfsoptdecl *vfc_opts; /* mount options */ TAILQ_ENTRY(vfsconf) vfc_list; /* list of vfscons */ }; /* Userland version of the struct vfsconf. */ struct xvfsconf { struct vfsops *vfc_vfsops; /* filesystem operations vector */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ struct vfsconf *vfc_next; /* next in list */ }; #ifndef BURN_BRIDGES struct ovfsconf { void *vfc_vfsops; char vfc_name[32]; int vfc_index; int vfc_refcount; int vfc_flags; }; #endif /* * NB: these flags refer to IMPLEMENTATION properties, not properties of * any actual mounts; i.e., it does not make sense to change the flags. */ #define VFCF_STATIC 0x00010000 /* statically compiled into kernel */ #define VFCF_NETWORK 0x00020000 /* may get data over the network */ #define VFCF_READONLY 0x00040000 /* writes are not implemented */ #define VFCF_SYNTHETIC 0x00080000 /* data does not represent real files */ #define VFCF_LOOPBACK 0x00100000 /* aliases some other mounted FS */ #define VFCF_UNICODE 0x00200000 /* stores file names as Unicode */ #define VFCF_JAIL 0x00400000 /* can be mounted from within a jail */ #define VFCF_DELEGADMIN 0x00800000 /* supports delegated administration */ #define VFCF_SBDRY 0x01000000 /* defer stop requests */ typedef uint32_t fsctlop_t; struct vfsidctl { int vc_vers; /* should be VFSIDCTL_VERS1 (below) */ fsid_t vc_fsid; /* fsid to operate on */ char vc_fstypename[MFSNAMELEN]; /* type of fs 'nfs' or '*' */ fsctlop_t vc_op; /* operation VFS_CTL_* (below) */ void *vc_ptr; /* pointer to data structure */ size_t vc_len; /* sizeof said structure */ u_int32_t vc_spare[12]; /* spare (must be zero) */ }; /* vfsidctl API version. */ #define VFS_CTL_VERS1 0x01 /* * New style VFS sysctls, do not reuse/conflict with the namespace for * private sysctls. * All "global" sysctl ops have the 33rd bit set: * 0x...1.... * Private sysctl ops should have the 33rd bit unset. */ #define VFS_CTL_QUERY 0x00010001 /* anything wrong? (vfsquery) */ #define VFS_CTL_TIMEO 0x00010002 /* set timeout for vfs notification */ #define VFS_CTL_NOLOCKS 0x00010003 /* disable file locking */ struct vfsquery { u_int32_t vq_flags; u_int32_t vq_spare[31]; }; /* vfsquery flags */ #define VQ_NOTRESP 0x0001 /* server down */ #define VQ_NEEDAUTH 0x0002 /* server bad auth */ #define VQ_LOWDISK 0x0004 /* we're low on space */ #define VQ_MOUNT 0x0008 /* new filesystem arrived */ #define VQ_UNMOUNT 0x0010 /* filesystem has left */ #define VQ_DEAD 0x0020 /* filesystem is dead, needs force unmount */ #define VQ_ASSIST 0x0040 /* filesystem needs assistance from external program */ #define VQ_NOTRESPLOCK 0x0080 /* server lockd down */ #define VQ_FLAG0100 0x0100 /* placeholder */ #define VQ_FLAG0200 0x0200 /* placeholder */ #define VQ_FLAG0400 0x0400 /* placeholder */ #define VQ_FLAG0800 0x0800 /* placeholder */ #define VQ_FLAG1000 0x1000 /* placeholder */ #define VQ_FLAG2000 0x2000 /* placeholder */ #define VQ_FLAG4000 0x4000 /* placeholder */ #define VQ_FLAG8000 0x8000 /* placeholder */ #ifdef _KERNEL /* Point a sysctl request at a vfsidctl's data. */ #define VCTLTOREQ(vc, req) \ do { \ (req)->newptr = (vc)->vc_ptr; \ (req)->newlen = (vc)->vc_len; \ (req)->newidx = 0; \ } while (0) #endif struct iovec; struct uio; #ifdef _KERNEL /* * vfs_busy specific flags and mask. */ #define MBF_NOWAIT 0x01 #define MBF_MNTLSTLOCK 0x02 #define MBF_MASK (MBF_NOWAIT | MBF_MNTLSTLOCK) #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_MOUNT); MALLOC_DECLARE(M_STATFS); #endif extern int maxvfsconf; /* highest defined filesystem type */ TAILQ_HEAD(vfsconfhead, vfsconf); extern struct vfsconfhead vfsconf; /* * Operations supported on mounted filesystem. */ struct mount_args; struct nameidata; struct sysctl_req; struct mntarg; typedef int vfs_cmount_t(struct mntarg *ma, void *data, uint64_t flags); typedef int vfs_unmount_t(struct mount *mp, int mntflags); typedef int vfs_root_t(struct mount *mp, int flags, struct vnode **vpp); typedef int vfs_quotactl_t(struct mount *mp, int cmds, uid_t uid, void *arg); typedef int vfs_statfs_t(struct mount *mp, struct statfs *sbp); typedef int vfs_sync_t(struct mount *mp, int waitfor); typedef int vfs_vget_t(struct mount *mp, ino_t ino, int flags, struct vnode **vpp); typedef int vfs_fhtovp_t(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp); typedef int vfs_checkexp_t(struct mount *mp, struct sockaddr *nam, int *extflagsp, struct ucred **credanonp, int *numsecflavors, int **secflavors); typedef int vfs_init_t(struct vfsconf *); typedef int vfs_uninit_t(struct vfsconf *); typedef int vfs_extattrctl_t(struct mount *mp, int cmd, struct vnode *filename_vp, int attrnamespace, const char *attrname); typedef int vfs_mount_t(struct mount *mp); typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op, struct sysctl_req *req); typedef void vfs_susp_clean_t(struct mount *mp); typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp); typedef void vfs_purge_t(struct mount *mp); struct vfsops { vfs_mount_t *vfs_mount; vfs_cmount_t *vfs_cmount; vfs_unmount_t *vfs_unmount; vfs_root_t *vfs_root; vfs_quotactl_t *vfs_quotactl; vfs_statfs_t *vfs_statfs; vfs_sync_t *vfs_sync; vfs_vget_t *vfs_vget; vfs_fhtovp_t *vfs_fhtovp; vfs_checkexp_t *vfs_checkexp; vfs_init_t *vfs_init; vfs_uninit_t *vfs_uninit; vfs_extattrctl_t *vfs_extattrctl; vfs_sysctl_t *vfs_sysctl; vfs_susp_clean_t *vfs_susp_clean; vfs_notify_lowervp_t *vfs_reclaim_lowervp; vfs_notify_lowervp_t *vfs_unlink_lowervp; vfs_purge_t *vfs_purge; vfs_mount_t *vfs_spare[6]; /* spares for ABI compat */ }; vfs_statfs_t __vfs_statfs; #define VFS_PROLOGUE(MP) do { \ struct mount *mp__; \ int _prev_stops; \ \ mp__ = (MP); \ _prev_stops = sigdeferstop((mp__ != NULL && \ (mp__->mnt_vfc->vfc_flags & VFCF_SBDRY) != 0) ? \ SIGDEFERSTOP_SILENT : SIGDEFERSTOP_NOP); #define VFS_EPILOGUE(MP) \ sigallowstop(_prev_stops); \ } while (0) #define VFS_MOUNT(MP) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_mount)(MP); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_UNMOUNT(MP, FORCE) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_unmount)(MP, FORCE); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_ROOT(MP, FLAGS, VPP) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_root)(MP, FLAGS, VPP); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_QUOTACTL(MP, C, U, A) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_STATFS(MP, SBP) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = __vfs_statfs((MP), (SBP)); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_SYNC(MP, WAIT) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_sync)(MP, WAIT); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_VGET(MP, INO, FLAGS, VPP) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_vget)(MP, INO, FLAGS, VPP); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_FHTOVP(MP, FIDP, FLAGS, VPP) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, FLAGS, VPP); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_CHECKEXP(MP, NAM, EXFLG, CRED, NUMSEC, SEC) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_checkexp)(MP, NAM, EXFLG, CRED, NUMSEC,\ SEC); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_EXTATTRCTL(MP, C, FN, NS, N) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_extattrctl)(MP, C, FN, NS, N); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_SYSCTL(MP, OP, REQ) ({ \ int _rc; \ \ VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_sysctl)(MP, OP, REQ); \ VFS_EPILOGUE(MP); \ _rc; }) #define VFS_SUSP_CLEAN(MP) do { \ if (*(MP)->mnt_op->vfs_susp_clean != NULL) { \ VFS_PROLOGUE(MP); \ (*(MP)->mnt_op->vfs_susp_clean)(MP); \ VFS_EPILOGUE(MP); \ } \ } while (0) #define VFS_RECLAIM_LOWERVP(MP, VP) do { \ if (*(MP)->mnt_op->vfs_reclaim_lowervp != NULL) { \ VFS_PROLOGUE(MP); \ (*(MP)->mnt_op->vfs_reclaim_lowervp)((MP), (VP)); \ VFS_EPILOGUE(MP); \ } \ } while (0) #define VFS_UNLINK_LOWERVP(MP, VP) do { \ if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) { \ VFS_PROLOGUE(MP); \ (*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP)); \ VFS_EPILOGUE(MP); \ } \ } while (0) #define VFS_PURGE(MP) do { \ if (*(MP)->mnt_op->vfs_purge != NULL) { \ VFS_PROLOGUE(MP); \ (*(MP)->mnt_op->vfs_purge)(MP); \ VFS_EPILOGUE(MP); \ } \ } while (0) #define VFS_KNOTE_LOCKED(vp, hint) do \ { \ if (((vp)->v_vflag & VV_NOKNOTE) == 0) \ VN_KNOTE((vp), (hint), KNF_LISTLOCKED); \ } while (0) #define VFS_KNOTE_UNLOCKED(vp, hint) do \ { \ if (((vp)->v_vflag & VV_NOKNOTE) == 0) \ VN_KNOTE((vp), (hint), 0); \ } while (0) #define VFS_NOTIFY_UPPER_RECLAIM 1 #define VFS_NOTIFY_UPPER_UNLINK 2 #include /* * Version numbers. */ #define VFS_VERSION_00 0x19660120 #define VFS_VERSION_01 0x20121030 #define VFS_VERSION VFS_VERSION_01 #define VFS_SET(vfsops, fsname, flags) \ static struct vfsconf fsname ## _vfsconf = { \ .vfc_version = VFS_VERSION, \ .vfc_name = #fsname, \ .vfc_vfsops = &vfsops, \ .vfc_typenum = -1, \ .vfc_flags = flags, \ }; \ static moduledata_t fsname ## _mod = { \ #fsname, \ vfs_modevent, \ & fsname ## _vfsconf \ }; \ DECLARE_MODULE(fsname, fsname ## _mod, SI_SUB_VFS, SI_ORDER_MIDDLE) /* * exported vnode operations */ int dounmount(struct mount *, int, struct thread *); int kernel_mount(struct mntarg *ma, uint64_t flags); int kernel_vmount(int flags, ...); struct mntarg *mount_arg(struct mntarg *ma, const char *name, const void *val, int len); struct mntarg *mount_argb(struct mntarg *ma, int flag, const char *name); struct mntarg *mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...); struct mntarg *mount_argsu(struct mntarg *ma, const char *name, const void *val, int len); void statfs_scale_blocks(struct statfs *sf, long max_size); struct vfsconf *vfs_byname(const char *); struct vfsconf *vfs_byname_kld(const char *, struct thread *td, int *); void vfs_mount_destroy(struct mount *); void vfs_event_signal(fsid_t *, u_int32_t, intptr_t); void vfs_freeopts(struct vfsoptlist *opts); void vfs_deleteopt(struct vfsoptlist *opts, const char *name); int vfs_buildopts(struct uio *auio, struct vfsoptlist **options); int vfs_flagopt(struct vfsoptlist *opts, const char *name, uint64_t *w, uint64_t val); int vfs_getopt(struct vfsoptlist *, const char *, void **, int *); int vfs_getopt_pos(struct vfsoptlist *opts, const char *name); int vfs_getopt_size(struct vfsoptlist *opts, const char *name, off_t *value); char *vfs_getopts(struct vfsoptlist *, const char *, int *error); int vfs_copyopt(struct vfsoptlist *, const char *, void *, int); int vfs_filteropt(struct vfsoptlist *, const char **legal); void vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...); int vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...); int vfs_setopt(struct vfsoptlist *opts, const char *name, void *value, int len); int vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value, int len); int vfs_setopts(struct vfsoptlist *opts, const char *name, const char *value); int vfs_setpublicfs /* set publicly exported fs */ (struct mount *, struct netexport *, struct export_args *); void vfs_msync(struct mount *, int); int vfs_busy(struct mount *, int); int vfs_export /* process mount export info */ (struct mount *, struct export_args *); void vfs_allocate_syncvnode(struct mount *); void vfs_deallocate_syncvnode(struct mount *); int vfs_donmount(struct thread *td, uint64_t fsflags, struct uio *fsoptions); void vfs_getnewfsid(struct mount *); struct cdev *vfs_getrootfsid(struct mount *); struct mount *vfs_getvfs(fsid_t *); /* return vfs given fsid */ struct mount *vfs_busyfs(fsid_t *); int vfs_modevent(module_t, int, void *); void vfs_mount_error(struct mount *, const char *, ...); void vfs_mountroot(void); /* mount our root filesystem */ void vfs_mountedfrom(struct mount *, const char *from); void vfs_notify_upper(struct vnode *, int); void vfs_oexport_conv(const struct oexport_args *oexp, struct export_args *exp); void vfs_ref(struct mount *); void vfs_rel(struct mount *); struct mount *vfs_mount_alloc(struct vnode *, struct vfsconf *, const char *, struct ucred *); int vfs_suser(struct mount *, struct thread *); void vfs_unbusy(struct mount *); void vfs_unmountall(void); extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct mtx mountlist_mtx; extern struct nfs_public nfs_pub; extern struct sx vfsconf_sx; #define vfsconf_lock() sx_xlock(&vfsconf_sx) #define vfsconf_unlock() sx_xunlock(&vfsconf_sx) #define vfsconf_slock() sx_slock(&vfsconf_sx) #define vfsconf_sunlock() sx_sunlock(&vfsconf_sx) /* * Declarations for these vfs default operations are located in * kern/vfs_default.c. They will be automatically used to replace * null entries in VFS ops tables when registering a new filesystem * type in the global table. */ vfs_root_t vfs_stdroot; vfs_quotactl_t vfs_stdquotactl; vfs_statfs_t vfs_stdstatfs; vfs_sync_t vfs_stdsync; vfs_sync_t vfs_stdnosync; vfs_vget_t vfs_stdvget; vfs_fhtovp_t vfs_stdfhtovp; vfs_checkexp_t vfs_stdcheckexp; vfs_init_t vfs_stdinit; vfs_uninit_t vfs_stduninit; vfs_extattrctl_t vfs_stdextattrctl; vfs_sysctl_t vfs_stdsysctl; void syncer_suspend(void); void syncer_resume(void); #else /* !_KERNEL */ #include struct stat; __BEGIN_DECLS int fhopen(const struct fhandle *, int); int fhstat(const struct fhandle *, struct stat *); int fhstatfs(const struct fhandle *, struct statfs *); int fstatfs(int, struct statfs *); int getfh(const char *, fhandle_t *); int getfsstat(struct statfs *, long, int); int getmntinfo(struct statfs **, int); int lgetfh(const char *, fhandle_t *); int mount(const char *, const char *, int, void *); int nmount(struct iovec *, unsigned int, int); int statfs(const char *, struct statfs *); int unmount(const char *, int); /* C library stuff */ int getvfsbyname(const char *, struct xvfsconf *); __END_DECLS #endif /* _KERNEL */ #endif /* !_SYS_MOUNT_H_ */ Index: head/sys/sys/param.h =================================================================== --- head/sys/sys/param.h (revision 318735) +++ head/sys/sys/param.h (revision 318736) @@ -1,363 +1,363 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)param.h 8.3 (Berkeley) 4/4/95 * $FreeBSD$ */ #ifndef _SYS_PARAM_H_ #define _SYS_PARAM_H_ #include #define BSD 199506 /* System version (year & month). */ #define BSD4_3 1 #define BSD4_4 1 /* * __FreeBSD_version numbers are documented in the Porter's Handbook. * If you bump the version for any reason, you should update the documentation * there. * Currently this lives here in the doc/ repository: * * head/en_US.ISO8859-1/books/porters-handbook/versions/chapter.xml * * scheme is: Rxx * 'R' is in the range 0 to 4 if this is a release branch or * x.0-CURRENT before RELENG_*_0 is created, otherwise 'R' is * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1200030 /* Master, propagated to newvers */ +#define __FreeBSD_version 1200031 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, * which by definition is always true on FreeBSD. This macro is also defined * on other systems that use the kernel of FreeBSD, such as GNU/kFreeBSD. * * It is tempting to use this macro in userland code when we want to enable * kernel-specific routines, and in fact it's fine to do this in code that * is part of FreeBSD itself. However, be aware that as presence of this * macro is still not widespread (e.g. older FreeBSD versions, 3rd party * compilers, etc), it is STRONGLY DISCOURAGED to check for this macro in * external applications without also checking for __FreeBSD__ as an * alternative. */ #undef __FreeBSD_kernel__ #define __FreeBSD_kernel__ #ifdef _KERNEL #define P_OSREL_SIGWAIT 700000 #define P_OSREL_SIGSEGV 700004 #define P_OSREL_MAP_ANON 800104 #define P_OSREL_MAP_FSTRICT 1100036 #define P_OSREL_SHUTDOWN_ENOTCONN 1100077 #define P_OSREL_MAJOR(x) ((x) / 100000) #endif #ifndef LOCORE #include #endif /* * Machine-independent constants (some used in following include files). * Redefined constants are from POSIX 1003.1 limits file. * * MAXCOMLEN should be >= sizeof(ac_comm) (see ) */ #include #define MAXCOMLEN 19 /* max command name remembered */ #define MAXINTERP PATH_MAX /* max interpreter file name length */ #define MAXLOGNAME 33 /* max login name length (incl. NUL) */ #define MAXUPRC CHILD_MAX /* max simultaneous processes */ #define NCARGS ARG_MAX /* max bytes for an exec function */ #define NGROUPS (NGROUPS_MAX+1) /* max number groups */ #define NOFILE OPEN_MAX /* max open files per process */ #define NOGROUP 65535 /* marker for empty group set member */ #define MAXHOSTNAMELEN 256 /* max hostname size */ #define SPECNAMELEN 63 /* max length of devicename */ /* More types and definitions used throughout the kernel. */ #ifdef _KERNEL #include #include #ifndef LOCORE #include #include #endif #ifndef FALSE #define FALSE 0 #endif #ifndef TRUE #define TRUE 1 #endif #endif #ifndef _KERNEL /* Signals. */ #include #endif /* Machine type dependent parameters. */ #include #ifndef _KERNEL #include #endif #ifndef DEV_BSHIFT #define DEV_BSHIFT 9 /* log2(DEV_BSIZE) */ #endif #define DEV_BSIZE (1<>PAGE_SHIFT) #endif /* * btodb() is messy and perhaps slow because `bytes' may be an off_t. We * want to shift an unsigned type to avoid sign extension and we don't * want to widen `bytes' unnecessarily. Assume that the result fits in * a daddr_t. */ #ifndef btodb #define btodb(bytes) /* calculates (bytes / DEV_BSIZE) */ \ (sizeof (bytes) > sizeof(long) \ ? (daddr_t)((unsigned long long)(bytes) >> DEV_BSHIFT) \ : (daddr_t)((unsigned long)(bytes) >> DEV_BSHIFT)) #endif #ifndef dbtob #define dbtob(db) /* calculates (db * DEV_BSIZE) */ \ ((off_t)(db) << DEV_BSHIFT) #endif #define PRIMASK 0x0ff #define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */ #define PDROP 0x200 /* OR'd with pri to stop re-entry of interlock mutex */ #define NZERO 0 /* default "nice" */ #define NBBY 8 /* number of bits in a byte */ #define NBPW sizeof(int) /* number of bytes per word (integer) */ #define CMASK 022 /* default file mask: S_IWGRP|S_IWOTH */ #define NODEV (dev_t)(-1) /* non-existent device */ /* * File system parameters and macros. * * MAXBSIZE - Filesystems are made out of blocks of at most MAXBSIZE bytes * per block. MAXBSIZE may be made larger without effecting * any existing filesystems as long as it does not exceed MAXPHYS, * and may be made smaller at the risk of not being able to use * filesystems which require a block size exceeding MAXBSIZE. * * MAXBCACHEBUF - Maximum size of a buffer in the buffer cache. This must * be >= MAXBSIZE and can be set differently for different * architectures by defining it in . * Making this larger allows NFS to do larger reads/writes. * * BKVASIZE - Nominal buffer space per buffer, in bytes. BKVASIZE is the * minimum KVM memory reservation the kernel is willing to make. * Filesystems can of course request smaller chunks. Actual * backing memory uses a chunk size of a page (PAGE_SIZE). * The default value here can be overridden on a per-architecture * basis by defining it in . This should * probably be done to increase its value, when MAXBCACHEBUF is * defined as a larger value in . * * If you make BKVASIZE too small you risk seriously fragmenting * the buffer KVM map which may slow things down a bit. If you * make it too big the kernel will not be able to optimally use * the KVM memory reserved for the buffer cache and will wind * up with too-few buffers. * * The default is 16384, roughly 2x the block size used by a * normal UFS filesystem. */ #define MAXBSIZE 65536 /* must be power of 2 */ #ifndef MAXBCACHEBUF #define MAXBCACHEBUF MAXBSIZE /* must be a power of 2 >= MAXBSIZE */ #endif #ifndef BKVASIZE #define BKVASIZE 16384 /* must be power of 2 */ #endif #define BKVAMASK (BKVASIZE-1) /* * MAXPATHLEN defines the longest permissible path length after expanding * symbolic links. It is used to allocate a temporary buffer from the buffer * pool in which to do the name expansion, hence should be a power of two, * and must be less than or equal to MAXBSIZE. MAXSYMLINKS defines the * maximum number of symbolic links that may be expanded in a path name. * It should be set high enough to allow all legitimate uses, but halt * infinite loops reasonably quickly. */ #define MAXPATHLEN PATH_MAX #define MAXSYMLINKS 32 /* Bit map related macros. */ #define setbit(a,i) (((unsigned char *)(a))[(i)/NBBY] |= 1<<((i)%NBBY)) #define clrbit(a,i) (((unsigned char *)(a))[(i)/NBBY] &= ~(1<<((i)%NBBY))) #define isset(a,i) \ (((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) #define isclr(a,i) \ ((((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) == 0) /* Macros for counting and rounding. */ #ifndef howmany #define howmany(x, y) (((x)+((y)-1))/(y)) #endif #define nitems(x) (sizeof((x)) / sizeof((x)[0])) #define rounddown(x, y) (((x)/(y))*(y)) #define rounddown2(x, y) ((x)&(~((y)-1))) /* if y is power of two */ #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) /* to any y */ #define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ #define powerof2(x) ((((x)-1)&(x))==0) /* Macros for min/max. */ #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) #ifdef _KERNEL /* * Basic byte order function prototypes for non-inline functions. */ #ifndef LOCORE #ifndef _BYTEORDER_PROTOTYPED #define _BYTEORDER_PROTOTYPED __BEGIN_DECLS __uint32_t htonl(__uint32_t); __uint16_t htons(__uint16_t); __uint32_t ntohl(__uint32_t); __uint16_t ntohs(__uint16_t); __END_DECLS #endif #endif #ifndef lint #ifndef _BYTEORDER_FUNC_DEFINED #define _BYTEORDER_FUNC_DEFINED #define htonl(x) __htonl(x) #define htons(x) __htons(x) #define ntohl(x) __ntohl(x) #define ntohs(x) __ntohs(x) #endif /* !_BYTEORDER_FUNC_DEFINED */ #endif /* lint */ #endif /* _KERNEL */ /* * Scale factor for scaled integers used to count %cpu time and load avgs. * * The number of CPU `tick's that map to a unique `%age' can be expressed * by the formula (1 / (2 ^ (FSHIFT - 11))). The maximum load average that * can be calculated (assuming 32 bits) can be closely approximated using * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15). * * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age', * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024. */ #define FSHIFT 11 /* bits to right of fixed binary point */ #define FSCALE (1<> (PAGE_SHIFT - DEV_BSHIFT)) #define ctodb(db) /* calculates pages to devblks */ \ ((db) << (PAGE_SHIFT - DEV_BSHIFT)) /* * Old spelling of __containerof(). */ #define member2struct(s, m, x) \ ((struct s *)(void *)((char *)(x) - offsetof(struct s, m))) /* * Access a variable length array that has been declared as a fixed * length array. */ #define __PAST_END(array, offset) (((__typeof__(*(array)) *)(array))[offset]) #endif /* _SYS_PARAM_H_ */ Index: head/sys/sys/stat.h =================================================================== --- head/sys/sys/stat.h (revision 318735) +++ head/sys/sys/stat.h (revision 318736) @@ -1,361 +1,402 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)stat.h 8.12 (Berkeley) 6/16/95 * $FreeBSD$ */ #ifndef _SYS_STAT_H_ #define _SYS_STAT_H_ #include #include #include #ifndef _BLKSIZE_T_DECLARED typedef __blksize_t blksize_t; #define _BLKSIZE_T_DECLARED #endif #ifndef _BLKCNT_T_DECLARED typedef __blkcnt_t blkcnt_t; #define _BLKCNT_T_DECLARED #endif #ifndef _DEV_T_DECLARED typedef __dev_t dev_t; #define _DEV_T_DECLARED #endif #ifndef _FFLAGS_T_DECLARED typedef __fflags_t fflags_t; #define _FFLAGS_T_DECLARED #endif #ifndef _GID_T_DECLARED typedef __gid_t gid_t; #define _GID_T_DECLARED #endif #ifndef _INO_T_DECLARED typedef __ino_t ino_t; #define _INO_T_DECLARED #endif #ifndef _MODE_T_DECLARED typedef __mode_t mode_t; #define _MODE_T_DECLARED #endif #ifndef _NLINK_T_DECLARED typedef __nlink_t nlink_t; #define _NLINK_T_DECLARED #endif #ifndef _OFF_T_DECLARED typedef __off_t off_t; #define _OFF_T_DECLARED #endif #ifndef _UID_T_DECLARED typedef __uid_t uid_t; #define _UID_T_DECLARED #endif #if !defined(_KERNEL) && __BSD_VISIBLE /* * XXX We get miscellaneous namespace pollution with this. */ #include #endif #ifdef _KERNEL struct ostat { __uint16_t st_dev; /* inode's device */ - ino_t st_ino; /* inode's number */ + __uint32_t st_ino; /* inode's number */ mode_t st_mode; /* inode protection mode */ - nlink_t st_nlink; /* number of hard links */ + __uint16_t st_nlink; /* number of hard links */ __uint16_t st_uid; /* user ID of the file's owner */ __uint16_t st_gid; /* group ID of the file's group */ __uint16_t st_rdev; /* device type */ __int32_t st_size; /* file size, in bytes */ struct timespec st_atim; /* time of last access */ struct timespec st_mtim; /* time of last data modification */ struct timespec st_ctim; /* time of last file status change */ __int32_t st_blksize; /* optimal blocksize for I/O */ __int32_t st_blocks; /* blocks allocated for file */ fflags_t st_flags; /* user defined flags for file */ __uint32_t st_gen; /* file generation number */ }; #endif -struct stat { - __dev_t st_dev; /* inode's device */ - ino_t st_ino; /* inode's number */ +#if defined(_WANT_FREEBSD11_STAT) || defined(_KERNEL) +struct freebsd11_stat { + __uint32_t st_dev; /* inode's device */ + __uint32_t st_ino; /* inode's number */ mode_t st_mode; /* inode protection mode */ - nlink_t st_nlink; /* number of hard links */ + __uint16_t st_nlink; /* number of hard links */ uid_t st_uid; /* user ID of the file's owner */ gid_t st_gid; /* group ID of the file's group */ - __dev_t st_rdev; /* device type */ + __uint32_t st_rdev; /* device type */ struct timespec st_atim; /* time of last access */ struct timespec st_mtim; /* time of last data modification */ struct timespec st_ctim; /* time of last file status change */ off_t st_size; /* file size, in bytes */ blkcnt_t st_blocks; /* blocks allocated for file */ blksize_t st_blksize; /* optimal blocksize for I/O */ fflags_t st_flags; /* user defined flags for file */ __uint32_t st_gen; /* file generation number */ __int32_t st_lspare; struct timespec st_birthtim; /* time of file creation */ /* * Explicitly pad st_birthtim to 16 bytes so that the size of * struct stat is backwards compatible. We use bitfields instead * of an array of chars so that this doesn't require a C99 compiler * to compile if the size of the padding is 0. We use 2 bitfields * to cover up to 64 bits on 32-bit machines. We assume that * CHAR_BIT is 8... */ unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec)); unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec)); }; +#endif /* _WANT_FREEBSD11_STAT || _KERNEL */ +#if defined(__i386__) +#define __STAT_TIME_T_EXT 1 +#endif + +struct stat { + dev_t st_dev; /* inode's device */ + ino_t st_ino; /* inode's number */ + nlink_t st_nlink; /* number of hard links */ + mode_t st_mode; /* inode protection mode */ + __int16_t st_padding0; + uid_t st_uid; /* user ID of the file's owner */ + gid_t st_gid; /* group ID of the file's group */ + __int32_t st_padding1; + dev_t st_rdev; /* device type */ +#ifdef __STAT_TIME_T_EXT + __int32_t st_atim_ext; +#endif + struct timespec st_atim; /* time of last access */ +#ifdef __STAT_TIME_T_EXT + __int32_t st_mtim_ext; +#endif + struct timespec st_mtim; /* time of last data modification */ +#ifdef __STAT_TIME_T_EXT + __int32_t st_ctim_ext; +#endif + struct timespec st_ctim; /* time of last file status change */ +#ifdef __STAT_TIME_T_EXT + __int32_t st_btim_ext; +#endif + struct timespec st_birthtim; /* time of file creation */ + off_t st_size; /* file size, in bytes */ + blkcnt_t st_blocks; /* blocks allocated for file */ + blksize_t st_blksize; /* optimal blocksize for I/O */ + fflags_t st_flags; /* user defined flags for file */ + __uint64_t st_gen; /* file generation number */ + __uint64_t st_spare[10]; +}; + #ifdef _KERNEL struct nstat { - __dev_t st_dev; /* inode's device */ - ino_t st_ino; /* inode's number */ + __uint32_t st_dev; /* inode's device */ + __uint32_t st_ino; /* inode's number */ __uint32_t st_mode; /* inode protection mode */ __uint32_t st_nlink; /* number of hard links */ uid_t st_uid; /* user ID of the file's owner */ gid_t st_gid; /* group ID of the file's group */ - __dev_t st_rdev; /* device type */ + __uint32_t st_rdev; /* device type */ struct timespec st_atim; /* time of last access */ struct timespec st_mtim; /* time of last data modification */ struct timespec st_ctim; /* time of last file status change */ off_t st_size; /* file size, in bytes */ blkcnt_t st_blocks; /* blocks allocated for file */ blksize_t st_blksize; /* optimal blocksize for I/O */ fflags_t st_flags; /* user defined flags for file */ __uint32_t st_gen; /* file generation number */ struct timespec st_birthtim; /* time of file creation */ /* - * See above about the following padding. + * See comment in the definition of struct freebsd11_stat + * above about the following padding. */ unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec)); unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec)); }; #endif #ifndef _KERNEL #define st_atime st_atim.tv_sec #define st_mtime st_mtim.tv_sec #define st_ctime st_ctim.tv_sec #if __BSD_VISIBLE #define st_birthtime st_birthtim.tv_sec #endif /* For compatibility. */ #if __BSD_VISIBLE #define st_atimespec st_atim #define st_mtimespec st_mtim #define st_ctimespec st_ctim #define st_birthtimespec st_birthtim #endif #endif /* !_KERNEL */ #define S_ISUID 0004000 /* set user id on execution */ #define S_ISGID 0002000 /* set group id on execution */ #if __BSD_VISIBLE #define S_ISTXT 0001000 /* sticky bit */ #endif #define S_IRWXU 0000700 /* RWX mask for owner */ #define S_IRUSR 0000400 /* R for owner */ #define S_IWUSR 0000200 /* W for owner */ #define S_IXUSR 0000100 /* X for owner */ #if __BSD_VISIBLE #define S_IREAD S_IRUSR #define S_IWRITE S_IWUSR #define S_IEXEC S_IXUSR #endif #define S_IRWXG 0000070 /* RWX mask for group */ #define S_IRGRP 0000040 /* R for group */ #define S_IWGRP 0000020 /* W for group */ #define S_IXGRP 0000010 /* X for group */ #define S_IRWXO 0000007 /* RWX mask for other */ #define S_IROTH 0000004 /* R for other */ #define S_IWOTH 0000002 /* W for other */ #define S_IXOTH 0000001 /* X for other */ #if __XSI_VISIBLE #define S_IFMT 0170000 /* type of file mask */ #define S_IFIFO 0010000 /* named pipe (fifo) */ #define S_IFCHR 0020000 /* character special */ #define S_IFDIR 0040000 /* directory */ #define S_IFBLK 0060000 /* block special */ #define S_IFREG 0100000 /* regular */ #define S_IFLNK 0120000 /* symbolic link */ #define S_IFSOCK 0140000 /* socket */ #define S_ISVTX 0001000 /* save swapped text even after use */ #endif #if __BSD_VISIBLE #define S_IFWHT 0160000 /* whiteout */ #endif #define S_ISDIR(m) (((m) & 0170000) == 0040000) /* directory */ #define S_ISCHR(m) (((m) & 0170000) == 0020000) /* char special */ #define S_ISBLK(m) (((m) & 0170000) == 0060000) /* block special */ #define S_ISREG(m) (((m) & 0170000) == 0100000) /* regular file */ #define S_ISFIFO(m) (((m) & 0170000) == 0010000) /* fifo or socket */ #if __POSIX_VISIBLE >= 200112 #define S_ISLNK(m) (((m) & 0170000) == 0120000) /* symbolic link */ #define S_ISSOCK(m) (((m) & 0170000) == 0140000) /* socket */ #endif #if __BSD_VISIBLE #define S_ISWHT(m) (((m) & 0170000) == 0160000) /* whiteout */ #endif #if __BSD_VISIBLE #define ACCESSPERMS (S_IRWXU|S_IRWXG|S_IRWXO) /* 0777 */ /* 7777 */ #define ALLPERMS (S_ISUID|S_ISGID|S_ISTXT|S_IRWXU|S_IRWXG|S_IRWXO) /* 0666 */ #define DEFFILEMODE (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH) #define S_BLKSIZE 512 /* block size used in the stat struct */ /* * Definitions of flags stored in file flags word. * * Super-user and owner changeable flags. */ #define UF_SETTABLE 0x0000ffff /* mask of owner changeable flags */ #define UF_NODUMP 0x00000001 /* do not dump file */ #define UF_IMMUTABLE 0x00000002 /* file may not be changed */ #define UF_APPEND 0x00000004 /* writes to file may only append */ #define UF_OPAQUE 0x00000008 /* directory is opaque wrt. union */ #define UF_NOUNLINK 0x00000010 /* file may not be removed or renamed */ /* * These two bits are defined in MacOS X. They are not currently used in * FreeBSD. */ #if 0 #define UF_COMPRESSED 0x00000020 /* file is compressed */ #define UF_TRACKED 0x00000040 /* renames and deletes are tracked */ #endif #define UF_SYSTEM 0x00000080 /* Windows system file bit */ #define UF_SPARSE 0x00000100 /* sparse file */ #define UF_OFFLINE 0x00000200 /* file is offline */ #define UF_REPARSE 0x00000400 /* Windows reparse point file bit */ #define UF_ARCHIVE 0x00000800 /* file needs to be archived */ #define UF_READONLY 0x00001000 /* Windows readonly file bit */ /* This is the same as the MacOS X definition of UF_HIDDEN. */ #define UF_HIDDEN 0x00008000 /* file is hidden */ /* * Super-user changeable flags. */ #define SF_SETTABLE 0xffff0000 /* mask of superuser changeable flags */ #define SF_ARCHIVED 0x00010000 /* file is archived */ #define SF_IMMUTABLE 0x00020000 /* file may not be changed */ #define SF_APPEND 0x00040000 /* writes to file may only append */ #define SF_NOUNLINK 0x00100000 /* file may not be removed or renamed */ #define SF_SNAPSHOT 0x00200000 /* snapshot inode */ #ifdef _KERNEL /* * Shorthand abbreviations of above. */ #define OPAQUE (UF_OPAQUE) #define APPEND (UF_APPEND | SF_APPEND) #define IMMUTABLE (UF_IMMUTABLE | SF_IMMUTABLE) #define NOUNLINK (UF_NOUNLINK | SF_NOUNLINK) #endif #endif /* __BSD_VISIBLE */ #if __POSIX_VISIBLE >= 200809 #define UTIME_NOW -1 #define UTIME_OMIT -2 #endif #ifndef _KERNEL __BEGIN_DECLS #if __BSD_VISIBLE int chflags(const char *, unsigned long); int chflagsat(int, const char *, unsigned long, int); #endif int chmod(const char *, mode_t); #if __BSD_VISIBLE int fchflags(int, unsigned long); #endif #if __POSIX_VISIBLE >= 200112 int fchmod(int, mode_t); #endif #if __POSIX_VISIBLE >= 200809 int fchmodat(int, const char *, mode_t, int); int futimens(int fd, const struct timespec times[2]); int utimensat(int fd, const char *path, const struct timespec times[2], int flag); #endif int fstat(int, struct stat *); #if __BSD_VISIBLE int lchflags(const char *, unsigned long); int lchmod(const char *, mode_t); #endif #if __POSIX_VISIBLE >= 200112 int lstat(const char * __restrict, struct stat * __restrict); #endif int mkdir(const char *, mode_t); int mkfifo(const char *, mode_t); #if !defined(_MKNOD_DECLARED) && __XSI_VISIBLE int mknod(const char *, mode_t, dev_t); #define _MKNOD_DECLARED #endif int stat(const char * __restrict, struct stat * __restrict); mode_t umask(mode_t); #if __POSIX_VISIBLE >= 200809 int fstatat(int, const char *, struct stat *, int); int mkdirat(int, const char *, mode_t); int mkfifoat(int, const char *, mode_t); #endif #if __XSI_VISIBLE >= 700 int mknodat(int, const char *, mode_t, dev_t); #endif __END_DECLS #endif /* !_KERNEL */ #endif /* !_SYS_STAT_H_ */ Index: head/sys/sys/syscallsubr.h =================================================================== --- head/sys/sys/syscallsubr.h (revision 318735) +++ head/sys/sys/syscallsubr.h (revision 318736) @@ -1,295 +1,300 @@ /*- * Copyright (c) 2002 Ian Dowse. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_SYSCALLSUBR_H_ #define _SYS_SYSCALLSUBR_H_ #include #include #include #include #include #include struct file; struct filecaps; enum idtype; struct itimerval; struct image_args; struct jail; struct kevent; struct kevent_copyops; struct kld_file_stat; struct ksiginfo; struct mbuf; struct msghdr; struct msqid_ds; struct pollfd; struct ogetdirentries_args; struct rlimit; struct rusage; union semun; struct sockaddr; struct stat; struct thr_param; struct sched_param; struct __wrusage; int kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg, size_t buflen, size_t path_max); int kern_accept(struct thread *td, int s, struct sockaddr **name, socklen_t *namelen, struct file **fp); int kern_accept4(struct thread *td, int s, struct sockaddr **name, socklen_t *namelen, int flags, struct file **fp); int kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int flags, int mode); int kern_adjtime(struct thread *td, struct timeval *delta, struct timeval *olddelta); int kern_alternate_path(struct thread *td, const char *prefix, const char *path, enum uio_seg pathseg, char **pathbuf, int create, int dirfd); int kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa); int kern_cap_ioctls_limit(struct thread *td, int fd, u_long *cmds, size_t ncmds); int kern_cap_rights_limit(struct thread *td, int fd, cap_rights_t *rights); int kern_chdir(struct thread *td, char *path, enum uio_seg pathseg); int kern_clock_getcpuclockid2(struct thread *td, id_t id, int which, clockid_t *clk_id); int kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts); int kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats); int kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec *rqtp, struct timespec *rmtp); int kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats); int kern_close(struct thread *td, int fd); int kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa); int kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp); int kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t cpusetsize, const cpuset_t *maskp); int kern_cpuset_getid(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, cpusetid_t *setid); int kern_cpuset_setid(struct thread *td, cpuwhich_t which, id_t id, cpusetid_t setid); int kern_dup(struct thread *td, u_int mode, int flags, int old, int new); int kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p); int kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, mode_t mode, int flag); int kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int uid, int gid, int flag); int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg); int kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg); int kern_fhstat(struct thread *td, fhandle_t fh, struct stat *buf); int kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf); int kern_fstat(struct thread *td, int fd, struct stat *sbp); int kern_fstatfs(struct thread *td, int fd, struct statfs *buf); int kern_fsync(struct thread *td, int fd, bool fullsync); int kern_ftruncate(struct thread *td, int fd, off_t length); int kern_futimes(struct thread *td, int fd, struct timeval *tptr, enum uio_seg tptrseg); int kern_futimens(struct thread *td, int fd, struct timespec *tptr, enum uio_seg tptrseg); -int kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, - long *basep, ssize_t *residp, enum uio_seg bufseg); +int kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, + off_t *basep, ssize_t *residp, enum uio_seg bufseg); int kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, size_t *countp, enum uio_seg bufseg, int mode); int kern_getitimer(struct thread *, u_int, struct itimerval *); int kern_getppid(struct thread *); int kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen); int kern_getrusage(struct thread *td, int who, struct rusage *rup); int kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen); int kern_getsockopt(struct thread *td, int s, int level, int name, void *optval, enum uio_seg valseg, socklen_t *valsize); int kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data); int kern_jail(struct thread *td, struct jail *j); int kern_jail_get(struct thread *td, struct uio *options, int flags); int kern_jail_set(struct thread *td, struct uio *options, int flags); int kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout); int kern_kevent_anonymous(struct thread *td, int nevents, struct kevent_copyops *k_ops); int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout); int kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps); int kern_kldload(struct thread *td, const char *file, int *fileid); int kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat); int kern_kldunload(struct thread *td, int fileid, int flags); int kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, enum uio_seg segflg, int follow); int kern_listen(struct thread *td, int s, int backlog); int kern_lseek(struct thread *td, int fd, off_t offset, int whence); int kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg); int kern_madvise(struct thread *td, uintptr_t addr, size_t len, int behav); int kern_mincore(struct thread *td, uintptr_t addr, size_t len, char *vec); int kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, int mode); int kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int mode); int kern_mknodat(struct thread *td, int fd, char *path, - enum uio_seg pathseg, int mode, int dev); + enum uio_seg pathseg, int mode, dev_t dev); int kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr, size_t len); int kern_mmap(struct thread *td, uintptr_t addr, size_t size, int prot, int flags, int fd, off_t pos); int kern_mprotect(struct thread *td, uintptr_t addr, size_t size, int prot); int kern_msgctl(struct thread *, int, int, struct msqid_ds *); int kern_msgrcv(struct thread *, int, void *, size_t, long, int, long *); int kern_msgsnd(struct thread *, int, const void *, size_t, int, long); int kern_msync(struct thread *td, uintptr_t addr, size_t size, int flags); int kern_munlock(struct thread *td, uintptr_t addr, size_t size); int kern_munmap(struct thread *td, uintptr_t addr, size_t size); int kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt); int kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, long *ploff); int kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int flags, int mode); int kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, u_long flags); int kern_pipe(struct thread *td, int fildes[2], int flags, struct filecaps *fcaps1, struct filecaps *fcaps2); int kern_poll(struct thread *td, struct pollfd *fds, u_int nfds, struct timespec *tsp, sigset_t *uset); int kern_posix_error(struct thread *td, int error); int kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, int advice); int kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len); int kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com, void *data); int kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, off_t offset); int kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset); int kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou, fd_set *ex, struct timeval *tvp, sigset_t *uset, int abi_nfdbits); int kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data); int kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte, off_t offset); int kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset); int kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count); int kern_readv(struct thread *td, int fd, struct uio *auio); int kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg, struct mbuf **controlp); int kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, enum uio_seg pathseg); int kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg); int kern_sched_getparam(struct thread *td, struct thread *targettd, struct sched_param *param); int kern_sched_getscheduler(struct thread *td, struct thread *targettd, int *policy); int kern_sched_setparam(struct thread *td, struct thread *targettd, struct sched_param *param); int kern_sched_setscheduler(struct thread *td, struct thread *targettd, int policy, struct sched_param *param); int kern_sched_rr_get_interval(struct thread *td, pid_t pid, struct timespec *ts); int kern_sched_rr_get_interval_td(struct thread *td, struct thread *targettd, struct timespec *ts); int kern_semctl(struct thread *td, int semid, int semnum, int cmd, union semun *arg, register_t *rval); int kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits); int kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags, struct mbuf *control, enum uio_seg segflg); int kern_setgroups(struct thread *td, u_int ngrp, gid_t *groups); int kern_setitimer(struct thread *, u_int, struct itimerval *, struct itimerval *); int kern_setrlimit(struct thread *, u_int, struct rlimit *); int kern_setsockopt(struct thread *td, int s, int level, int name, void *optval, enum uio_seg valseg, socklen_t valsize); int kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp); int kern_shm_open(struct thread *td, const char *userpath, int flags, mode_t mode, struct filecaps *fcaps); int kern_shmat(struct thread *td, int shmid, const void *shmaddr, int shmflg); int kern_shmctl(struct thread *td, int shmid, int cmd, void *buf, size_t *bufsz); int kern_shutdown(struct thread *td, int s, int how); int kern_sigaction(struct thread *td, int sig, const struct sigaction *act, struct sigaction *oact, int flags); int kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss); int kern_sigprocmask(struct thread *td, int how, sigset_t *set, sigset_t *oset, int flags); int kern_sigsuspend(struct thread *td, sigset_t mask); int kern_sigtimedwait(struct thread *td, sigset_t waitset, struct ksiginfo *ksi, struct timespec *timeout); int kern_sigqueue(struct thread *td, pid_t pid, int signum, union sigval *value); int kern_socket(struct thread *td, int domain, int type, int protocol); int kern_statat(struct thread *td, int flag, int fd, char *path, enum uio_seg pathseg, struct stat *sbp, void (*hook)(struct vnode *vp, struct stat *sbp)); int kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, struct statfs *buf); int kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, enum uio_seg segflg); int kern_ktimer_create(struct thread *td, clockid_t clock_id, struct sigevent *evp, int *timerid, int preset_id); int kern_ktimer_delete(struct thread *, int); int kern_ktimer_settime(struct thread *td, int timer_id, int flags, struct itimerspec *val, struct itimerspec *oval); int kern_ktimer_gettime(struct thread *td, int timer_id, struct itimerspec *val); int kern_ktimer_getoverrun(struct thread *td, int timer_id); int kern_thr_alloc(struct proc *, int pages, struct thread **); int kern_thr_exit(struct thread *td); int kern_thr_new(struct thread *td, struct thr_param *param); int kern_thr_suspend(struct thread *td, struct timespec *tsp); int kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length); int kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, ino_t oldinum); int kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg); int kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg, int follow); int kern_wait(struct thread *td, pid_t pid, int *status, int options, struct rusage *rup); int kern_wait6(struct thread *td, enum idtype idtype, id_t id, int *status, int options, struct __wrusage *wrup, siginfo_t *sip); int kern_writev(struct thread *td, int fd, struct uio *auio); int kern_socketpair(struct thread *td, int domain, int type, int protocol, int *rsv); /* flags for kern_sigaction */ #define KSA_OSIGSET 0x0001 /* uses osigact_t */ #define KSA_FREEBSD4 0x0002 /* uses ucontext4 */ + +struct freebsd11_dirent; + +int freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int + count, long *basep, void (*func)(struct freebsd11_dirent *)); #endif /* !_SYS_SYSCALLSUBR_H_ */ Index: head/sys/sys/tty.h =================================================================== --- head/sys/sys/tty.h (revision 318735) +++ head/sys/sys/tty.h (revision 318736) @@ -1,228 +1,228 @@ /*- * Copyright (c) 2008 Ed Schouten * All rights reserved. * * Portions of this software were developed under sponsorship from Snow * B.V., the Netherlands. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_TTY_H_ #define _SYS_TTY_H_ #include #include #include #include #include #include #include #include #include struct cdev; struct file; struct pgrp; struct session; struct ucred; struct ttydevsw; /* * Per-TTY structure, containing buffers, etc. * * List of locks * (t) locked by t_mtx * (l) locked by tty_list_sx * (c) const until freeing */ struct tty { struct mtx *t_mtx; /* TTY lock. */ struct mtx t_mtxobj; /* Per-TTY lock (when not borrowing). */ TAILQ_ENTRY(tty) t_list; /* (l) TTY list entry. */ int t_drainwait; /* (t) TIOCDRAIN timeout seconds. */ unsigned int t_flags; /* (t) Terminal option flags. */ /* Keep flags in sync with db_show_tty and pstat(8). */ #define TF_NOPREFIX 0x00001 /* Don't prepend "tty" to device name. */ #define TF_INITLOCK 0x00002 /* Create init/lock state devices. */ #define TF_CALLOUT 0x00004 /* Create "cua" devices. */ #define TF_OPENED_IN 0x00008 /* "tty" node is in use. */ #define TF_OPENED_OUT 0x00010 /* "cua" node is in use. */ #define TF_OPENED_CONS 0x00020 /* Device in use as console. */ #define TF_OPENED (TF_OPENED_IN|TF_OPENED_OUT|TF_OPENED_CONS) #define TF_GONE 0x00040 /* Device node is gone. */ #define TF_OPENCLOSE 0x00080 /* Device is in open()/close(). */ #define TF_ASYNC 0x00100 /* Asynchronous I/O enabled. */ #define TF_LITERAL 0x00200 /* Accept the next character literally. */ #define TF_HIWAT_IN 0x00400 /* We've reached the input watermark. */ #define TF_HIWAT_OUT 0x00800 /* We've reached the output watermark. */ #define TF_HIWAT (TF_HIWAT_IN|TF_HIWAT_OUT) #define TF_STOPPED 0x01000 /* Output flow control - stopped. */ #define TF_EXCLUDE 0x02000 /* Exclusive access. */ #define TF_BYPASS 0x04000 /* Optimized input path. */ #define TF_ZOMBIE 0x08000 /* Modem disconnect received. */ #define TF_HOOK 0x10000 /* TTY has hook attached. */ #define TF_BUSY_IN 0x20000 /* Process busy in read() -- not supported. */ #define TF_BUSY_OUT 0x40000 /* Process busy in write(). */ #define TF_BUSY (TF_BUSY_IN|TF_BUSY_OUT) unsigned int t_revokecnt; /* (t) revoke() count. */ /* Buffering mechanisms. */ struct ttyinq t_inq; /* (t) Input queue. */ size_t t_inlow; /* (t) Input low watermark. */ struct ttyoutq t_outq; /* (t) Output queue. */ size_t t_outlow; /* (t) Output low watermark. */ /* Sleeping mechanisms. */ struct cv t_inwait; /* (t) Input wait queue. */ struct cv t_outwait; /* (t) Output wait queue. */ struct cv t_outserwait; /* (t) Serial output wait queue. */ struct cv t_bgwait; /* (t) Background wait queue. */ struct cv t_dcdwait; /* (t) Carrier Detect wait queue. */ /* Polling mechanisms. */ struct selinfo t_inpoll; /* (t) Input poll queue. */ struct selinfo t_outpoll; /* (t) Output poll queue. */ struct sigio *t_sigio; /* (t) Asynchronous I/O. */ struct termios t_termios; /* (t) I/O processing flags. */ struct winsize t_winsize; /* (t) Window size. */ unsigned int t_column; /* (t) Current cursor position. */ unsigned int t_writepos; /* (t) Where input was interrupted. */ int t_compatflags; /* (t) COMPAT_43TTY flags. */ /* Init/lock-state devices. */ struct termios t_termios_init_in; /* tty%s.init. */ struct termios t_termios_lock_in; /* tty%s.lock. */ struct termios t_termios_init_out; /* cua%s.init. */ struct termios t_termios_lock_out; /* cua%s.lock. */ struct ttydevsw *t_devsw; /* (c) Driver hooks. */ struct ttyhook *t_hook; /* (t) Capture/inject hook. */ /* Process signal delivery. */ struct pgrp *t_pgrp; /* (t) Foreground process group. */ struct session *t_session; /* (t) Associated session. */ unsigned int t_sessioncnt; /* (t) Backpointing sessions. */ void *t_devswsoftc; /* (c) Soft config, for drivers. */ void *t_hooksoftc; /* (t) Soft config, for hooks. */ struct cdev *t_dev; /* (c) Primary character device. */ }; /* * Userland version of struct tty, for sysctl kern.ttys */ struct xtty { size_t xt_size; /* Structure size. */ size_t xt_insize; /* Input queue size. */ size_t xt_incc; /* Canonicalized characters. */ size_t xt_inlc; /* Input line charaters. */ size_t xt_inlow; /* Input low watermark. */ size_t xt_outsize; /* Output queue size. */ size_t xt_outcc; /* Output queue usage. */ size_t xt_outlow; /* Output low watermark. */ unsigned int xt_column; /* Current column position. */ pid_t xt_pgid; /* Foreground process group. */ pid_t xt_sid; /* Session. */ unsigned int xt_flags; /* Terminal option flags. */ - dev_t xt_dev; /* Userland device. */ + uint32_t xt_dev; /* Userland device. XXXKIB truncated */ }; #ifdef _KERNEL /* Used to distinguish between normal, callout, lock and init devices. */ #define TTYUNIT_INIT 0x1 #define TTYUNIT_LOCK 0x2 #define TTYUNIT_CALLOUT 0x4 /* Allocation and deallocation. */ struct tty *tty_alloc(struct ttydevsw *tsw, void *softc); struct tty *tty_alloc_mutex(struct ttydevsw *tsw, void *softc, struct mtx *mtx); void tty_rel_pgrp(struct tty *tp, struct pgrp *pgrp); void tty_rel_sess(struct tty *tp, struct session *sess); void tty_rel_gone(struct tty *tp); #define tty_lock(tp) mtx_lock((tp)->t_mtx) #define tty_unlock(tp) mtx_unlock((tp)->t_mtx) #define tty_lock_owned(tp) mtx_owned((tp)->t_mtx) #define tty_lock_assert(tp,ma) mtx_assert((tp)->t_mtx, (ma)) #define tty_getlock(tp) ((tp)->t_mtx) /* Device node creation. */ int tty_makedevf(struct tty *tp, struct ucred *cred, int flags, const char *fmt, ...) __printflike(4, 5); #define TTYMK_CLONING 0x1 #define tty_makedev(tp, cred, fmt, ...) \ (void )tty_makedevf((tp), (cred), 0, (fmt), ## __VA_ARGS__) #define tty_makealias(tp,fmt,...) \ make_dev_alias((tp)->t_dev, fmt, ## __VA_ARGS__) /* Signalling processes. */ void tty_signal_sessleader(struct tty *tp, int signal); void tty_signal_pgrp(struct tty *tp, int signal); /* Waking up readers/writers. */ int tty_wait(struct tty *tp, struct cv *cv); int tty_wait_background(struct tty *tp, struct thread *td, int sig); int tty_timedwait(struct tty *tp, struct cv *cv, int timo); void tty_wakeup(struct tty *tp, int flags); /* System messages. */ int tty_checkoutq(struct tty *tp); int tty_putchar(struct tty *tp, char c); int tty_ioctl(struct tty *tp, u_long cmd, void *data, int fflag, struct thread *td); int tty_ioctl_compat(struct tty *tp, u_long cmd, caddr_t data, int fflag, struct thread *td); void tty_set_winsize(struct tty *tp, const struct winsize *wsz); void tty_init_console(struct tty *tp, speed_t speed); void tty_flush(struct tty *tp, int flags); void tty_hiwat_in_block(struct tty *tp); void tty_hiwat_in_unblock(struct tty *tp); dev_t tty_udev(struct tty *tp); #define tty_opened(tp) ((tp)->t_flags & TF_OPENED) #define tty_gone(tp) ((tp)->t_flags & TF_GONE) #define tty_softc(tp) ((tp)->t_devswsoftc) #define tty_devname(tp) devtoname((tp)->t_dev) /* Status line printing. */ void tty_info(struct tty *tp); /* /dev/console selection. */ void ttyconsdev_select(const char *name); /* Pseudo-terminal hooks. */ int pts_alloc(int fflags, struct thread *td, struct file *fp); int pts_alloc_external(int fd, struct thread *td, struct file *fp, struct cdev *dev, const char *name); /* Drivers and line disciplines also need to call these. */ #include #include #include #endif /* _KERNEL */ #endif /* !_SYS_TTY_H_ */ Index: head/sys/sys/user.h =================================================================== --- head/sys/sys/user.h (revision 318735) +++ head/sys/sys/user.h (revision 318736) @@ -1,569 +1,603 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. * Copyright (c) 2007 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)user.h 8.2 (Berkeley) 9/23/93 * $FreeBSD$ */ #ifndef _SYS_USER_H_ #define _SYS_USER_H_ #include #ifndef _KERNEL /* stuff that *used* to be included by user.h, or is now needed */ #include #include #include #include #include #include #include #include #include #include /* XXX */ #include /* XXX */ #include /* XXX */ #include /* XXX */ #endif /* !_KERNEL */ #ifndef _SYS_RESOURCEVAR_H_ #include #endif #ifndef _SYS_SIGNALVAR_H_ #include #endif #ifndef _SYS_SOCKET_VAR_H_ #include #endif #include /* * KERN_PROC subtype ops return arrays of selected proc structure entries: * * This struct includes several arrays of spare space, with different arrays * for different standard C-types. When adding new variables to this struct, * the space for byte-aligned data should be taken from the ki_sparestring, * pointers from ki_spareptrs, word-aligned data from ki_spareints, and * doubleword-aligned data from ki_sparelongs. Make sure the space for new * variables come from the array which matches the size and alignment of * those variables on ALL hardware platforms, and then adjust the appropriate * KI_NSPARE_* value(s) to match. * * Always verify that sizeof(struct kinfo_proc) == KINFO_PROC_SIZE on all * platforms after you have added new variables. Note that if you change * the value of KINFO_PROC_SIZE, then many userland programs will stop * working until they are recompiled! * * Once you have added the new field, you will need to add code to initialize * it in two places: function fill_kinfo_proc in sys/kern/kern_proc.c and * function kvm_proclist in lib/libkvm/kvm_proc.c . */ -#define KI_NSPARE_INT 4 +#define KI_NSPARE_INT 2 #define KI_NSPARE_LONG 12 #define KI_NSPARE_PTR 6 #ifndef _KERNEL #ifndef KINFO_PROC_SIZE #error "Unknown architecture" #endif #endif /* !_KERNEL */ #define WMESGLEN 8 /* size of returned wchan message */ #define LOCKNAMELEN 8 /* size of returned lock name */ #define TDNAMLEN 16 /* size of returned thread name */ #define COMMLEN 19 /* size of returned ki_comm name */ #define KI_EMULNAMELEN 16 /* size of returned ki_emul */ #define KI_NGROUPS 16 /* number of groups in ki_groups */ #define LOGNAMELEN 17 /* size of returned ki_login */ #define LOGINCLASSLEN 17 /* size of returned ki_loginclass */ #ifndef BURN_BRIDGES #define OCOMMLEN TDNAMLEN #define ki_ocomm ki_tdname #endif /* Flags for the process credential. */ #define KI_CRF_CAPABILITY_MODE 0x00000001 /* * Steal a bit from ki_cr_flags to indicate that the cred had more than * KI_NGROUPS groups. */ #define KI_CRF_GRP_OVERFLOW 0x80000000 struct kinfo_proc { int ki_structsize; /* size of this structure */ int ki_layout; /* reserved: layout identifier */ struct pargs *ki_args; /* address of command arguments */ struct proc *ki_paddr; /* address of proc */ struct user *ki_addr; /* kernel virtual addr of u-area */ struct vnode *ki_tracep; /* pointer to trace file */ struct vnode *ki_textvp; /* pointer to executable file */ struct filedesc *ki_fd; /* pointer to open file info */ struct vmspace *ki_vmspace; /* pointer to kernel vmspace struct */ void *ki_wchan; /* sleep address */ pid_t ki_pid; /* Process identifier */ pid_t ki_ppid; /* parent process id */ pid_t ki_pgid; /* process group id */ pid_t ki_tpgid; /* tty process group id */ pid_t ki_sid; /* Process session ID */ pid_t ki_tsid; /* Terminal session ID */ short ki_jobc; /* job control counter */ short ki_spare_short1; /* unused (just here for alignment) */ - dev_t ki_tdev; /* controlling tty dev */ + uint32_t ki_tdev_freebsd11; /* controlling tty dev */ sigset_t ki_siglist; /* Signals arrived but not delivered */ sigset_t ki_sigmask; /* Current signal mask */ sigset_t ki_sigignore; /* Signals being ignored */ sigset_t ki_sigcatch; /* Signals being caught by user */ uid_t ki_uid; /* effective user id */ uid_t ki_ruid; /* Real user id */ uid_t ki_svuid; /* Saved effective user id */ gid_t ki_rgid; /* Real group id */ gid_t ki_svgid; /* Saved effective group id */ short ki_ngroups; /* number of groups */ short ki_spare_short2; /* unused (just here for alignment) */ gid_t ki_groups[KI_NGROUPS]; /* groups */ vm_size_t ki_size; /* virtual size */ segsz_t ki_rssize; /* current resident set size in pages */ segsz_t ki_swrss; /* resident set size before last swap */ segsz_t ki_tsize; /* text size (pages) XXX */ segsz_t ki_dsize; /* data size (pages) XXX */ segsz_t ki_ssize; /* stack size (pages) */ u_short ki_xstat; /* Exit status for wait & stop signal */ u_short ki_acflag; /* Accounting flags */ fixpt_t ki_pctcpu; /* %cpu for process during ki_swtime */ u_int ki_estcpu; /* Time averaged value of ki_cpticks */ u_int ki_slptime; /* Time since last blocked */ u_int ki_swtime; /* Time swapped in or out */ u_int ki_cow; /* number of copy-on-write faults */ u_int64_t ki_runtime; /* Real time in microsec */ struct timeval ki_start; /* starting time */ struct timeval ki_childtime; /* time used by process children */ long ki_flag; /* P_* flags */ long ki_kiflag; /* KI_* flags (below) */ int ki_traceflag; /* Kernel trace points */ char ki_stat; /* S* process status */ signed char ki_nice; /* Process "nice" value */ char ki_lock; /* Process lock (prevent swap) count */ char ki_rqindex; /* Run queue index */ u_char ki_oncpu_old; /* Which cpu we are on (legacy) */ u_char ki_lastcpu_old; /* Last cpu we were on (legacy) */ char ki_tdname[TDNAMLEN+1]; /* thread name */ char ki_wmesg[WMESGLEN+1]; /* wchan message */ char ki_login[LOGNAMELEN+1]; /* setlogin name */ char ki_lockname[LOCKNAMELEN+1]; /* lock name */ char ki_comm[COMMLEN+1]; /* command name */ char ki_emul[KI_EMULNAMELEN+1]; /* emulation name */ char ki_loginclass[LOGINCLASSLEN+1]; /* login class */ char ki_moretdname[MAXCOMLEN-TDNAMLEN+1]; /* more thread name */ /* * When adding new variables, take space for char-strings from the * front of ki_sparestrings, and ints from the end of ki_spareints. * That way the spare room from both arrays will remain contiguous. */ char ki_sparestrings[46]; /* spare string space */ int ki_spareints[KI_NSPARE_INT]; /* spare room for growth */ + uint64_t ki_tdev; /* controlling tty dev */ int ki_oncpu; /* Which cpu we are on */ int ki_lastcpu; /* Last cpu we were on */ int ki_tracer; /* Pid of tracing process */ int ki_flag2; /* P2_* flags */ int ki_fibnum; /* Default FIB number */ u_int ki_cr_flags; /* Credential flags */ int ki_jid; /* Process jail ID */ int ki_numthreads; /* XXXKSE number of threads in total */ lwpid_t ki_tid; /* XXXKSE thread id */ struct priority ki_pri; /* process priority */ struct rusage ki_rusage; /* process rusage statistics */ /* XXX - most fields in ki_rusage_ch are not (yet) filled in */ struct rusage ki_rusage_ch; /* rusage of children processes */ struct pcb *ki_pcb; /* kernel virtual addr of pcb */ void *ki_kstack; /* kernel virtual addr of stack */ void *ki_udata; /* User convenience pointer */ struct thread *ki_tdaddr; /* address of thread */ /* * When adding new variables, take space for pointers from the * front of ki_spareptrs, and longs from the end of ki_sparelongs. * That way the spare room from both arrays will remain contiguous. */ void *ki_spareptrs[KI_NSPARE_PTR]; /* spare room for growth */ long ki_sparelongs[KI_NSPARE_LONG]; /* spare room for growth */ long ki_sflag; /* PS_* flags */ long ki_tdflags; /* XXXKSE kthread flag */ }; void fill_kinfo_proc(struct proc *, struct kinfo_proc *); /* XXX - the following two defines are temporary */ #define ki_childstime ki_rusage_ch.ru_stime #define ki_childutime ki_rusage_ch.ru_utime /* * Legacy PS_ flag. This moved to p_flag but is maintained for * compatibility. */ #define PS_INMEM 0x00001 /* Loaded into memory. */ /* ki_sessflag values */ #define KI_CTTY 0x00000001 /* controlling tty vnode active */ #define KI_SLEADER 0x00000002 /* session leader */ #define KI_LOCKBLOCK 0x00000004 /* proc blocked on lock ki_lockname */ /* * This used to be the per-process structure containing data that * isn't needed in core when the process is swapped out, but now it * remains only for the benefit of a.out core dumps. */ struct user { struct pstats u_stats; /* *p_stats */ struct kinfo_proc u_kproc; /* eproc */ }; /* * The KERN_PROC_FILE sysctl allows a process to dump the file descriptor * array of another process. */ #define KF_ATTR_VALID 0x0001 #define KF_TYPE_NONE 0 #define KF_TYPE_VNODE 1 #define KF_TYPE_SOCKET 2 #define KF_TYPE_PIPE 3 #define KF_TYPE_FIFO 4 #define KF_TYPE_KQUEUE 5 #define KF_TYPE_CRYPTO 6 #define KF_TYPE_MQUEUE 7 #define KF_TYPE_SHM 8 #define KF_TYPE_SEM 9 #define KF_TYPE_PTS 10 #define KF_TYPE_PROCDESC 11 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 #define KF_VTYPE_VREG 1 #define KF_VTYPE_VDIR 2 #define KF_VTYPE_VBLK 3 #define KF_VTYPE_VCHR 4 #define KF_VTYPE_VLNK 5 #define KF_VTYPE_VSOCK 6 #define KF_VTYPE_VFIFO 7 #define KF_VTYPE_VBAD 8 #define KF_VTYPE_UNKNOWN 255 #define KF_FD_TYPE_CWD -1 /* Current working directory */ #define KF_FD_TYPE_ROOT -2 /* Root directory */ #define KF_FD_TYPE_JAIL -3 /* Jail directory */ #define KF_FD_TYPE_TRACE -4 /* Ktrace vnode */ #define KF_FD_TYPE_TEXT -5 /* Text vnode */ #define KF_FD_TYPE_CTTY -6 /* Controlling terminal */ #define KF_FLAG_READ 0x00000001 #define KF_FLAG_WRITE 0x00000002 #define KF_FLAG_APPEND 0x00000004 #define KF_FLAG_ASYNC 0x00000008 #define KF_FLAG_FSYNC 0x00000010 #define KF_FLAG_NONBLOCK 0x00000020 #define KF_FLAG_DIRECT 0x00000040 #define KF_FLAG_HASLOCK 0x00000080 #define KF_FLAG_SHLOCK 0x00000100 #define KF_FLAG_EXLOCK 0x00000200 #define KF_FLAG_NOFOLLOW 0x00000400 #define KF_FLAG_CREAT 0x00000800 #define KF_FLAG_TRUNC 0x00001000 #define KF_FLAG_EXCL 0x00002000 #define KF_FLAG_EXEC 0x00004000 /* * Old format. Has variable hidden padding due to alignment. * This is a compatibility hack for pre-build 7.1 packages. */ #if defined(__amd64__) #define KINFO_OFILE_SIZE 1328 #endif #if defined(__i386__) #define KINFO_OFILE_SIZE 1324 #endif struct kinfo_ofile { int kf_structsize; /* Size of kinfo_file. */ int kf_type; /* Descriptor type. */ int kf_fd; /* Array index. */ int kf_ref_count; /* Reference count. */ int kf_flags; /* Flags. */ /* XXX Hidden alignment padding here on amd64 */ off_t kf_offset; /* Seek location. */ int kf_vnode_type; /* Vnode type. */ int kf_sock_domain; /* Socket domain. */ int kf_sock_type; /* Socket type. */ int kf_sock_protocol; /* Socket protocol. */ char kf_path[PATH_MAX]; /* Path to file, if any. */ struct sockaddr_storage kf_sa_local; /* Socket address. */ struct sockaddr_storage kf_sa_peer; /* Peer address. */ }; #if defined(__amd64__) || defined(__i386__) /* * This size should never be changed. If you really need to, you must provide * backward ABI compatibility by allocating a new sysctl MIB that will return * the new structure. The current structure has to be returned by the current * sysctl MIB. See how it is done for the kinfo_ofile structure. */ #define KINFO_FILE_SIZE 1392 #endif struct kinfo_file { int kf_structsize; /* Variable size of record. */ int kf_type; /* Descriptor type. */ int kf_fd; /* Array index. */ int kf_ref_count; /* Reference count. */ int kf_flags; /* Flags. */ int kf_pad0; /* Round to 64 bit alignment. */ int64_t kf_offset; /* Seek location. */ - int kf_vnode_type; /* Vnode type. */ - int kf_sock_domain; /* Socket domain. */ - int kf_sock_type; /* Socket type. */ - int kf_sock_protocol; /* Socket protocol. */ - struct sockaddr_storage kf_sa_local; /* Socket address. */ - struct sockaddr_storage kf_sa_peer; /* Peer address. */ union { struct { + uint32_t kf_spareint; + /* Socket domain. */ + int kf_sock_domain0; + /* Socket type. */ + int kf_sock_type0; + /* Socket protocol. */ + int kf_sock_protocol0; + /* Socket address. */ + struct sockaddr_storage kf_sa_local; + /* Peer address. */ + struct sockaddr_storage kf_sa_peer; /* Address of so_pcb. */ uint64_t kf_sock_pcb; /* Address of inp_ppcb. */ uint64_t kf_sock_inpcb; /* Address of unp_conn. */ uint64_t kf_sock_unpconn; /* Send buffer state. */ uint16_t kf_sock_snd_sb_state; /* Receive buffer state. */ uint16_t kf_sock_rcv_sb_state; /* Round to 64 bit alignment. */ uint32_t kf_sock_pad0; } kf_sock; struct { + /* Vnode type. */ + int kf_file_type; + /* Space for future use */ + int kf_spareint[3]; + uint64_t kf_spareint64[30]; + /* Vnode filesystem id. */ + uint64_t kf_file_fsid; + /* File device. */ + uint64_t kf_file_rdev; /* Global file id. */ uint64_t kf_file_fileid; /* File size. */ uint64_t kf_file_size; - /* Vnode filesystem id. */ - uint32_t kf_file_fsid; - /* File device. */ - uint32_t kf_file_rdev; + /* Vnode filesystem id, FreeBSD 11 compat. */ + uint32_t kf_file_fsid_freebsd11; + /* File device, FreeBSD 11 compat. */ + uint32_t kf_file_rdev_freebsd11; /* File mode. */ uint16_t kf_file_mode; /* Round to 64 bit alignment. */ uint16_t kf_file_pad0; uint32_t kf_file_pad1; } kf_file; struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; uint32_t kf_sem_value; uint16_t kf_sem_mode; } kf_sem; struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; uint64_t kf_pipe_addr; uint64_t kf_pipe_peer; uint32_t kf_pipe_buffer_cnt; /* Round to 64 bit alignment. */ uint32_t kf_pipe_pad0[3]; } kf_pipe; struct { - uint32_t kf_pts_dev; + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; + uint32_t kf_pts_dev_freebsd11; + uint32_t kf_pts_pad0; + uint64_t kf_pts_dev; /* Round to 64 bit alignment. */ - uint32_t kf_pts_pad0[7]; + uint32_t kf_pts_pad1[4]; } kf_pts; struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; pid_t kf_pid; } kf_proc; } kf_un; uint16_t kf_status; /* Status flags. */ uint16_t kf_pad1; /* Round to 32 bit alignment. */ int _kf_ispare0; /* Space for more stuff. */ cap_rights_t kf_cap_rights; /* Capability rights. */ uint64_t _kf_cap_spare; /* Space for future cap_rights_t. */ /* Truncated before copyout in sysctl */ char kf_path[PATH_MAX]; /* Path to file, if any. */ }; +#ifndef _KERNEL +#define kf_vnode_type kf_un.kf_file.kf_file_type +#define kf_sock_domain kf_un.kf_sock.kf_sock_domain0 +#define kf_sock_type kf_un.kf_sock.kf_sock_type0 +#define kf_sock_protocol kf_un.kf_sock.kf_sock_protocol0 +#endif /* * The KERN_PROC_VMMAP sysctl allows a process to dump the VM layout of * another process as a series of entries. */ #define KVME_TYPE_NONE 0 #define KVME_TYPE_DEFAULT 1 #define KVME_TYPE_VNODE 2 #define KVME_TYPE_SWAP 3 #define KVME_TYPE_DEVICE 4 #define KVME_TYPE_PHYS 5 #define KVME_TYPE_DEAD 6 #define KVME_TYPE_SG 7 #define KVME_TYPE_MGTDEVICE 8 #define KVME_TYPE_UNKNOWN 255 #define KVME_PROT_READ 0x00000001 #define KVME_PROT_WRITE 0x00000002 #define KVME_PROT_EXEC 0x00000004 #define KVME_FLAG_COW 0x00000001 #define KVME_FLAG_NEEDS_COPY 0x00000002 #define KVME_FLAG_NOCOREDUMP 0x00000004 #define KVME_FLAG_SUPER 0x00000008 #define KVME_FLAG_GROWS_UP 0x00000010 #define KVME_FLAG_GROWS_DOWN 0x00000020 #if defined(__amd64__) #define KINFO_OVMENTRY_SIZE 1168 #endif #if defined(__i386__) #define KINFO_OVMENTRY_SIZE 1128 #endif struct kinfo_ovmentry { int kve_structsize; /* Size of kinfo_vmmapentry. */ int kve_type; /* Type of map entry. */ void *kve_start; /* Starting address. */ void *kve_end; /* Finishing address. */ int kve_flags; /* Flags on map entry. */ int kve_resident; /* Number of resident pages. */ int kve_private_resident; /* Number of private pages. */ int kve_protection; /* Protection bitmask. */ int kve_ref_count; /* VM obj ref count. */ int kve_shadow_count; /* VM obj shadow count. */ char kve_path[PATH_MAX]; /* Path to VM obj, if any. */ void *_kve_pspare[8]; /* Space for more stuff. */ off_t kve_offset; /* Mapping offset in object */ uint64_t kve_fileid; /* inode number if vnode */ - dev_t kve_fsid; /* dev_t of vnode location */ + uint32_t kve_fsid; /* dev_t of vnode location */ int _kve_ispare[3]; /* Space for more stuff. */ }; #if defined(__amd64__) || defined(__i386__) #define KINFO_VMENTRY_SIZE 1160 #endif struct kinfo_vmentry { int kve_structsize; /* Variable size of record. */ int kve_type; /* Type of map entry. */ uint64_t kve_start; /* Starting address. */ uint64_t kve_end; /* Finishing address. */ uint64_t kve_offset; /* Mapping offset in object */ uint64_t kve_vn_fileid; /* inode number if vnode */ - uint32_t kve_vn_fsid; /* dev_t of vnode location */ + uint32_t kve_vn_fsid_freebsd11; /* dev_t of vnode location */ int kve_flags; /* Flags on map entry. */ int kve_resident; /* Number of resident pages. */ int kve_private_resident; /* Number of private pages. */ int kve_protection; /* Protection bitmask. */ int kve_ref_count; /* VM obj ref count. */ int kve_shadow_count; /* VM obj shadow count. */ int kve_vn_type; /* Vnode type. */ uint64_t kve_vn_size; /* File size. */ - uint32_t kve_vn_rdev; /* Device id if device. */ + uint32_t kve_vn_rdev_freebsd11; /* Device id if device. */ uint16_t kve_vn_mode; /* File mode. */ uint16_t kve_status; /* Status flags. */ - int _kve_ispare[12]; /* Space for more stuff. */ + uint64_t kve_vn_fsid; /* dev_t of vnode location */ + uint64_t kve_vn_rdev; /* Device id if device. */ + int _kve_ispare[8]; /* Space for more stuff. */ /* Truncated before copyout in sysctl */ char kve_path[PATH_MAX]; /* Path to VM obj, if any. */ }; /* * The "vm.objects" sysctl provides a list of all VM objects in the system * via an array of these entries. */ struct kinfo_vmobject { int kvo_structsize; /* Variable size of record. */ int kvo_type; /* Object type: KVME_TYPE_*. */ uint64_t kvo_size; /* Object size in pages. */ uint64_t kvo_vn_fileid; /* inode number if vnode. */ - uint32_t kvo_vn_fsid; /* dev_t of vnode location. */ + uint32_t kvo_vn_fsid_freebsd11; /* dev_t of vnode location. */ int kvo_ref_count; /* Reference count. */ int kvo_shadow_count; /* Shadow count. */ int kvo_memattr; /* Memory attribute. */ uint64_t kvo_resident; /* Number of resident pages. */ uint64_t kvo_active; /* Number of active pages. */ uint64_t kvo_inactive; /* Number of inactive pages. */ - uint64_t _kvo_qspare[8]; + uint64_t kvo_vn_fsid; + uint64_t _kvo_qspare[7]; uint32_t _kvo_ispare[8]; char kvo_path[PATH_MAX]; /* Pathname, if any. */ }; /* * The KERN_PROC_KSTACK sysctl allows a process to dump the kernel stacks of * another process as a series of entries. Each stack is represented by a * series of symbol names and offsets as generated by stack_sbuf_print(9). */ #define KKST_MAXLEN 1024 #define KKST_STATE_STACKOK 0 /* Stack is valid. */ #define KKST_STATE_SWAPPED 1 /* Stack swapped out. */ #define KKST_STATE_RUNNING 2 /* Stack ephemeral. */ #if defined(__amd64__) || defined(__i386__) #define KINFO_KSTACK_SIZE 1096 #endif struct kinfo_kstack { lwpid_t kkst_tid; /* ID of thread. */ int kkst_state; /* Validity of stack. */ char kkst_trace[KKST_MAXLEN]; /* String representing stack. */ int _kkst_ispare[16]; /* Space for more stuff. */ }; struct kinfo_sigtramp { void *ksigtramp_start; void *ksigtramp_end; void *ksigtramp_spare[4]; }; #ifdef _KERNEL /* Flags for kern_proc_out function. */ #define KERN_PROC_NOTHREADS 0x1 #define KERN_PROC_MASK32 0x2 /* Flags for kern_proc_filedesc_out. */ #define KERN_FILEDESC_PACK_KINFO 0x00000001U /* Flags for kern_proc_vmmap_out. */ #define KERN_VMMAP_PACK_KINFO 0x00000001U struct sbuf; /* * The kern_proc out functions are helper functions to dump process * miscellaneous kinfo structures to sbuf. The main consumers are KERN_PROC * sysctls but they may also be used by other kernel subsystems. * * The functions manipulate the process locking state and expect the process * to be locked on enter. On return the process is unlocked. */ int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags); int kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen); int kern_proc_out(struct proc *p, struct sbuf *sb, int flags); int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags); int vntype_to_kinfo(int vtype); #endif /* !_KERNEL */ #endif Index: head/sys/sys/vnode.h =================================================================== --- head/sys/sys/vnode.h (revision 318735) +++ head/sys/sys/vnode.h (revision 318736) @@ -1,887 +1,890 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vnode.h 8.7 (Berkeley) 2/4/94 * $FreeBSD$ */ #ifndef _SYS_VNODE_H_ #define _SYS_VNODE_H_ #include #include #include #include #include #include #include #include #include #include /* * The vnode is the focus of all file activity in UNIX. There is a * unique vnode allocated for each active file, each current directory, * each mounted-on file, text file, and the root. */ /* * Vnode types. VNON means no type. */ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD, VMARKER }; /* * Each underlying filesystem allocates its own private area and hangs * it from v_data. If non-null, this area is freed in getnewvnode(). */ struct namecache; struct vpollinfo { struct mtx vpi_lock; /* lock to protect below */ struct selinfo vpi_selinfo; /* identity of poller(s) */ short vpi_events; /* what they are looking for */ short vpi_revents; /* what has happened */ }; /* * Reading or writing any of these items requires holding the appropriate lock. * * Lock reference: * c - namecache mutex * i - interlock * l - mp mnt_listmtx or freelist mutex * I - updated with atomics, 0->1 and 1->0 transitions with interlock held * m - mount point interlock * p - pollinfo lock * u - Only a reference to the vnode is needed to read. * v - vnode lock * * Vnodes may be found on many lists. The general way to deal with operating * on a vnode that is on a list is: * 1) Lock the list and find the vnode. * 2) Lock interlock so that the vnode does not go away. * 3) Unlock the list to avoid lock order reversals. * 4) vget with LK_INTERLOCK and check for ENOENT, or * 5) Check for DOOMED if the vnode lock is not required. * 6) Perform your operation, then vput(). */ #if defined(_KERNEL) || defined(_KVM_VNODE) struct vnode { /* * Fields which define the identity of the vnode. These fields are * owned by the filesystem (XXX: and vgone() ?) */ const char *v_tag; /* u type of underlying data */ struct vop_vector *v_op; /* u vnode operations vector */ void *v_data; /* u private data for fs */ /* * Filesystem instance stuff */ struct mount *v_mount; /* u ptr to vfs we are in */ TAILQ_ENTRY(vnode) v_nmntvnodes; /* m vnodes for mount point */ /* * Type specific fields, only one applies to any given vnode. * See #defines below for renaming to v_* namespace. */ union { struct mount *vu_mount; /* v ptr to mountpoint (VDIR) */ struct socket *vu_socket; /* v unix domain net (VSOCK) */ struct cdev *vu_cdev; /* v device (VCHR, VBLK) */ struct fifoinfo *vu_fifoinfo; /* v fifo (VFIFO) */ } v_un; /* * vfs_hash: (mount + inode) -> vnode hash. The hash value * itself is grouped with other int fields, to avoid padding. */ LIST_ENTRY(vnode) v_hashlist; /* * VFS_namecache stuff */ LIST_HEAD(, namecache) v_cache_src; /* c Cache entries from us */ TAILQ_HEAD(, namecache) v_cache_dst; /* c Cache entries to us */ struct namecache *v_cache_dd; /* c Cache entry for .. vnode */ /* * Locking */ struct lock v_lock; /* u (if fs don't have one) */ struct mtx v_interlock; /* lock for "i" things */ struct lock *v_vnlock; /* u pointer to vnode lock */ /* * The machinery of being a vnode */ TAILQ_ENTRY(vnode) v_actfreelist; /* l vnode active/free lists */ struct bufobj v_bufobj; /* * Buffer cache object */ /* * Hooks for various subsystems and features. */ struct vpollinfo *v_pollinfo; /* i Poll events, p for *v_pi */ struct label *v_label; /* MAC label for vnode */ struct lockf *v_lockf; /* Byte-level advisory lock list */ struct rangelock v_rl; /* Byte-range lock */ /* * clustering stuff */ daddr_t v_cstart; /* v start block of cluster */ daddr_t v_lasta; /* v last allocation */ daddr_t v_lastw; /* v last write */ int v_clen; /* v length of cur. cluster */ u_int v_holdcnt; /* I prevents recycling. */ u_int v_usecount; /* I ref count of users */ u_int v_iflag; /* i vnode flags (see below) */ u_int v_vflag; /* v vnode flags */ u_int v_mflag; /* l mnt-specific vnode flags */ int v_writecount; /* v ref count of writers */ u_int v_hash; enum vtype v_type; /* u vnode type */ }; #endif /* defined(_KERNEL) || defined(_KVM_VNODE) */ #define v_mountedhere v_un.vu_mount #define v_socket v_un.vu_socket #define v_rdev v_un.vu_cdev #define v_fifoinfo v_un.vu_fifoinfo #define bo2vnode(bo) __containerof((bo), struct vnode, v_bufobj) /* XXX: These are temporary to avoid a source sweep at this time */ #define v_object v_bufobj.bo_object /* * Userland version of struct vnode, for sysctl. */ struct xvnode { size_t xv_size; /* sizeof(struct xvnode) */ void *xv_vnode; /* address of real vnode */ u_long xv_flag; /* vnode vflags */ int xv_usecount; /* reference count of users */ int xv_writecount; /* reference count of writers */ int xv_holdcnt; /* page & buffer references */ u_long xv_id; /* capability identifier */ void *xv_mount; /* address of parent mount */ long xv_numoutput; /* num of writes in progress */ enum vtype xv_type; /* vnode type */ union { void *xvu_socket; /* socket, if VSOCK */ void *xvu_fifo; /* fifo, if VFIFO */ dev_t xvu_rdev; /* maj/min, if VBLK/VCHR */ struct { dev_t xvu_dev; /* device, if VDIR/VREG/VLNK */ ino_t xvu_ino; /* id, if VDIR/VREG/VLNK */ } xv_uns; } xv_un; }; #define xv_socket xv_un.xvu_socket #define xv_fifo xv_un.xvu_fifo #define xv_rdev xv_un.xvu_rdev #define xv_dev xv_un.xv_uns.xvu_dev #define xv_ino xv_un.xv_uns.xvu_ino /* We don't need to lock the knlist */ #define VN_KNLIST_EMPTY(vp) ((vp)->v_pollinfo == NULL || \ KNLIST_EMPTY(&(vp)->v_pollinfo->vpi_selinfo.si_note)) #define VN_KNOTE(vp, b, a) \ do { \ if (!VN_KNLIST_EMPTY(vp)) \ KNOTE(&vp->v_pollinfo->vpi_selinfo.si_note, (b), \ (a) | KNF_NOKQLOCK); \ } while (0) #define VN_KNOTE_LOCKED(vp, b) VN_KNOTE(vp, b, KNF_LISTLOCKED) #define VN_KNOTE_UNLOCKED(vp, b) VN_KNOTE(vp, b, 0) /* * Vnode flags. * VI flags are protected by interlock and live in v_iflag * VV flags are protected by the vnode lock and live in v_vflag * * VI_DOOMED is doubly protected by the interlock and vnode lock. Both * are required for writing but the status may be checked with either. */ #define VI_MOUNT 0x0020 /* Mount in progress */ #define VI_DOOMED 0x0080 /* This vnode is being recycled */ #define VI_FREE 0x0100 /* This vnode is on the freelist */ #define VI_ACTIVE 0x0200 /* This vnode is on the active list */ #define VI_DOINGINACT 0x0800 /* VOP_INACTIVE is in progress */ #define VI_OWEINACT 0x1000 /* Need to call inactive */ #define VV_ROOT 0x0001 /* root of its filesystem */ #define VV_ISTTY 0x0002 /* vnode represents a tty */ #define VV_NOSYNC 0x0004 /* unlinked, stop syncing */ #define VV_ETERNALDEV 0x0008 /* device that is never destroyed */ #define VV_CACHEDLABEL 0x0010 /* Vnode has valid cached MAC label */ #define VV_TEXT 0x0020 /* vnode is a pure text prototype */ #define VV_COPYONWRITE 0x0040 /* vnode is doing copy-on-write */ #define VV_SYSTEM 0x0080 /* vnode being used by kernel */ #define VV_PROCDEP 0x0100 /* vnode is process dependent */ #define VV_NOKNOTE 0x0200 /* don't activate knotes on this vnode */ #define VV_DELETED 0x0400 /* should be removed */ #define VV_MD 0x0800 /* vnode backs the md device */ #define VV_FORCEINSMQ 0x1000 /* force the insmntque to succeed */ #define VMP_TMPMNTFREELIST 0x0001 /* Vnode is on mnt's tmp free list */ /* * Vnode attributes. A field value of VNOVAL represents a field whose value * is unavailable (getattr) or which is not to be changed (setattr). */ struct vattr { enum vtype va_type; /* vnode type (for create) */ u_short va_mode; /* files access mode and type */ - short va_nlink; /* number of references to file */ + u_short va_padding0; uid_t va_uid; /* owner user id */ gid_t va_gid; /* owner group id */ + nlink_t va_nlink; /* number of references to file */ dev_t va_fsid; /* filesystem id */ - long va_fileid; /* file id */ + ino_t va_fileid; /* file id */ u_quad_t va_size; /* file size in bytes */ long va_blocksize; /* blocksize preferred for i/o */ struct timespec va_atime; /* time of last access */ struct timespec va_mtime; /* time of last modification */ struct timespec va_ctime; /* time file changed */ struct timespec va_birthtime; /* time file created */ u_long va_gen; /* generation number of file */ u_long va_flags; /* flags defined for file */ dev_t va_rdev; /* device the special file represents */ u_quad_t va_bytes; /* bytes of disk space held by file */ u_quad_t va_filerev; /* file modification number */ u_int va_vaflags; /* operations flags, see below */ long va_spare; /* remain quad aligned */ }; /* * Flags for va_vaflags. */ #define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */ #define VA_EXCLUSIVE 0x02 /* exclusive create request */ #define VA_SYNC 0x04 /* O_SYNC truncation */ /* * Flags for ioflag. (high 16 bits used to ask for read-ahead and * help with write clustering) * NB: IO_NDELAY and IO_DIRECT are linked to fcntl.h */ #define IO_UNIT 0x0001 /* do I/O as atomic unit */ #define IO_APPEND 0x0002 /* append write to end */ #define IO_NDELAY 0x0004 /* FNDELAY flag set in file table */ #define IO_NODELOCKED 0x0008 /* underlying node already locked */ #define IO_ASYNC 0x0010 /* bawrite rather then bdwrite */ #define IO_VMIO 0x0020 /* data already in VMIO space */ #define IO_INVAL 0x0040 /* invalidate after I/O */ #define IO_SYNC 0x0080 /* do I/O synchronously */ #define IO_DIRECT 0x0100 /* attempt to bypass buffer cache */ #define IO_NOREUSE 0x0200 /* VMIO data won't be reused */ #define IO_EXT 0x0400 /* operate on external attributes */ #define IO_NORMAL 0x0800 /* operate on regular data */ #define IO_NOMACCHECK 0x1000 /* MAC checks unnecessary */ #define IO_BUFLOCKED 0x2000 /* ffs flag; indir buf is locked */ #define IO_RANGELOCKED 0x4000 /* range locked */ #define IO_SEQMAX 0x7F /* seq heuristic max value */ #define IO_SEQSHIFT 16 /* seq heuristic in upper 16 bits */ /* * Flags for accmode_t. */ #define VEXEC 000000000100 /* execute/search permission */ #define VWRITE 000000000200 /* write permission */ #define VREAD 000000000400 /* read permission */ #define VADMIN 000000010000 /* being the file owner */ #define VAPPEND 000000040000 /* permission to write/append */ /* * VEXPLICIT_DENY makes VOP_ACCESSX(9) return EPERM or EACCES only * if permission was denied explicitly, by a "deny" rule in NFSv4 ACL, * and 0 otherwise. This never happens with ordinary unix access rights * or POSIX.1e ACLs. Obviously, VEXPLICIT_DENY must be OR-ed with * some other V* constant. */ #define VEXPLICIT_DENY 000000100000 #define VREAD_NAMED_ATTRS 000000200000 /* not used */ #define VWRITE_NAMED_ATTRS 000000400000 /* not used */ #define VDELETE_CHILD 000001000000 #define VREAD_ATTRIBUTES 000002000000 /* permission to stat(2) */ #define VWRITE_ATTRIBUTES 000004000000 /* change {m,c,a}time */ #define VDELETE 000010000000 #define VREAD_ACL 000020000000 /* read ACL and file mode */ #define VWRITE_ACL 000040000000 /* change ACL and/or file mode */ #define VWRITE_OWNER 000100000000 /* change file owner */ #define VSYNCHRONIZE 000200000000 /* not used */ #define VCREAT 000400000000 /* creating new file */ #define VVERIFY 001000000000 /* verification required */ /* * Permissions that were traditionally granted only to the file owner. */ #define VADMIN_PERMS (VADMIN | VWRITE_ATTRIBUTES | VWRITE_ACL | \ VWRITE_OWNER) /* * Permissions that were traditionally granted to everyone. */ #define VSTAT_PERMS (VREAD_ATTRIBUTES | VREAD_ACL) /* * Permissions that allow to change the state of the file in any way. */ #define VMODIFY_PERMS (VWRITE | VAPPEND | VADMIN_PERMS | VDELETE_CHILD | \ VDELETE) /* * Token indicating no attribute value yet assigned. */ #define VNOVAL (-1) /* * LK_TIMELOCK timeout for vnode locks (used mainly by the pageout daemon) */ #define VLKTIMEOUT (hz / 20 + 1) #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_VNODE); #endif extern u_int ncsizefactor; /* * Convert between vnode types and inode formats (since POSIX.1 * defines mode word of stat structure in terms of inode formats). */ extern enum vtype iftovt_tab[]; extern int vttoif_tab[]; #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) #define VTTOIF(indx) (vttoif_tab[(int)(indx)]) #define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) /* * Flags to various vnode functions. */ #define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */ #define FORCECLOSE 0x0002 /* vflush: force file closure */ #define WRITECLOSE 0x0004 /* vflush: only close writable files */ #define EARLYFLUSH 0x0008 /* vflush: early call for ffs_flushfiles */ #define V_SAVE 0x0001 /* vinvalbuf: sync file first */ #define V_ALT 0x0002 /* vinvalbuf: invalidate only alternate bufs */ #define V_NORMAL 0x0004 /* vinvalbuf: invalidate only regular bufs */ #define V_CLEANONLY 0x0008 /* vinvalbuf: invalidate only clean bufs */ #define V_VMIO 0x0010 /* vinvalbuf: called during pageout */ #define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */ #define V_WAIT 0x0001 /* vn_start_write: sleep for suspend */ #define V_NOWAIT 0x0002 /* vn_start_write: don't sleep for suspend */ #define V_XSLEEP 0x0004 /* vn_start_write: just return after sleep */ #define V_MNTREF 0x0010 /* vn_start_write: mp is already ref-ed */ #define VR_START_WRITE 0x0001 /* vfs_write_resume: start write atomically */ #define VR_NO_SUSPCLR 0x0002 /* vfs_write_resume: do not clear suspension */ #define VS_SKIP_UNMOUNT 0x0001 /* vfs_write_suspend: fail if the filesystem is being unmounted */ #define VREF(vp) vref(vp) #ifdef DIAGNOSTIC #define VATTR_NULL(vap) vattr_null(vap) #else #define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */ #endif /* DIAGNOSTIC */ #define NULLVP ((struct vnode *)NULL) /* * Global vnode data. */ extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ extern struct mount *rootdevmp; /* "/dev" mount */ extern int desiredvnodes; /* number of vnodes desired */ extern struct uma_zone *namei_zone; extern struct vattr va_null; /* predefined null vattr structure */ #define VI_LOCK(vp) mtx_lock(&(vp)->v_interlock) #define VI_LOCK_FLAGS(vp, flags) mtx_lock_flags(&(vp)->v_interlock, (flags)) #define VI_TRYLOCK(vp) mtx_trylock(&(vp)->v_interlock) #define VI_UNLOCK(vp) mtx_unlock(&(vp)->v_interlock) #define VI_MTX(vp) (&(vp)->v_interlock) #define VN_LOCK_AREC(vp) lockallowrecurse((vp)->v_vnlock) #define VN_LOCK_ASHARE(vp) lockallowshare((vp)->v_vnlock) #define VN_LOCK_DSHARE(vp) lockdisableshare((vp)->v_vnlock) #endif /* _KERNEL */ /* * Mods for extensibility. */ /* * Flags for vdesc_flags: */ #define VDESC_MAX_VPS 16 /* Low order 16 flag bits are reserved for willrele flags for vp arguments. */ #define VDESC_VP0_WILLRELE 0x0001 #define VDESC_VP1_WILLRELE 0x0002 #define VDESC_VP2_WILLRELE 0x0004 #define VDESC_VP3_WILLRELE 0x0008 #define VDESC_NOMAP_VPP 0x0100 #define VDESC_VPP_WILLRELE 0x0200 /* * A generic structure. * This can be used by bypass routines to identify generic arguments. */ struct vop_generic_args { struct vnodeop_desc *a_desc; /* other random data follows, presumably */ }; typedef int vop_bypass_t(struct vop_generic_args *); /* * VDESC_NO_OFFSET is used to identify the end of the offset list * and in places where no such field exists. */ #define VDESC_NO_OFFSET -1 /* * This structure describes the vnode operation taking place. */ struct vnodeop_desc { char *vdesc_name; /* a readable name for debugging */ int vdesc_flags; /* VDESC_* flags */ vop_bypass_t *vdesc_call; /* Function to call */ /* * These ops are used by bypass routines to map and locate arguments. * Creds and procs are not needed in bypass routines, but sometimes * they are useful to (for example) transport layers. * Nameidata is useful because it has a cred in it. */ int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */ int vdesc_vpp_offset; /* return vpp location */ int vdesc_cred_offset; /* cred location, if any */ int vdesc_thread_offset; /* thread location, if any */ int vdesc_componentname_offset; /* if any */ }; #ifdef _KERNEL /* * A list of all the operation descs. */ extern struct vnodeop_desc *vnodeop_descs[]; #define VOPARG_OFFSETOF(s_type, field) __offsetof(s_type, field) #define VOPARG_OFFSETTO(s_type, s_offset, struct_p) \ ((s_type)(((char*)(struct_p)) + (s_offset))) #ifdef DEBUG_VFS_LOCKS /* * Support code to aid in debugging VFS locking problems. Not totally * reliable since if the thread sleeps between changing the lock * state and checking it with the assert, some other thread could * change the state. They are good enough for debugging a single * filesystem using a single-threaded test. Note that the unreliability is * limited to false negatives; efforts were made to ensure that false * positives cannot occur. */ void assert_vi_locked(struct vnode *vp, const char *str); void assert_vi_unlocked(struct vnode *vp, const char *str); void assert_vop_elocked(struct vnode *vp, const char *str); void assert_vop_locked(struct vnode *vp, const char *str); void assert_vop_unlocked(struct vnode *vp, const char *str); #define ASSERT_VI_LOCKED(vp, str) assert_vi_locked((vp), (str)) #define ASSERT_VI_UNLOCKED(vp, str) assert_vi_unlocked((vp), (str)) #define ASSERT_VOP_ELOCKED(vp, str) assert_vop_elocked((vp), (str)) #define ASSERT_VOP_LOCKED(vp, str) assert_vop_locked((vp), (str)) #define ASSERT_VOP_UNLOCKED(vp, str) assert_vop_unlocked((vp), (str)) #else /* !DEBUG_VFS_LOCKS */ #define ASSERT_VI_LOCKED(vp, str) ((void)0) #define ASSERT_VI_UNLOCKED(vp, str) ((void)0) #define ASSERT_VOP_ELOCKED(vp, str) ((void)0) #define ASSERT_VOP_LOCKED(vp, str) ((void)0) #define ASSERT_VOP_UNLOCKED(vp, str) ((void)0) #endif /* DEBUG_VFS_LOCKS */ /* * This call works for vnodes in the kernel. */ #define VCALL(c) ((c)->a_desc->vdesc_call(c)) #define DOINGASYNC(vp) \ (((vp)->v_mount->mnt_kern_flag & MNTK_ASYNC) != 0 && \ ((curthread->td_pflags & TDP_SYNCIO) == 0)) /* * VMIO support inline */ extern int vmiodirenable; static __inline int vn_canvmio(struct vnode *vp) { if (vp && (vp->v_type == VREG || (vmiodirenable && vp->v_type == VDIR))) return(TRUE); return(FALSE); } /* * Finally, include the default set of vnode operations. */ typedef void vop_getpages_iodone_t(void *, vm_page_t *, int, int); #include "vnode_if.h" /* vn_open_flags */ #define VN_OPEN_NOAUDIT 0x00000001 #define VN_OPEN_NOCAPCHECK 0x00000002 #define VN_OPEN_NAMECACHE 0x00000004 /* * Public vnode manipulation functions. */ struct componentname; struct file; struct mount; struct nameidata; struct ostat; +struct freebsd11_stat; struct thread; struct proc; struct stat; struct nstat; struct ucred; struct uio; struct vattr; struct vfsops; struct vnode; typedef int (*vn_get_ino_t)(struct mount *, void *, int, struct vnode **); int bnoreuselist(struct bufv *bufv, struct bufobj *bo, daddr_t startn, daddr_t endn); /* cache_* may belong in namei.h. */ void cache_changesize(int newhashsize); #define cache_enter(dvp, vp, cnp) \ cache_enter_time(dvp, vp, cnp, NULL, NULL) void cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, struct timespec *tsp, struct timespec *dtsp); int cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct timespec *tsp, int *ticksp); void cache_purge(struct vnode *vp); void cache_purge_negative(struct vnode *vp); void cache_purgevfs(struct mount *mp, bool force); int change_dir(struct vnode *vp, struct thread *td); void cvtstat(struct stat *st, struct ostat *ost); -void cvtnstat(struct stat *sb, struct nstat *nsb); +void freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb); +void freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost); int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, struct vnode **vpp); void getnewvnode_reserve(u_int count); void getnewvnode_drop_reserve(void); int insmntque1(struct vnode *vp, struct mount *mp, void (*dtr)(struct vnode *, void *), void *dtr_arg); int insmntque(struct vnode *vp, struct mount *mp); u_quad_t init_va_filerev(void); int speedup_syncer(void); int vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen); int vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf); int vn_fullpath_global(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf); struct vnode * vn_dir_dd_ino(struct vnode *vp); int vn_commname(struct vnode *vn, char *buf, u_int buflen); int vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path, u_int pathlen); int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred, int *privused); int vaccess_acl_nfs4(enum vtype type, uid_t file_uid, gid_t file_gid, struct acl *aclp, accmode_t accmode, struct ucred *cred, int *privused); int vaccess_acl_posix1e(enum vtype type, uid_t file_uid, gid_t file_gid, struct acl *acl, accmode_t accmode, struct ucred *cred, int *privused); void vattr_null(struct vattr *vap); int vcount(struct vnode *vp); #define vdrop(vp) _vdrop((vp), 0) #define vdropl(vp) _vdrop((vp), 1) void _vdrop(struct vnode *, bool); int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td); int vget(struct vnode *vp, int lockflag, struct thread *td); void vgone(struct vnode *vp); #define vhold(vp) _vhold((vp), 0) #define vholdl(vp) _vhold((vp), 1) void _vhold(struct vnode *, bool); void vinactive(struct vnode *, struct thread *); int vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo); int vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize); void vunref(struct vnode *); void vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3); int vrecycle(struct vnode *vp); int vrecyclel(struct vnode *vp); int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred); int vn_close(struct vnode *vp, int flags, struct ucred *file_cred, struct thread *td); void vn_finished_write(struct mount *mp); void vn_finished_secondary_write(struct mount *mp); int vn_isdisk(struct vnode *vp, int *errp); int _vn_lock(struct vnode *vp, int flags, char *file, int line); #define vn_lock(vp, flags) _vn_lock(vp, flags, __FILE__, __LINE__) int vn_open(struct nameidata *ndp, int *flagp, int cmode, struct file *fp); int vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, u_int vn_open_flags, struct ucred *cred, struct file *fp); int vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred, struct thread *td, struct file *fp); void vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end); int vn_pollrecord(struct vnode *vp, struct thread *p, int events); int vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, ssize_t *aresid, struct thread *td); int vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, void *base, size_t len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, size_t *aresid, struct thread *td); int vn_rlimit_fsize(const struct vnode *vn, const struct uio *uio, struct thread *td); int vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred, struct ucred *file_cred, struct thread *td); int vn_start_write(struct vnode *vp, struct mount **mpp, int flags); int vn_start_secondary_write(struct vnode *vp, struct mount **mpp, int flags); int vn_writechk(struct vnode *vp); int vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, int *buflen, char *buf, struct thread *td); int vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, int buflen, char *buf, struct thread *td); int vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, struct thread *td); int vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags, struct vnode **rvp); int vn_vget_ino_gen(struct vnode *vp, vn_get_ino_t alloc, void *alloc_arg, int lkflags, struct vnode **rvp); int vn_utimes_perm(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td); int vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio); int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize, struct uio *uio); #define vn_rangelock_unlock(vp, cookie) \ rangelock_unlock(&(vp)->v_rl, (cookie), VI_MTX(vp)) #define vn_rangelock_unlock_range(vp, cookie, start, end) \ rangelock_unlock_range(&(vp)->v_rl, (cookie), (start), (end), \ VI_MTX(vp)) #define vn_rangelock_rlock(vp, start, end) \ rangelock_rlock(&(vp)->v_rl, (start), (end), VI_MTX(vp)) #define vn_rangelock_wlock(vp, start, end) \ rangelock_wlock(&(vp)->v_rl, (start), (end), VI_MTX(vp)) int vfs_cache_lookup(struct vop_lookup_args *ap); void vfs_timestamp(struct timespec *); void vfs_write_resume(struct mount *mp, int flags); int vfs_write_suspend(struct mount *mp, int flags); int vfs_write_suspend_umnt(struct mount *mp); void vnlru_free(int, struct vfsops *); int vop_stdbmap(struct vop_bmap_args *); int vop_stdfdatasync_buf(struct vop_fdatasync_args *); int vop_stdfsync(struct vop_fsync_args *); int vop_stdgetwritemount(struct vop_getwritemount_args *); int vop_stdgetpages(struct vop_getpages_args *); int vop_stdinactive(struct vop_inactive_args *); int vop_stdislocked(struct vop_islocked_args *); int vop_stdkqfilter(struct vop_kqfilter_args *); int vop_stdlock(struct vop_lock1_args *); int vop_stdputpages(struct vop_putpages_args *); int vop_stdunlock(struct vop_unlock_args *); int vop_nopoll(struct vop_poll_args *); int vop_stdaccess(struct vop_access_args *ap); int vop_stdaccessx(struct vop_accessx_args *ap); int vop_stdadvise(struct vop_advise_args *ap); int vop_stdadvlock(struct vop_advlock_args *ap); int vop_stdadvlockasync(struct vop_advlockasync_args *ap); int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap); int vop_stdallocate(struct vop_allocate_args *ap); int vop_stdpathconf(struct vop_pathconf_args *); int vop_stdpoll(struct vop_poll_args *); int vop_stdvptocnp(struct vop_vptocnp_args *ap); int vop_stdvptofh(struct vop_vptofh_args *ap); int vop_stdunp_bind(struct vop_unp_bind_args *ap); int vop_stdunp_connect(struct vop_unp_connect_args *ap); int vop_stdunp_detach(struct vop_unp_detach_args *ap); int vop_eopnotsupp(struct vop_generic_args *ap); int vop_ebadf(struct vop_generic_args *ap); int vop_einval(struct vop_generic_args *ap); int vop_enoent(struct vop_generic_args *ap); int vop_enotty(struct vop_generic_args *ap); int vop_null(struct vop_generic_args *ap); int vop_panic(struct vop_generic_args *ap); int dead_poll(struct vop_poll_args *ap); int dead_read(struct vop_read_args *ap); int dead_write(struct vop_write_args *ap); /* These are called from within the actual VOPS. */ void vop_close_post(void *a, int rc); void vop_create_post(void *a, int rc); void vop_deleteextattr_post(void *a, int rc); void vop_link_post(void *a, int rc); void vop_lookup_post(void *a, int rc); void vop_lookup_pre(void *a); void vop_mkdir_post(void *a, int rc); void vop_mknod_post(void *a, int rc); void vop_open_post(void *a, int rc); void vop_read_post(void *a, int rc); void vop_readdir_post(void *a, int rc); void vop_reclaim_post(void *a, int rc); void vop_remove_post(void *a, int rc); void vop_rename_post(void *a, int rc); void vop_rename_pre(void *a); void vop_rmdir_post(void *a, int rc); void vop_setattr_post(void *a, int rc); void vop_setextattr_post(void *a, int rc); void vop_symlink_post(void *a, int rc); #ifdef DEBUG_VFS_LOCKS void vop_strategy_pre(void *a); void vop_lock_pre(void *a); void vop_lock_post(void *a, int rc); void vop_unlock_post(void *a, int rc); void vop_unlock_pre(void *a); #else #define vop_strategy_pre(x) do { } while (0) #define vop_lock_pre(x) do { } while (0) #define vop_lock_post(x, y) do { } while (0) #define vop_unlock_post(x, y) do { } while (0) #define vop_unlock_pre(x) do { } while (0) #endif void vop_rename_fail(struct vop_rename_args *ap); #define VOP_WRITE_PRE(ap) \ struct vattr va; \ int error; \ off_t osize, ooffset, noffset; \ \ osize = ooffset = noffset = 0; \ if (!VN_KNLIST_EMPTY((ap)->a_vp)) { \ error = VOP_GETATTR((ap)->a_vp, &va, (ap)->a_cred); \ if (error) \ return (error); \ ooffset = (ap)->a_uio->uio_offset; \ osize = (off_t)va.va_size; \ } #define VOP_WRITE_POST(ap, ret) \ noffset = (ap)->a_uio->uio_offset; \ if (noffset > ooffset && !VN_KNLIST_EMPTY((ap)->a_vp)) { \ VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE \ | (noffset > osize ? NOTE_EXTEND : 0)); \ } #define VOP_LOCK(vp, flags) VOP_LOCK1(vp, flags, __FILE__, __LINE__) void vput(struct vnode *vp); void vrele(struct vnode *vp); void vref(struct vnode *vp); void vrefl(struct vnode *vp); void vrefact(struct vnode *vp); int vrefcnt(struct vnode *vp); void v_addpollinfo(struct vnode *vp); int vnode_create_vobject(struct vnode *vp, off_t size, struct thread *td); void vnode_destroy_vobject(struct vnode *vp); extern struct vop_vector fifo_specops; extern struct vop_vector dead_vnodeops; extern struct vop_vector default_vnodeops; #define VOP_PANIC ((void*)(uintptr_t)vop_panic) #define VOP_NULL ((void*)(uintptr_t)vop_null) #define VOP_EBADF ((void*)(uintptr_t)vop_ebadf) #define VOP_ENOTTY ((void*)(uintptr_t)vop_enotty) #define VOP_EINVAL ((void*)(uintptr_t)vop_einval) #define VOP_ENOENT ((void*)(uintptr_t)vop_enoent) #define VOP_EOPNOTSUPP ((void*)(uintptr_t)vop_eopnotsupp) /* fifo_vnops.c */ int fifo_printinfo(struct vnode *); /* vfs_hash.c */ typedef int vfs_hash_cmp_t(struct vnode *vp, void *arg); void vfs_hash_changesize(int newhashsize); int vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg); u_int vfs_hash_index(struct vnode *vp); int vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg); void vfs_hash_ref(const struct mount *mp, u_int hash, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg); void vfs_hash_rehash(struct vnode *vp, u_int hash); void vfs_hash_remove(struct vnode *vp); int vfs_kqfilter(struct vop_kqfilter_args *); void vfs_mark_atime(struct vnode *vp, struct ucred *cred); struct dirent; int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off); int vfs_unixify_accmode(accmode_t *accmode); void vfs_unp_reclaim(struct vnode *vp); int setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode); int setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, gid_t gid); int vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td); int vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td); #endif /* _KERNEL */ #endif /* !_SYS_VNODE_H_ */ Index: head/sys/vm/swap_pager.c =================================================================== --- head/sys/vm/swap_pager.c (revision 318735) +++ head/sys/vm/swap_pager.c (revision 318736) @@ -1,2825 +1,2850 @@ /*- * Copyright (c) 1998 Matthew Dillon, * Copyright (c) 1994 John S. Dyson * Copyright (c) 1990 University of Utah. * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * New Swap System * Matthew Dillon * * Radix Bitmap 'blists'. * * - The new swapper uses the new radix bitmap code. This should scale * to arbitrarily small or arbitrarily large swap spaces and an almost * arbitrary degree of fragmentation. * * Features: * * - on the fly reallocation of swap during putpages. The new system * does not try to keep previously allocated swap blocks for dirty * pages. * * - on the fly deallocation of swap * * - No more garbage collection required. Unnecessarily allocated swap * blocks only exist for dirty vm_page_t's now and these are already * cycled (in a high-load system) by the pager. We also do on-the-fly * removal of invalidated swap blocks when a page is destroyed * or renamed. * * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ * * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94 */ #include __FBSDID("$FreeBSD$"); +#include "opt_compat.h" #include "opt_swap.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * SWB_NPAGES must be a power of 2. It may be set to 1, 2, 4, 8, 16 * or 32 pages per allocation. * The 32-page limit is due to the radix code (kern/subr_blist.c). */ #ifndef MAX_PAGEOUT_CLUSTER #define MAX_PAGEOUT_CLUSTER 16 #endif #if !defined(SWB_NPAGES) #define SWB_NPAGES MAX_PAGEOUT_CLUSTER #endif /* * The swblock structure maps an object and a small, fixed-size range * of page indices to disk addresses within a swap area. * The collection of these mappings is implemented as a hash table. * Unused disk addresses within a swap area are allocated and managed * using a blist. */ #define SWCORRECT(n) (sizeof(void *) * (n) / sizeof(daddr_t)) #define SWAP_META_PAGES (SWB_NPAGES * 2) #define SWAP_META_MASK (SWAP_META_PAGES - 1) struct swblock { struct swblock *swb_hnext; vm_object_t swb_object; vm_pindex_t swb_index; int swb_count; daddr_t swb_pages[SWAP_META_PAGES]; }; static MALLOC_DEFINE(M_VMPGDATA, "vm_pgdata", "swap pager private data"); static struct mtx sw_dev_mtx; static TAILQ_HEAD(, swdevt) swtailq = TAILQ_HEAD_INITIALIZER(swtailq); static struct swdevt *swdevhd; /* Allocate from here next */ static int nswapdev; /* Number of swap devices */ int swap_pager_avail; static struct sx swdev_syscall_lock; /* serialize swap(on|off) */ static vm_ooffset_t swap_total; SYSCTL_QUAD(_vm, OID_AUTO, swap_total, CTLFLAG_RD, &swap_total, 0, "Total amount of available swap storage."); static vm_ooffset_t swap_reserved; SYSCTL_QUAD(_vm, OID_AUTO, swap_reserved, CTLFLAG_RD, &swap_reserved, 0, "Amount of swap storage needed to back all allocated anonymous memory."); static int overcommit = 0; SYSCTL_INT(_vm, OID_AUTO, overcommit, CTLFLAG_RW, &overcommit, 0, "Configure virtual memory overcommit behavior. See tuning(7) " "for details."); static unsigned long swzone; SYSCTL_ULONG(_vm, OID_AUTO, swzone, CTLFLAG_RD, &swzone, 0, "Actual size of swap metadata zone"); static unsigned long swap_maxpages; SYSCTL_ULONG(_vm, OID_AUTO, swap_maxpages, CTLFLAG_RD, &swap_maxpages, 0, "Maximum amount of swap supported"); /* bits from overcommit */ #define SWAP_RESERVE_FORCE_ON (1 << 0) #define SWAP_RESERVE_RLIMIT_ON (1 << 1) #define SWAP_RESERVE_ALLOW_NONWIRED (1 << 2) int swap_reserve(vm_ooffset_t incr) { return (swap_reserve_by_cred(incr, curthread->td_ucred)); } int swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred) { vm_ooffset_t r, s; int res, error; static int curfail; static struct timeval lastfail; struct uidinfo *uip; uip = cred->cr_ruidinfo; if (incr & PAGE_MASK) panic("swap_reserve: & PAGE_MASK"); #ifdef RACCT if (racct_enable) { PROC_LOCK(curproc); error = racct_add(curproc, RACCT_SWAP, incr); PROC_UNLOCK(curproc); if (error != 0) return (0); } #endif res = 0; mtx_lock(&sw_dev_mtx); r = swap_reserved + incr; if (overcommit & SWAP_RESERVE_ALLOW_NONWIRED) { s = vm_cnt.v_page_count - vm_cnt.v_free_reserved - vm_cnt.v_wire_count; s *= PAGE_SIZE; } else s = 0; s += swap_total; if ((overcommit & SWAP_RESERVE_FORCE_ON) == 0 || r <= s || (error = priv_check(curthread, PRIV_VM_SWAP_NOQUOTA)) == 0) { res = 1; swap_reserved = r; } mtx_unlock(&sw_dev_mtx); if (res) { UIDINFO_VMSIZE_LOCK(uip); if ((overcommit & SWAP_RESERVE_RLIMIT_ON) != 0 && uip->ui_vmsize + incr > lim_cur(curthread, RLIMIT_SWAP) && priv_check(curthread, PRIV_VM_SWAP_NORLIMIT)) res = 0; else uip->ui_vmsize += incr; UIDINFO_VMSIZE_UNLOCK(uip); if (!res) { mtx_lock(&sw_dev_mtx); swap_reserved -= incr; mtx_unlock(&sw_dev_mtx); } } if (!res && ppsratecheck(&lastfail, &curfail, 1)) { printf("uid %d, pid %d: swap reservation for %jd bytes failed\n", uip->ui_uid, curproc->p_pid, incr); } #ifdef RACCT if (!res) { PROC_LOCK(curproc); racct_sub(curproc, RACCT_SWAP, incr); PROC_UNLOCK(curproc); } #endif return (res); } void swap_reserve_force(vm_ooffset_t incr) { struct uidinfo *uip; mtx_lock(&sw_dev_mtx); swap_reserved += incr; mtx_unlock(&sw_dev_mtx); #ifdef RACCT PROC_LOCK(curproc); racct_add_force(curproc, RACCT_SWAP, incr); PROC_UNLOCK(curproc); #endif uip = curthread->td_ucred->cr_ruidinfo; PROC_LOCK(curproc); UIDINFO_VMSIZE_LOCK(uip); uip->ui_vmsize += incr; UIDINFO_VMSIZE_UNLOCK(uip); PROC_UNLOCK(curproc); } void swap_release(vm_ooffset_t decr) { struct ucred *cred; PROC_LOCK(curproc); cred = curthread->td_ucred; swap_release_by_cred(decr, cred); PROC_UNLOCK(curproc); } void swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred) { struct uidinfo *uip; uip = cred->cr_ruidinfo; if (decr & PAGE_MASK) panic("swap_release: & PAGE_MASK"); mtx_lock(&sw_dev_mtx); if (swap_reserved < decr) panic("swap_reserved < decr"); swap_reserved -= decr; mtx_unlock(&sw_dev_mtx); UIDINFO_VMSIZE_LOCK(uip); if (uip->ui_vmsize < decr) printf("negative vmsize for uid = %d\n", uip->ui_uid); uip->ui_vmsize -= decr; UIDINFO_VMSIZE_UNLOCK(uip); racct_sub_cred(cred, RACCT_SWAP, decr); } #define SWM_FREE 0x02 /* free, period */ #define SWM_POP 0x04 /* pop out */ int swap_pager_full = 2; /* swap space exhaustion (task killing) */ static int swap_pager_almost_full = 1; /* swap space exhaustion (w/hysteresis)*/ static int nsw_rcount; /* free read buffers */ static int nsw_wcount_sync; /* limit write buffers / synchronous */ static int nsw_wcount_async; /* limit write buffers / asynchronous */ static int nsw_wcount_async_max;/* assigned maximum */ static int nsw_cluster_max; /* maximum VOP I/O allowed */ static int sysctl_swap_async_max(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, swap_async_max, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_async_max, "I", "Maximum running async swap ops"); static struct swblock **swhash; static int swhash_mask; static struct mtx swhash_mtx; static struct sx sw_alloc_sx; /* * "named" and "unnamed" anon region objects. Try to reduce the overhead * of searching a named list by hashing it just a little. */ #define NOBJLISTS 8 #define NOBJLIST(handle) \ (&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)]) static struct pagerlst swap_pager_object_list[NOBJLISTS]; static uma_zone_t swap_zone; /* * pagerops for OBJT_SWAP - "swap pager". Some ops are also global procedure * calls hooked from other parts of the VM system and do not appear here. * (see vm/swap_pager.h). */ static vm_object_t swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *); static void swap_pager_dealloc(vm_object_t object); static int swap_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static int swap_pager_getpages_async(vm_object_t, vm_page_t *, int, int *, int *, pgo_getpages_iodone_t, void *); static void swap_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *); static boolean_t swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after); static void swap_pager_init(void); static void swap_pager_unswapped(vm_page_t); static void swap_pager_swapoff(struct swdevt *sp); struct pagerops swappagerops = { .pgo_init = swap_pager_init, /* early system initialization of pager */ .pgo_alloc = swap_pager_alloc, /* allocate an OBJT_SWAP object */ .pgo_dealloc = swap_pager_dealloc, /* deallocate an OBJT_SWAP object */ .pgo_getpages = swap_pager_getpages, /* pagein */ .pgo_getpages_async = swap_pager_getpages_async, /* pagein (async) */ .pgo_putpages = swap_pager_putpages, /* pageout */ .pgo_haspage = swap_pager_haspage, /* get backing store status for page */ .pgo_pageunswapped = swap_pager_unswapped, /* remove swap related to page */ }; /* * dmmax is in page-sized chunks with the new swap system. It was * dev-bsized chunks in the old. dmmax is always a power of 2. * * swap_*() routines are externally accessible. swp_*() routines are * internal. */ static int dmmax; static int nswap_lowat = 128; /* in pages, swap_pager_almost_full warn */ static int nswap_hiwat = 512; /* in pages, swap_pager_almost_full warn */ SYSCTL_INT(_vm, OID_AUTO, dmmax, CTLFLAG_RD, &dmmax, 0, "Maximum size of a swap block"); static void swp_sizecheck(void); static void swp_pager_async_iodone(struct buf *bp); static int swapongeom(struct vnode *); static int swaponvp(struct thread *, struct vnode *, u_long); static int swapoff_one(struct swdevt *sp, struct ucred *cred); /* * Swap bitmap functions */ static void swp_pager_freeswapspace(daddr_t blk, int npages); static daddr_t swp_pager_getswapspace(int npages); /* * Metadata functions */ static struct swblock **swp_pager_hash(vm_object_t object, vm_pindex_t index); static void swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t); static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t); static void swp_pager_meta_free_all(vm_object_t); static daddr_t swp_pager_meta_ctl(vm_object_t, vm_pindex_t, int); /* * SWP_SIZECHECK() - update swap_pager_full indication * * update the swap_pager_almost_full indication and warn when we are * about to run out of swap space, using lowat/hiwat hysteresis. * * Clear swap_pager_full ( task killing ) indication when lowat is met. * * No restrictions on call * This routine may not block. */ static void swp_sizecheck(void) { if (swap_pager_avail < nswap_lowat) { if (swap_pager_almost_full == 0) { printf("swap_pager: out of swap space\n"); swap_pager_almost_full = 1; } } else { swap_pager_full = 0; if (swap_pager_avail > nswap_hiwat) swap_pager_almost_full = 0; } } /* * SWP_PAGER_HASH() - hash swap meta data * * This is an helper function which hashes the swapblk given * the object and page index. It returns a pointer to a pointer * to the object, or a pointer to a NULL pointer if it could not * find a swapblk. */ static struct swblock ** swp_pager_hash(vm_object_t object, vm_pindex_t index) { struct swblock **pswap; struct swblock *swap; index &= ~(vm_pindex_t)SWAP_META_MASK; pswap = &swhash[(index ^ (int)(intptr_t)object) & swhash_mask]; while ((swap = *pswap) != NULL) { if (swap->swb_object == object && swap->swb_index == index ) { break; } pswap = &swap->swb_hnext; } return (pswap); } /* * SWAP_PAGER_INIT() - initialize the swap pager! * * Expected to be started from system init. NOTE: This code is run * before much else so be careful what you depend on. Most of the VM * system has yet to be initialized at this point. */ static void swap_pager_init(void) { /* * Initialize object lists */ int i; for (i = 0; i < NOBJLISTS; ++i) TAILQ_INIT(&swap_pager_object_list[i]); mtx_init(&sw_dev_mtx, "swapdev", NULL, MTX_DEF); sx_init(&sw_alloc_sx, "swspsx"); sx_init(&swdev_syscall_lock, "swsysc"); /* * Device Stripe, in PAGE_SIZE'd blocks */ dmmax = SWB_NPAGES * 2; } /* * SWAP_PAGER_SWAP_INIT() - swap pager initialization from pageout process * * Expected to be started from pageout process once, prior to entering * its main loop. */ void swap_pager_swap_init(void) { unsigned long n, n2; /* * Number of in-transit swap bp operations. Don't * exhaust the pbufs completely. Make sure we * initialize workable values (0 will work for hysteresis * but it isn't very efficient). * * The nsw_cluster_max is constrained by the bp->b_pages[] * array (MAXPHYS/PAGE_SIZE) and our locally defined * MAX_PAGEOUT_CLUSTER. Also be aware that swap ops are * constrained by the swap device interleave stripe size. * * Currently we hardwire nsw_wcount_async to 4. This limit is * designed to prevent other I/O from having high latencies due to * our pageout I/O. The value 4 works well for one or two active swap * devices but is probably a little low if you have more. Even so, * a higher value would probably generate only a limited improvement * with three or four active swap devices since the system does not * typically have to pageout at extreme bandwidths. We will want * at least 2 per swap devices, and 4 is a pretty good value if you * have one NFS swap device due to the command/ack latency over NFS. * So it all works out pretty well. */ nsw_cluster_max = min((MAXPHYS/PAGE_SIZE), MAX_PAGEOUT_CLUSTER); mtx_lock(&pbuf_mtx); nsw_rcount = (nswbuf + 1) / 2; nsw_wcount_sync = (nswbuf + 3) / 4; nsw_wcount_async = 4; nsw_wcount_async_max = nsw_wcount_async; mtx_unlock(&pbuf_mtx); /* * Initialize our zone. Right now I'm just guessing on the number * we need based on the number of pages in the system. Each swblock * can hold 32 pages, so this is probably overkill. This reservation * is typically limited to around 32MB by default. */ n = vm_cnt.v_page_count / 2; if (maxswzone && n > maxswzone / sizeof(struct swblock)) n = maxswzone / sizeof(struct swblock); n2 = n; swap_zone = uma_zcreate("SWAPMETA", sizeof(struct swblock), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); if (swap_zone == NULL) panic("failed to create swap_zone."); do { if (uma_zone_reserve_kva(swap_zone, n)) break; /* * if the allocation failed, try a zone two thirds the * size of the previous attempt. */ n -= ((n + 2) / 3); } while (n > 0); if (n2 != n) printf("Swap zone entries reduced from %lu to %lu.\n", n2, n); swap_maxpages = n * SWAP_META_PAGES; swzone = n * sizeof(struct swblock); n2 = n; /* * Initialize our meta-data hash table. The swapper does not need to * be quite as efficient as the VM system, so we do not use an * oversized hash table. * * n: size of hash table, must be power of 2 * swhash_mask: hash table index mask */ for (n = 1; n < n2 / 8; n *= 2) ; swhash = malloc(sizeof(struct swblock *) * n, M_VMPGDATA, M_WAITOK | M_ZERO); swhash_mask = n - 1; mtx_init(&swhash_mtx, "swap_pager swhash", NULL, MTX_DEF); } static vm_object_t swap_pager_alloc_init(void *handle, struct ucred *cred, vm_ooffset_t size, vm_ooffset_t offset) { vm_object_t object; if (cred != NULL) { if (!swap_reserve_by_cred(size, cred)) return (NULL); crhold(cred); } object = vm_object_allocate(OBJT_SWAP, OFF_TO_IDX(offset + PAGE_MASK + size)); object->handle = handle; if (cred != NULL) { object->cred = cred; object->charge = size; } object->un_pager.swp.swp_bcount = 0; return (object); } /* * SWAP_PAGER_ALLOC() - allocate a new OBJT_SWAP VM object and instantiate * its metadata structures. * * This routine is called from the mmap and fork code to create a new * OBJT_SWAP object. * * This routine must ensure that no live duplicate is created for * the named object request, which is protected against by * holding the sw_alloc_sx lock in case handle != NULL. */ static vm_object_t swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *cred) { vm_object_t object; if (handle != NULL) { /* * Reference existing named region or allocate new one. There * should not be a race here against swp_pager_meta_build() * as called from vm_page_remove() in regards to the lookup * of the handle. */ sx_xlock(&sw_alloc_sx); object = vm_pager_object_lookup(NOBJLIST(handle), handle); if (object == NULL) { object = swap_pager_alloc_init(handle, cred, size, offset); if (object != NULL) { TAILQ_INSERT_TAIL(NOBJLIST(object->handle), object, pager_object_list); } } sx_xunlock(&sw_alloc_sx); } else { object = swap_pager_alloc_init(handle, cred, size, offset); } return (object); } /* * SWAP_PAGER_DEALLOC() - remove swap metadata from object * * The swap backing for the object is destroyed. The code is * designed such that we can reinstantiate it later, but this * routine is typically called only when the entire object is * about to be destroyed. * * The object must be locked. */ static void swap_pager_dealloc(vm_object_t object) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_DEAD) != 0, ("dealloc of reachable obj")); /* * Remove from list right away so lookups will fail if we block for * pageout completion. */ if (object->handle != NULL) { VM_OBJECT_WUNLOCK(object); sx_xlock(&sw_alloc_sx); TAILQ_REMOVE(NOBJLIST(object->handle), object, pager_object_list); sx_xunlock(&sw_alloc_sx); VM_OBJECT_WLOCK(object); } vm_object_pip_wait(object, "swpdea"); /* * Free all remaining metadata. We only bother to free it from * the swap meta data. We do not attempt to free swapblk's still * associated with vm_page_t's for this object. We do not care * if paging is still in progress on some objects. */ swp_pager_meta_free_all(object); object->handle = NULL; object->type = OBJT_DEAD; } /************************************************************************ * SWAP PAGER BITMAP ROUTINES * ************************************************************************/ /* * SWP_PAGER_GETSWAPSPACE() - allocate raw swap space * * Allocate swap for the requested number of pages. The starting * swap block number (a page index) is returned or SWAPBLK_NONE * if the allocation failed. * * Also has the side effect of advising that somebody made a mistake * when they configured swap and didn't configure enough. * * This routine may not sleep. * * We allocate in round-robin fashion from the configured devices. */ static daddr_t swp_pager_getswapspace(int npages) { daddr_t blk; struct swdevt *sp; int i; blk = SWAPBLK_NONE; mtx_lock(&sw_dev_mtx); sp = swdevhd; for (i = 0; i < nswapdev; i++) { if (sp == NULL) sp = TAILQ_FIRST(&swtailq); if (!(sp->sw_flags & SW_CLOSING)) { blk = blist_alloc(sp->sw_blist, npages); if (blk != SWAPBLK_NONE) { blk += sp->sw_first; sp->sw_used += npages; swap_pager_avail -= npages; swp_sizecheck(); swdevhd = TAILQ_NEXT(sp, sw_list); goto done; } } sp = TAILQ_NEXT(sp, sw_list); } if (swap_pager_full != 2) { printf("swap_pager_getswapspace(%d): failed\n", npages); swap_pager_full = 2; swap_pager_almost_full = 1; } swdevhd = NULL; done: mtx_unlock(&sw_dev_mtx); return (blk); } static int swp_pager_isondev(daddr_t blk, struct swdevt *sp) { return (blk >= sp->sw_first && blk < sp->sw_end); } static void swp_pager_strategy(struct buf *bp) { struct swdevt *sp; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (bp->b_blkno >= sp->sw_first && bp->b_blkno < sp->sw_end) { mtx_unlock(&sw_dev_mtx); if ((sp->sw_flags & SW_UNMAPPED) != 0 && unmapped_buf_allowed) { bp->b_data = unmapped_buf; bp->b_offset = 0; } else { pmap_qenter((vm_offset_t)bp->b_data, &bp->b_pages[0], bp->b_bcount / PAGE_SIZE); } sp->sw_strategy(bp, sp); return; } } panic("Swapdev not found"); } /* * SWP_PAGER_FREESWAPSPACE() - free raw swap space * * This routine returns the specified swap blocks back to the bitmap. * * This routine may not sleep. */ static void swp_pager_freeswapspace(daddr_t blk, int npages) { struct swdevt *sp; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (blk >= sp->sw_first && blk < sp->sw_end) { sp->sw_used -= npages; /* * If we are attempting to stop swapping on * this device, we don't want to mark any * blocks free lest they be reused. */ if ((sp->sw_flags & SW_CLOSING) == 0) { blist_free(sp->sw_blist, blk - sp->sw_first, npages); swap_pager_avail += npages; swp_sizecheck(); } mtx_unlock(&sw_dev_mtx); return; } } panic("Swapdev not found"); } /* * SWAP_PAGER_FREESPACE() - frees swap blocks associated with a page * range within an object. * * This is a globally accessible routine. * * This routine removes swapblk assignments from swap metadata. * * The external callers of this routine typically have already destroyed * or renamed vm_page_t's associated with this range in the object so * we should be ok. * * The object must be locked. */ void swap_pager_freespace(vm_object_t object, vm_pindex_t start, vm_size_t size) { swp_pager_meta_free(object, start, size); } /* * SWAP_PAGER_RESERVE() - reserve swap blocks in object * * Assigns swap blocks to the specified range within the object. The * swap blocks are not zeroed. Any previous swap assignment is destroyed. * * Returns 0 on success, -1 on failure. */ int swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_size_t size) { int n = 0; daddr_t blk = SWAPBLK_NONE; vm_pindex_t beg = start; /* save start index */ VM_OBJECT_WLOCK(object); while (size) { if (n == 0) { n = BLIST_MAX_ALLOC; while ((blk = swp_pager_getswapspace(n)) == SWAPBLK_NONE) { n >>= 1; if (n == 0) { swp_pager_meta_free(object, beg, start - beg); VM_OBJECT_WUNLOCK(object); return (-1); } } } swp_pager_meta_build(object, start, blk); --size; ++start; ++blk; --n; } swp_pager_meta_free(object, start, n); VM_OBJECT_WUNLOCK(object); return (0); } /* * SWAP_PAGER_COPY() - copy blocks from source pager to destination pager * and destroy the source. * * Copy any valid swapblks from the source to the destination. In * cases where both the source and destination have a valid swapblk, * we keep the destination's. * * This routine is allowed to sleep. It may sleep allocating metadata * indirectly through swp_pager_meta_build() or if paging is still in * progress on the source. * * The source object contains no vm_page_t's (which is just as well) * * The source object is of type OBJT_SWAP. * * The source and destination objects must be locked. * Both object locks may temporarily be released. */ void swap_pager_copy(vm_object_t srcobject, vm_object_t dstobject, vm_pindex_t offset, int destroysource) { vm_pindex_t i; VM_OBJECT_ASSERT_WLOCKED(srcobject); VM_OBJECT_ASSERT_WLOCKED(dstobject); /* * If destroysource is set, we remove the source object from the * swap_pager internal queue now. */ if (destroysource && srcobject->handle != NULL) { vm_object_pip_add(srcobject, 1); VM_OBJECT_WUNLOCK(srcobject); vm_object_pip_add(dstobject, 1); VM_OBJECT_WUNLOCK(dstobject); sx_xlock(&sw_alloc_sx); TAILQ_REMOVE(NOBJLIST(srcobject->handle), srcobject, pager_object_list); sx_xunlock(&sw_alloc_sx); VM_OBJECT_WLOCK(dstobject); vm_object_pip_wakeup(dstobject); VM_OBJECT_WLOCK(srcobject); vm_object_pip_wakeup(srcobject); } /* * transfer source to destination. */ for (i = 0; i < dstobject->size; ++i) { daddr_t dstaddr; /* * Locate (without changing) the swapblk on the destination, * unless it is invalid in which case free it silently, or * if the destination is a resident page, in which case the * source is thrown away. */ dstaddr = swp_pager_meta_ctl(dstobject, i, 0); if (dstaddr == SWAPBLK_NONE) { /* * Destination has no swapblk and is not resident, * copy source. */ daddr_t srcaddr; srcaddr = swp_pager_meta_ctl( srcobject, i + offset, SWM_POP ); if (srcaddr != SWAPBLK_NONE) { /* * swp_pager_meta_build() can sleep. */ vm_object_pip_add(srcobject, 1); VM_OBJECT_WUNLOCK(srcobject); vm_object_pip_add(dstobject, 1); swp_pager_meta_build(dstobject, i, srcaddr); vm_object_pip_wakeup(dstobject); VM_OBJECT_WLOCK(srcobject); vm_object_pip_wakeup(srcobject); } } else { /* * Destination has valid swapblk or it is represented * by a resident page. We destroy the sourceblock. */ swp_pager_meta_ctl(srcobject, i + offset, SWM_FREE); } } /* * Free left over swap blocks in source. * * We have to revert the type to OBJT_DEFAULT so we do not accidentally * double-remove the object from the swap queues. */ if (destroysource) { swp_pager_meta_free_all(srcobject); /* * Reverting the type is not necessary, the caller is going * to destroy srcobject directly, but I'm doing it here * for consistency since we've removed the object from its * queues. */ srcobject->type = OBJT_DEFAULT; } } /* * SWAP_PAGER_HASPAGE() - determine if we have good backing store for * the requested page. * * We determine whether good backing store exists for the requested * page and return TRUE if it does, FALSE if it doesn't. * * If TRUE, we also try to determine how much valid, contiguous backing * store exists before and after the requested page. */ static boolean_t swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after) { daddr_t blk, blk0; int i; VM_OBJECT_ASSERT_LOCKED(object); /* * do we have good backing store at the requested index ? */ blk0 = swp_pager_meta_ctl(object, pindex, 0); if (blk0 == SWAPBLK_NONE) { if (before) *before = 0; if (after) *after = 0; return (FALSE); } /* * find backwards-looking contiguous good backing store */ if (before != NULL) { for (i = 1; i < SWB_NPAGES; i++) { if (i > pindex) break; blk = swp_pager_meta_ctl(object, pindex - i, 0); if (blk != blk0 - i) break; } *before = i - 1; } /* * find forward-looking contiguous good backing store */ if (after != NULL) { for (i = 1; i < SWB_NPAGES; i++) { blk = swp_pager_meta_ctl(object, pindex + i, 0); if (blk != blk0 + i) break; } *after = i - 1; } return (TRUE); } /* * SWAP_PAGER_PAGE_UNSWAPPED() - remove swap backing store related to page * * This removes any associated swap backing store, whether valid or * not, from the page. * * This routine is typically called when a page is made dirty, at * which point any associated swap can be freed. MADV_FREE also * calls us in a special-case situation * * NOTE!!! If the page is clean and the swap was valid, the caller * should make the page dirty before calling this routine. This routine * does NOT change the m->dirty status of the page. Also: MADV_FREE * depends on it. * * This routine may not sleep. * * The object containing the page must be locked. */ static void swap_pager_unswapped(vm_page_t m) { swp_pager_meta_ctl(m->object, m->pindex, SWM_FREE); } /* * swap_pager_getpages() - bring pages in from swap * * Attempt to page in the pages in array "m" of length "count". The caller * may optionally specify that additional pages preceding and succeeding * the specified range be paged in. The number of such pages is returned * in the "rbehind" and "rahead" parameters, and they will be in the * inactive queue upon return. * * The pages in "m" must be busied and will remain busied upon return. */ static int swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, int *rahead) { struct buf *bp; vm_page_t mpred, msucc, p; vm_pindex_t pindex; daddr_t blk; int i, j, maxahead, maxbehind, reqcount, shift; reqcount = count; VM_OBJECT_WUNLOCK(object); bp = getpbuf(&nsw_rcount); VM_OBJECT_WLOCK(object); if (!swap_pager_haspage(object, m[0]->pindex, &maxbehind, &maxahead)) { relpbuf(bp, &nsw_rcount); return (VM_PAGER_FAIL); } /* * Clip the readahead and readbehind ranges to exclude resident pages. */ if (rahead != NULL) { KASSERT(reqcount - 1 <= maxahead, ("page count %d extends beyond swap block", reqcount)); *rahead = imin(*rahead, maxahead - (reqcount - 1)); pindex = m[reqcount - 1]->pindex; msucc = TAILQ_NEXT(m[reqcount - 1], listq); if (msucc != NULL && msucc->pindex - pindex - 1 < *rahead) *rahead = msucc->pindex - pindex - 1; } if (rbehind != NULL) { *rbehind = imin(*rbehind, maxbehind); pindex = m[0]->pindex; mpred = TAILQ_PREV(m[0], pglist, listq); if (mpred != NULL && pindex - mpred->pindex - 1 < *rbehind) *rbehind = pindex - mpred->pindex - 1; } /* * Allocate readahead and readbehind pages. */ shift = rbehind != NULL ? *rbehind : 0; if (shift != 0) { for (i = 1; i <= shift; i++) { p = vm_page_alloc(object, m[0]->pindex - i, VM_ALLOC_NORMAL); if (p == NULL) { /* Shift allocated pages to the left. */ for (j = 0; j < i - 1; j++) bp->b_pages[j] = bp->b_pages[j + shift - i + 1]; break; } bp->b_pages[shift - i] = p; } shift = i - 1; *rbehind = shift; } for (i = 0; i < reqcount; i++) bp->b_pages[i + shift] = m[i]; if (rahead != NULL) { for (i = 0; i < *rahead; i++) { p = vm_page_alloc(object, m[reqcount - 1]->pindex + i + 1, VM_ALLOC_NORMAL); if (p == NULL) break; bp->b_pages[shift + reqcount + i] = p; } *rahead = i; } if (rbehind != NULL) count += *rbehind; if (rahead != NULL) count += *rahead; vm_object_pip_add(object, count); for (i = 0; i < count; i++) bp->b_pages[i]->oflags |= VPO_SWAPINPROG; pindex = bp->b_pages[0]->pindex; blk = swp_pager_meta_ctl(object, pindex, 0); KASSERT(blk != SWAPBLK_NONE, ("no swap blocking containing %p(%jx)", object, (uintmax_t)pindex)); VM_OBJECT_WUNLOCK(object); bp->b_flags |= B_PAGING; bp->b_iocmd = BIO_READ; bp->b_iodone = swp_pager_async_iodone; bp->b_rcred = crhold(thread0.td_ucred); bp->b_wcred = crhold(thread0.td_ucred); bp->b_blkno = blk; bp->b_bcount = PAGE_SIZE * count; bp->b_bufsize = PAGE_SIZE * count; bp->b_npages = count; bp->b_pgbefore = rbehind != NULL ? *rbehind : 0; bp->b_pgafter = rahead != NULL ? *rahead : 0; VM_CNT_INC(v_swapin); VM_CNT_ADD(v_swappgsin, count); /* * perform the I/O. NOTE!!! bp cannot be considered valid after * this point because we automatically release it on completion. * Instead, we look at the one page we are interested in which we * still hold a lock on even through the I/O completion. * * The other pages in our m[] array are also released on completion, * so we cannot assume they are valid anymore either. * * NOTE: b_blkno is destroyed by the call to swapdev_strategy */ BUF_KERNPROC(bp); swp_pager_strategy(bp); /* * Wait for the pages we want to complete. VPO_SWAPINPROG is always * cleared on completion. If an I/O error occurs, SWAPBLK_NONE * is set in the metadata for each page in the request. */ VM_OBJECT_WLOCK(object); while ((m[0]->oflags & VPO_SWAPINPROG) != 0) { m[0]->oflags |= VPO_SWAPSLEEP; VM_CNT_INC(v_intrans); if (VM_OBJECT_SLEEP(object, &object->paging_in_progress, PSWP, "swread", hz * 20)) { printf( "swap_pager: indefinite wait buffer: bufobj: %p, blkno: %jd, size: %ld\n", bp->b_bufobj, (intmax_t)bp->b_blkno, bp->b_bcount); } } /* * If we had an unrecoverable read error pages will not be valid. */ for (i = 0; i < reqcount; i++) if (m[i]->valid != VM_PAGE_BITS_ALL) return (VM_PAGER_ERROR); return (VM_PAGER_OK); /* * A final note: in a low swap situation, we cannot deallocate swap * and mark a page dirty here because the caller is likely to mark * the page clean when we return, causing the page to possibly revert * to all-zero's later. */ } /* * swap_pager_getpages_async(): * * Right now this is emulation of asynchronous operation on top of * swap_pager_getpages(). */ static int swap_pager_getpages_async(vm_object_t object, vm_page_t *m, int count, int *rbehind, int *rahead, pgo_getpages_iodone_t iodone, void *arg) { int r, error; r = swap_pager_getpages(object, m, count, rbehind, rahead); VM_OBJECT_WUNLOCK(object); switch (r) { case VM_PAGER_OK: error = 0; break; case VM_PAGER_ERROR: error = EIO; break; case VM_PAGER_FAIL: error = EINVAL; break; default: panic("unhandled swap_pager_getpages() error %d", r); } (iodone)(arg, m, count, error); VM_OBJECT_WLOCK(object); return (r); } /* * swap_pager_putpages: * * Assign swap (if necessary) and initiate I/O on the specified pages. * * We support both OBJT_DEFAULT and OBJT_SWAP objects. DEFAULT objects * are automatically converted to SWAP objects. * * In a low memory situation we may block in VOP_STRATEGY(), but the new * vm_page reservation system coupled with properly written VFS devices * should ensure that no low-memory deadlock occurs. This is an area * which needs work. * * The parent has N vm_object_pip_add() references prior to * calling us and will remove references for rtvals[] that are * not set to VM_PAGER_PEND. We need to remove the rest on I/O * completion. * * The parent has soft-busy'd the pages it passes us and will unbusy * those whos rtvals[] entry is not set to VM_PAGER_PEND on return. * We need to unbusy the rest on I/O completion. */ static void swap_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags, int *rtvals) { int i, n; boolean_t sync; if (count && m[0]->object != object) { panic("swap_pager_putpages: object mismatch %p/%p", object, m[0]->object ); } /* * Step 1 * * Turn object into OBJT_SWAP * check for bogus sysops * force sync if not pageout process */ if (object->type != OBJT_SWAP) swp_pager_meta_build(object, 0, SWAPBLK_NONE); VM_OBJECT_WUNLOCK(object); n = 0; if (curproc != pageproc) sync = TRUE; else sync = (flags & VM_PAGER_PUT_SYNC) != 0; /* * Step 2 * * Assign swap blocks and issue I/O. We reallocate swap on the fly. * The page is left dirty until the pageout operation completes * successfully. */ for (i = 0; i < count; i += n) { int j; struct buf *bp; daddr_t blk; /* * Maximum I/O size is limited by a number of factors. */ n = min(BLIST_MAX_ALLOC, count - i); n = min(n, nsw_cluster_max); /* * Get biggest block of swap we can. If we fail, fall * back and try to allocate a smaller block. Don't go * overboard trying to allocate space if it would overly * fragment swap. */ while ( (blk = swp_pager_getswapspace(n)) == SWAPBLK_NONE && n > 4 ) { n >>= 1; } if (blk == SWAPBLK_NONE) { for (j = 0; j < n; ++j) rtvals[i+j] = VM_PAGER_FAIL; continue; } /* * All I/O parameters have been satisfied, build the I/O * request and assign the swap space. */ if (sync == TRUE) { bp = getpbuf(&nsw_wcount_sync); } else { bp = getpbuf(&nsw_wcount_async); bp->b_flags = B_ASYNC; } bp->b_flags |= B_PAGING; bp->b_iocmd = BIO_WRITE; bp->b_rcred = crhold(thread0.td_ucred); bp->b_wcred = crhold(thread0.td_ucred); bp->b_bcount = PAGE_SIZE * n; bp->b_bufsize = PAGE_SIZE * n; bp->b_blkno = blk; VM_OBJECT_WLOCK(object); for (j = 0; j < n; ++j) { vm_page_t mreq = m[i+j]; swp_pager_meta_build( mreq->object, mreq->pindex, blk + j ); vm_page_dirty(mreq); mreq->oflags |= VPO_SWAPINPROG; bp->b_pages[j] = mreq; } VM_OBJECT_WUNLOCK(object); bp->b_npages = n; /* * Must set dirty range for NFS to work. */ bp->b_dirtyoff = 0; bp->b_dirtyend = bp->b_bcount; VM_CNT_INC(v_swapout); VM_CNT_ADD(v_swappgsout, bp->b_npages); /* * We unconditionally set rtvals[] to VM_PAGER_PEND so that we * can call the async completion routine at the end of a * synchronous I/O operation. Otherwise, our caller would * perform duplicate unbusy and wakeup operations on the page * and object, respectively. */ for (j = 0; j < n; j++) rtvals[i + j] = VM_PAGER_PEND; /* * asynchronous * * NOTE: b_blkno is destroyed by the call to swapdev_strategy */ if (sync == FALSE) { bp->b_iodone = swp_pager_async_iodone; BUF_KERNPROC(bp); swp_pager_strategy(bp); continue; } /* * synchronous * * NOTE: b_blkno is destroyed by the call to swapdev_strategy */ bp->b_iodone = bdone; swp_pager_strategy(bp); /* * Wait for the sync I/O to complete. */ bwait(bp, PVM, "swwrt"); /* * Now that we are through with the bp, we can call the * normal async completion, which frees everything up. */ swp_pager_async_iodone(bp); } VM_OBJECT_WLOCK(object); } /* * swp_pager_async_iodone: * * Completion routine for asynchronous reads and writes from/to swap. * Also called manually by synchronous code to finish up a bp. * * This routine may not sleep. */ static void swp_pager_async_iodone(struct buf *bp) { int i; vm_object_t object = NULL; /* * report error */ if (bp->b_ioflags & BIO_ERROR) { printf( "swap_pager: I/O error - %s failed; blkno %ld," "size %ld, error %d\n", ((bp->b_iocmd == BIO_READ) ? "pagein" : "pageout"), (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error ); } /* * remove the mapping for kernel virtual */ if (buf_mapped(bp)) pmap_qremove((vm_offset_t)bp->b_data, bp->b_npages); else bp->b_data = bp->b_kvabase; if (bp->b_npages) { object = bp->b_pages[0]->object; VM_OBJECT_WLOCK(object); } /* * cleanup pages. If an error occurs writing to swap, we are in * very serious trouble. If it happens to be a disk error, though, * we may be able to recover by reassigning the swap later on. So * in this case we remove the m->swapblk assignment for the page * but do not free it in the rlist. The errornous block(s) are thus * never reallocated as swap. Redirty the page and continue. */ for (i = 0; i < bp->b_npages; ++i) { vm_page_t m = bp->b_pages[i]; m->oflags &= ~VPO_SWAPINPROG; if (m->oflags & VPO_SWAPSLEEP) { m->oflags &= ~VPO_SWAPSLEEP; wakeup(&object->paging_in_progress); } if (bp->b_ioflags & BIO_ERROR) { /* * If an error occurs I'd love to throw the swapblk * away without freeing it back to swapspace, so it * can never be used again. But I can't from an * interrupt. */ if (bp->b_iocmd == BIO_READ) { /* * NOTE: for reads, m->dirty will probably * be overridden by the original caller of * getpages so don't play cute tricks here. */ m->valid = 0; } else { /* * If a write error occurs, reactivate page * so it doesn't clog the inactive list, * then finish the I/O. */ vm_page_dirty(m); vm_page_lock(m); vm_page_activate(m); vm_page_unlock(m); vm_page_sunbusy(m); } } else if (bp->b_iocmd == BIO_READ) { /* * NOTE: for reads, m->dirty will probably be * overridden by the original caller of getpages so * we cannot set them in order to free the underlying * swap in a low-swap situation. I don't think we'd * want to do that anyway, but it was an optimization * that existed in the old swapper for a time before * it got ripped out due to precisely this problem. */ KASSERT(!pmap_page_is_mapped(m), ("swp_pager_async_iodone: page %p is mapped", m)); KASSERT(m->dirty == 0, ("swp_pager_async_iodone: page %p is dirty", m)); m->valid = VM_PAGE_BITS_ALL; if (i < bp->b_pgbefore || i >= bp->b_npages - bp->b_pgafter) vm_page_readahead_finish(m); } else { /* * For write success, clear the dirty * status, then finish the I/O ( which decrements the * busy count and possibly wakes waiter's up ). * A page is only written to swap after a period of * inactivity. Therefore, we do not expect it to be * reused. */ KASSERT(!pmap_page_is_write_mapped(m), ("swp_pager_async_iodone: page %p is not write" " protected", m)); vm_page_undirty(m); vm_page_lock(m); vm_page_deactivate_noreuse(m); vm_page_unlock(m); vm_page_sunbusy(m); } } /* * adjust pip. NOTE: the original parent may still have its own * pip refs on the object. */ if (object != NULL) { vm_object_pip_wakeupn(object, bp->b_npages); VM_OBJECT_WUNLOCK(object); } /* * swapdev_strategy() manually sets b_vp and b_bufobj before calling * bstrategy(). Set them back to NULL now we're done with it, or we'll * trigger a KASSERT in relpbuf(). */ if (bp->b_vp) { bp->b_vp = NULL; bp->b_bufobj = NULL; } /* * release the physical I/O buffer */ relpbuf( bp, ((bp->b_iocmd == BIO_READ) ? &nsw_rcount : ((bp->b_flags & B_ASYNC) ? &nsw_wcount_async : &nsw_wcount_sync ) ) ); } /* * swap_pager_isswapped: * * Return 1 if at least one page in the given object is paged * out to the given swap device. * * This routine may not sleep. */ int swap_pager_isswapped(vm_object_t object, struct swdevt *sp) { daddr_t index = 0; int bcount; int i; VM_OBJECT_ASSERT_WLOCKED(object); if (object->type != OBJT_SWAP) return (0); mtx_lock(&swhash_mtx); for (bcount = 0; bcount < object->un_pager.swp.swp_bcount; bcount++) { struct swblock *swap; if ((swap = *swp_pager_hash(object, index)) != NULL) { for (i = 0; i < SWAP_META_PAGES; ++i) { if (swp_pager_isondev(swap->swb_pages[i], sp)) { mtx_unlock(&swhash_mtx); return (1); } } } index += SWAP_META_PAGES; } mtx_unlock(&swhash_mtx); return (0); } int swap_pager_nswapdev(void) { return (nswapdev); } /* * SWP_PAGER_FORCE_PAGEIN() - force a swap block to be paged in * * This routine dissociates the page at the given index within an object * from its backing store, paging it in if it does not reside in memory. * If the page is paged in, it is marked dirty and placed in the laundry * queue. The page is marked dirty because it no longer has backing * store. It is placed in the laundry queue because it has not been * accessed recently. Otherwise, it would already reside in memory. * * We also attempt to swap in all other pages in the swap block. * However, we only guarantee that the one at the specified index is * paged in. * * XXX - The code to page the whole block in doesn't work, so we * revert to the one-by-one behavior for now. Sigh. */ static inline void swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; vm_object_pip_add(object, 1); m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL); if (m->valid == VM_PAGE_BITS_ALL) { vm_object_pip_wakeup(object); vm_page_dirty(m); vm_page_lock(m); vm_page_activate(m); vm_page_unlock(m); vm_page_xunbusy(m); vm_pager_page_unswapped(m); return; } if (swap_pager_getpages(object, &m, 1, NULL, NULL) != VM_PAGER_OK) panic("swap_pager_force_pagein: read from swap failed");/*XXX*/ vm_object_pip_wakeup(object); vm_page_dirty(m); vm_page_lock(m); vm_page_launder(m); vm_page_unlock(m); vm_page_xunbusy(m); vm_pager_page_unswapped(m); } /* * swap_pager_swapoff: * * Page in all of the pages that have been paged out to the * given device. The corresponding blocks in the bitmap must be * marked as allocated and the device must be flagged SW_CLOSING. * There may be no processes swapped out to the device. * * This routine may block. */ static void swap_pager_swapoff(struct swdevt *sp) { struct swblock *swap; vm_object_t locked_obj, object; vm_pindex_t pindex; int i, j, retries; sx_assert(&swdev_syscall_lock, SA_XLOCKED); retries = 0; locked_obj = NULL; full_rescan: mtx_lock(&swhash_mtx); for (i = 0; i <= swhash_mask; i++) { /* '<=' is correct here */ restart: for (swap = swhash[i]; swap != NULL; swap = swap->swb_hnext) { object = swap->swb_object; pindex = swap->swb_index; for (j = 0; j < SWAP_META_PAGES; ++j) { if (!swp_pager_isondev(swap->swb_pages[j], sp)) continue; if (locked_obj != object) { if (locked_obj != NULL) VM_OBJECT_WUNLOCK(locked_obj); locked_obj = object; if (!VM_OBJECT_TRYWLOCK(object)) { mtx_unlock(&swhash_mtx); /* Depends on type-stability. */ VM_OBJECT_WLOCK(object); mtx_lock(&swhash_mtx); goto restart; } } MPASS(locked_obj == object); mtx_unlock(&swhash_mtx); swp_pager_force_pagein(object, pindex + j); mtx_lock(&swhash_mtx); goto restart; } } } mtx_unlock(&swhash_mtx); if (locked_obj != NULL) { VM_OBJECT_WUNLOCK(locked_obj); locked_obj = NULL; } if (sp->sw_used) { /* * Objects may be locked or paging to the device being * removed, so we will miss their pages and need to * make another pass. We have marked this device as * SW_CLOSING, so the activity should finish soon. */ retries++; if (retries > 100) { panic("swapoff: failed to locate %d swap blocks", sp->sw_used); } pause("swpoff", hz / 20); goto full_rescan; } EVENTHANDLER_INVOKE(swapoff, sp); } /************************************************************************ * SWAP META DATA * ************************************************************************ * * These routines manipulate the swap metadata stored in the * OBJT_SWAP object. * * Swap metadata is implemented with a global hash and not directly * linked into the object. Instead the object simply contains * appropriate tracking counters. */ /* * SWP_PAGER_META_BUILD() - add swap block to swap meta data for object * * We first convert the object to a swap object if it is a default * object. * * The specified swapblk is added to the object's swap metadata. If * the swapblk is not valid, it is freed instead. Any previously * assigned swapblk is freed. */ static void swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk) { static volatile int exhausted; struct swblock *swap; struct swblock **pswap; int idx; VM_OBJECT_ASSERT_WLOCKED(object); /* * Convert default object to swap object if necessary */ if (object->type != OBJT_SWAP) { object->type = OBJT_SWAP; object->un_pager.swp.swp_bcount = 0; KASSERT(object->handle == NULL, ("default pager with handle")); } /* * Locate hash entry. If not found create, but if we aren't adding * anything just return. If we run out of space in the map we wait * and, since the hash table may have changed, retry. */ retry: mtx_lock(&swhash_mtx); pswap = swp_pager_hash(object, pindex); if ((swap = *pswap) == NULL) { int i; if (swapblk == SWAPBLK_NONE) goto done; swap = *pswap = uma_zalloc(swap_zone, M_NOWAIT | (curproc == pageproc ? M_USE_RESERVE : 0)); if (swap == NULL) { mtx_unlock(&swhash_mtx); VM_OBJECT_WUNLOCK(object); if (uma_zone_exhausted(swap_zone)) { if (atomic_cmpset_int(&exhausted, 0, 1)) printf("swap zone exhausted, " "increase kern.maxswzone\n"); vm_pageout_oom(VM_OOM_SWAPZ); pause("swzonex", 10); } else VM_WAIT; VM_OBJECT_WLOCK(object); goto retry; } if (atomic_cmpset_int(&exhausted, 1, 0)) printf("swap zone ok\n"); swap->swb_hnext = NULL; swap->swb_object = object; swap->swb_index = pindex & ~(vm_pindex_t)SWAP_META_MASK; swap->swb_count = 0; ++object->un_pager.swp.swp_bcount; for (i = 0; i < SWAP_META_PAGES; ++i) swap->swb_pages[i] = SWAPBLK_NONE; } /* * Delete prior contents of metadata */ idx = pindex & SWAP_META_MASK; if (swap->swb_pages[idx] != SWAPBLK_NONE) { swp_pager_freeswapspace(swap->swb_pages[idx], 1); --swap->swb_count; } /* * Enter block into metadata */ swap->swb_pages[idx] = swapblk; if (swapblk != SWAPBLK_NONE) ++swap->swb_count; done: mtx_unlock(&swhash_mtx); } /* * SWP_PAGER_META_FREE() - free a range of blocks in the object's swap metadata * * The requested range of blocks is freed, with any associated swap * returned to the swap bitmap. * * This routine will free swap metadata structures as they are cleaned * out. This routine does *NOT* operate on swap metadata associated * with resident pages. */ static void swp_pager_meta_free(vm_object_t object, vm_pindex_t index, vm_pindex_t count) { struct swblock **pswap, *swap; vm_pindex_t c; daddr_t v; int n, sidx; VM_OBJECT_ASSERT_LOCKED(object); if (object->type != OBJT_SWAP || count == 0) return; mtx_lock(&swhash_mtx); for (c = 0; c < count;) { pswap = swp_pager_hash(object, index); sidx = index & SWAP_META_MASK; n = SWAP_META_PAGES - sidx; index += n; if ((swap = *pswap) == NULL) { c += n; continue; } for (; c < count && sidx < SWAP_META_PAGES; ++c, ++sidx) { if ((v = swap->swb_pages[sidx]) == SWAPBLK_NONE) continue; swp_pager_freeswapspace(v, 1); swap->swb_pages[sidx] = SWAPBLK_NONE; if (--swap->swb_count == 0) { *pswap = swap->swb_hnext; uma_zfree(swap_zone, swap); --object->un_pager.swp.swp_bcount; c += SWAP_META_PAGES - sidx; break; } } } mtx_unlock(&swhash_mtx); } /* * SWP_PAGER_META_FREE_ALL() - destroy all swap metadata associated with object * * This routine locates and destroys all swap metadata associated with * an object. */ static void swp_pager_meta_free_all(vm_object_t object) { struct swblock **pswap, *swap; vm_pindex_t index; daddr_t v; int i; VM_OBJECT_ASSERT_WLOCKED(object); if (object->type != OBJT_SWAP) return; index = 0; while (object->un_pager.swp.swp_bcount != 0) { mtx_lock(&swhash_mtx); pswap = swp_pager_hash(object, index); if ((swap = *pswap) != NULL) { for (i = 0; i < SWAP_META_PAGES; ++i) { v = swap->swb_pages[i]; if (v != SWAPBLK_NONE) { --swap->swb_count; swp_pager_freeswapspace(v, 1); } } if (swap->swb_count != 0) panic( "swap_pager_meta_free_all: swb_count != 0"); *pswap = swap->swb_hnext; uma_zfree(swap_zone, swap); --object->un_pager.swp.swp_bcount; } mtx_unlock(&swhash_mtx); index += SWAP_META_PAGES; } } /* * SWP_PAGER_METACTL() - misc control of swap and vm_page_t meta data. * * This routine is capable of looking up, popping, or freeing * swapblk assignments in the swap meta data or in the vm_page_t. * The routine typically returns the swapblk being looked-up, or popped, * or SWAPBLK_NONE if the block was freed, or SWAPBLK_NONE if the block * was invalid. This routine will automatically free any invalid * meta-data swapblks. * * It is not possible to store invalid swapblks in the swap meta data * (other then a literal 'SWAPBLK_NONE'), so we don't bother checking. * * When acting on a busy resident page and paging is in progress, we * have to wait until paging is complete but otherwise can act on the * busy page. * * SWM_FREE remove and free swap block from metadata * SWM_POP remove from meta data but do not free.. pop it out */ static daddr_t swp_pager_meta_ctl(vm_object_t object, vm_pindex_t pindex, int flags) { struct swblock **pswap; struct swblock *swap; daddr_t r1; int idx; VM_OBJECT_ASSERT_LOCKED(object); /* * The meta data only exists of the object is OBJT_SWAP * and even then might not be allocated yet. */ if (object->type != OBJT_SWAP) return (SWAPBLK_NONE); r1 = SWAPBLK_NONE; mtx_lock(&swhash_mtx); pswap = swp_pager_hash(object, pindex); if ((swap = *pswap) != NULL) { idx = pindex & SWAP_META_MASK; r1 = swap->swb_pages[idx]; if (r1 != SWAPBLK_NONE) { if (flags & SWM_FREE) { swp_pager_freeswapspace(r1, 1); r1 = SWAPBLK_NONE; } if (flags & (SWM_FREE|SWM_POP)) { swap->swb_pages[idx] = SWAPBLK_NONE; if (--swap->swb_count == 0) { *pswap = swap->swb_hnext; uma_zfree(swap_zone, swap); --object->un_pager.swp.swp_bcount; } } } } mtx_unlock(&swhash_mtx); return (r1); } /* * Returns the least page index which is greater than or equal to the * parameter pindex and for which there is a swap block allocated. * Returns object's size if the object's type is not swap or if there * are no allocated swap blocks for the object after the requested * pindex. */ vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex) { struct swblock **pswap, *swap; vm_pindex_t i, j, lim; int idx; VM_OBJECT_ASSERT_LOCKED(object); if (object->type != OBJT_SWAP || object->un_pager.swp.swp_bcount == 0) return (object->size); mtx_lock(&swhash_mtx); for (j = pindex; j < object->size; j = lim) { pswap = swp_pager_hash(object, j); lim = rounddown2(j + SWAP_META_PAGES, SWAP_META_PAGES); if (lim > object->size) lim = object->size; if ((swap = *pswap) != NULL) { for (idx = j & SWAP_META_MASK, i = j; i < lim; i++, idx++) { if (swap->swb_pages[idx] != SWAPBLK_NONE) goto found; } } } i = object->size; found: mtx_unlock(&swhash_mtx); return (i); } /* * System call swapon(name) enables swapping on device name, * which must be in the swdevsw. Return EBUSY * if already swapping on this device. */ #ifndef _SYS_SYSPROTO_H_ struct swapon_args { char *name; }; #endif /* * MPSAFE */ /* ARGSUSED */ int sys_swapon(struct thread *td, struct swapon_args *uap) { struct vattr attr; struct vnode *vp; struct nameidata nd; int error; error = priv_check(td, PRIV_SWAPON); if (error) return (error); sx_xlock(&swdev_syscall_lock); /* * Swap metadata may not fit in the KVM if we have physical * memory of >1GB. */ if (swap_zone == NULL) { error = ENOMEM; goto done; } NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | AUDITVNODE1, UIO_USERSPACE, uap->name, td); error = namei(&nd); if (error) goto done; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if (vn_isdisk(vp, &error)) { error = swapongeom(vp); } else if (vp->v_type == VREG && (vp->v_mount->mnt_vfc->vfc_flags & VFCF_NETWORK) != 0 && (error = VOP_GETATTR(vp, &attr, td->td_ucred)) == 0) { /* * Allow direct swapping to NFS regular files in the same * way that nfs_mountroot() sets up diskless swapping. */ error = swaponvp(td, vp, attr.va_size / DEV_BSIZE); } if (error) vrele(vp); done: sx_xunlock(&swdev_syscall_lock); return (error); } /* * Check that the total amount of swap currently configured does not * exceed half the theoretical maximum. If it does, print a warning * message and return -1; otherwise, return 0. */ static int swapon_check_swzone(unsigned long npages) { unsigned long maxpages; /* absolute maximum we can handle assuming 100% efficiency */ maxpages = uma_zone_get_max(swap_zone) * SWAP_META_PAGES; /* recommend using no more than half that amount */ if (npages > maxpages / 2) { printf("warning: total configured swap (%lu pages) " "exceeds maximum recommended amount (%lu pages).\n", npages, maxpages / 2); printf("warning: increase kern.maxswzone " "or reduce amount of swap.\n"); return (-1); } return (0); } static void swaponsomething(struct vnode *vp, void *id, u_long nblks, sw_strategy_t *strategy, sw_close_t *close, dev_t dev, int flags) { struct swdevt *sp, *tsp; swblk_t dvbase; u_long mblocks; /* * nblks is in DEV_BSIZE'd chunks, convert to PAGE_SIZE'd chunks. * First chop nblks off to page-align it, then convert. * * sw->sw_nblks is in page-sized chunks now too. */ nblks &= ~(ctodb(1) - 1); nblks = dbtoc(nblks); /* * If we go beyond this, we get overflows in the radix * tree bitmap code. */ mblocks = 0x40000000 / BLIST_META_RADIX; if (nblks > mblocks) { printf( "WARNING: reducing swap size to maximum of %luMB per unit\n", mblocks / 1024 / 1024 * PAGE_SIZE); nblks = mblocks; } sp = malloc(sizeof *sp, M_VMPGDATA, M_WAITOK | M_ZERO); sp->sw_vp = vp; sp->sw_id = id; sp->sw_dev = dev; sp->sw_flags = 0; sp->sw_nblks = nblks; sp->sw_used = 0; sp->sw_strategy = strategy; sp->sw_close = close; sp->sw_flags = flags; sp->sw_blist = blist_create(nblks, M_WAITOK); /* * Do not free the first two block in order to avoid overwriting * any bsd label at the front of the partition */ blist_free(sp->sw_blist, 2, nblks - 2); dvbase = 0; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(tsp, &swtailq, sw_list) { if (tsp->sw_end >= dvbase) { /* * We put one uncovered page between the devices * in order to definitively prevent any cross-device * I/O requests */ dvbase = tsp->sw_end + 1; } } sp->sw_first = dvbase; sp->sw_end = dvbase + nblks; TAILQ_INSERT_TAIL(&swtailq, sp, sw_list); nswapdev++; swap_pager_avail += nblks; swap_total += (vm_ooffset_t)nblks * PAGE_SIZE; swapon_check_swzone(swap_total / PAGE_SIZE); swp_sizecheck(); mtx_unlock(&sw_dev_mtx); EVENTHANDLER_INVOKE(swapon, sp); } /* * SYSCALL: swapoff(devname) * * Disable swapping on the given device. * * XXX: Badly designed system call: it should use a device index * rather than filename as specification. We keep sw_vp around * only to make this work. */ #ifndef _SYS_SYSPROTO_H_ struct swapoff_args { char *name; }; #endif /* * MPSAFE */ /* ARGSUSED */ int sys_swapoff(struct thread *td, struct swapoff_args *uap) { struct vnode *vp; struct nameidata nd; struct swdevt *sp; int error; error = priv_check(td, PRIV_SWAPOFF); if (error) return (error); sx_xlock(&swdev_syscall_lock); NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, UIO_USERSPACE, uap->name, td); error = namei(&nd); if (error) goto done; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (sp->sw_vp == vp) break; } mtx_unlock(&sw_dev_mtx); if (sp == NULL) { error = EINVAL; goto done; } error = swapoff_one(sp, td->td_ucred); done: sx_xunlock(&swdev_syscall_lock); return (error); } static int swapoff_one(struct swdevt *sp, struct ucred *cred) { u_long nblks, dvbase; #ifdef MAC int error; #endif sx_assert(&swdev_syscall_lock, SA_XLOCKED); #ifdef MAC (void) vn_lock(sp->sw_vp, LK_EXCLUSIVE | LK_RETRY); error = mac_system_check_swapoff(cred, sp->sw_vp); (void) VOP_UNLOCK(sp->sw_vp, 0); if (error != 0) return (error); #endif nblks = sp->sw_nblks; /* * We can turn off this swap device safely only if the * available virtual memory in the system will fit the amount * of data we will have to page back in, plus an epsilon so * the system doesn't become critically low on swap space. */ if (vm_cnt.v_free_count + swap_pager_avail < nblks + nswap_lowat) return (ENOMEM); /* * Prevent further allocations on this device. */ mtx_lock(&sw_dev_mtx); sp->sw_flags |= SW_CLOSING; for (dvbase = 0; dvbase < sp->sw_end; dvbase += dmmax) { swap_pager_avail -= blist_fill(sp->sw_blist, dvbase, dmmax); } swap_total -= (vm_ooffset_t)nblks * PAGE_SIZE; mtx_unlock(&sw_dev_mtx); /* * Page in the contents of the device and close it. */ swap_pager_swapoff(sp); sp->sw_close(curthread, sp); mtx_lock(&sw_dev_mtx); sp->sw_id = NULL; TAILQ_REMOVE(&swtailq, sp, sw_list); nswapdev--; if (nswapdev == 0) { swap_pager_full = 2; swap_pager_almost_full = 1; } if (swdevhd == sp) swdevhd = NULL; mtx_unlock(&sw_dev_mtx); blist_destroy(sp->sw_blist); free(sp, M_VMPGDATA); return (0); } void swapoff_all(void) { struct swdevt *sp, *spt; const char *devname; int error; sx_xlock(&swdev_syscall_lock); mtx_lock(&sw_dev_mtx); TAILQ_FOREACH_SAFE(sp, &swtailq, sw_list, spt) { mtx_unlock(&sw_dev_mtx); if (vn_isdisk(sp->sw_vp, NULL)) devname = devtoname(sp->sw_vp->v_rdev); else devname = "[file]"; error = swapoff_one(sp, thread0.td_ucred); if (error != 0) { printf("Cannot remove swap device %s (error=%d), " "skipping.\n", devname, error); } else if (bootverbose) { printf("Swap device %s removed.\n", devname); } mtx_lock(&sw_dev_mtx); } mtx_unlock(&sw_dev_mtx); sx_xunlock(&swdev_syscall_lock); } void swap_pager_status(int *total, int *used) { struct swdevt *sp; *total = 0; *used = 0; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { *total += sp->sw_nblks; *used += sp->sw_used; } mtx_unlock(&sw_dev_mtx); } int swap_dev_info(int name, struct xswdev *xs, char *devname, size_t len) { struct swdevt *sp; const char *tmp_devname; int error, n; n = 0; error = ENOENT; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (n != name) { n++; continue; } xs->xsw_version = XSWDEV_VERSION; xs->xsw_dev = sp->sw_dev; xs->xsw_flags = sp->sw_flags; xs->xsw_nblks = sp->sw_nblks; xs->xsw_used = sp->sw_used; if (devname != NULL) { if (vn_isdisk(sp->sw_vp, NULL)) tmp_devname = devtoname(sp->sw_vp->v_rdev); else tmp_devname = "[file]"; strncpy(devname, tmp_devname, len); } error = 0; break; } mtx_unlock(&sw_dev_mtx); return (error); } +#if defined(COMPAT_FREEBSD11) +#define XSWDEV_VERSION_11 1 +struct xswdev11 { + u_int xsw_version; + uint32_t xsw_dev; + int xsw_flags; + int xsw_nblks; + int xsw_used; +}; +#endif + static int sysctl_vm_swap_info(SYSCTL_HANDLER_ARGS) { struct xswdev xs; +#if defined(COMPAT_FREEBSD11) + struct xswdev11 xs11; +#endif int error; if (arg2 != 1) /* name length */ return (EINVAL); error = swap_dev_info(*(int *)arg1, &xs, NULL, 0); if (error != 0) return (error); - error = SYSCTL_OUT(req, &xs, sizeof(xs)); +#if defined(COMPAT_FREEBSD11) + if (req->oldlen == sizeof(xs11)) { + xs11.xsw_version = XSWDEV_VERSION_11; + xs11.xsw_dev = xs.xsw_dev; /* truncation */ + xs11.xsw_flags = xs.xsw_flags; + xs11.xsw_nblks = xs.xsw_nblks; + xs11.xsw_used = xs.xsw_used; + error = SYSCTL_OUT(req, &xs11, sizeof(xs11)); + } else +#endif + error = SYSCTL_OUT(req, &xs, sizeof(xs)); return (error); } SYSCTL_INT(_vm, OID_AUTO, nswapdev, CTLFLAG_RD, &nswapdev, 0, "Number of swap devices"); SYSCTL_NODE(_vm, OID_AUTO, swap_info, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_vm_swap_info, "Swap statistics by device"); /* * vmspace_swap_count() - count the approximate swap usage in pages for a * vmspace. * * The map must be locked. * * Swap usage is determined by taking the proportional swap used by * VM objects backing the VM map. To make up for fractional losses, * if the VM object has any swap use at all the associated map entries * count for at least 1 swap page. */ long vmspace_swap_count(struct vmspace *vmspace) { vm_map_t map; vm_map_entry_t cur; vm_object_t object; long count, n; map = &vmspace->vm_map; count = 0; for (cur = map->header.next; cur != &map->header; cur = cur->next) { if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 && (object = cur->object.vm_object) != NULL) { VM_OBJECT_WLOCK(object); if (object->type == OBJT_SWAP && object->un_pager.swp.swp_bcount != 0) { n = (cur->end - cur->start) / PAGE_SIZE; count += object->un_pager.swp.swp_bcount * SWAP_META_PAGES * n / object->size + 1; } VM_OBJECT_WUNLOCK(object); } } return (count); } /* * GEOM backend * * Swapping onto disk devices. * */ static g_orphan_t swapgeom_orphan; static struct g_class g_swap_class = { .name = "SWAP", .version = G_VERSION, .orphan = swapgeom_orphan, }; DECLARE_GEOM_CLASS(g_swap_class, g_class); static void swapgeom_close_ev(void *arg, int flags) { struct g_consumer *cp; cp = arg; g_access(cp, -1, -1, 0); g_detach(cp); g_destroy_consumer(cp); } /* * Add a reference to the g_consumer for an inflight transaction. */ static void swapgeom_acquire(struct g_consumer *cp) { mtx_assert(&sw_dev_mtx, MA_OWNED); cp->index++; } /* * Remove a reference from the g_consumer. Post a close event if all * references go away, since the function might be called from the * biodone context. */ static void swapgeom_release(struct g_consumer *cp, struct swdevt *sp) { mtx_assert(&sw_dev_mtx, MA_OWNED); cp->index--; if (cp->index == 0) { if (g_post_event(swapgeom_close_ev, cp, M_NOWAIT, NULL) == 0) sp->sw_id = NULL; } } static void swapgeom_done(struct bio *bp2) { struct swdevt *sp; struct buf *bp; struct g_consumer *cp; bp = bp2->bio_caller2; cp = bp2->bio_from; bp->b_ioflags = bp2->bio_flags; if (bp2->bio_error) bp->b_ioflags |= BIO_ERROR; bp->b_resid = bp->b_bcount - bp2->bio_completed; bp->b_error = bp2->bio_error; bufdone(bp); sp = bp2->bio_caller1; mtx_lock(&sw_dev_mtx); swapgeom_release(cp, sp); mtx_unlock(&sw_dev_mtx); g_destroy_bio(bp2); } static void swapgeom_strategy(struct buf *bp, struct swdevt *sp) { struct bio *bio; struct g_consumer *cp; mtx_lock(&sw_dev_mtx); cp = sp->sw_id; if (cp == NULL) { mtx_unlock(&sw_dev_mtx); bp->b_error = ENXIO; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } swapgeom_acquire(cp); mtx_unlock(&sw_dev_mtx); if (bp->b_iocmd == BIO_WRITE) bio = g_new_bio(); else bio = g_alloc_bio(); if (bio == NULL) { mtx_lock(&sw_dev_mtx); swapgeom_release(cp, sp); mtx_unlock(&sw_dev_mtx); bp->b_error = ENOMEM; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } bio->bio_caller1 = sp; bio->bio_caller2 = bp; bio->bio_cmd = bp->b_iocmd; bio->bio_offset = (bp->b_blkno - sp->sw_first) * PAGE_SIZE; bio->bio_length = bp->b_bcount; bio->bio_done = swapgeom_done; if (!buf_mapped(bp)) { bio->bio_ma = bp->b_pages; bio->bio_data = unmapped_buf; bio->bio_ma_offset = (vm_offset_t)bp->b_offset & PAGE_MASK; bio->bio_ma_n = bp->b_npages; bio->bio_flags |= BIO_UNMAPPED; } else { bio->bio_data = bp->b_data; bio->bio_ma = NULL; } g_io_request(bio, cp); return; } static void swapgeom_orphan(struct g_consumer *cp) { struct swdevt *sp; int destroy; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (sp->sw_id == cp) { sp->sw_flags |= SW_CLOSING; break; } } /* * Drop reference we were created with. Do directly since we're in a * special context where we don't have to queue the call to * swapgeom_close_ev(). */ cp->index--; destroy = ((sp != NULL) && (cp->index == 0)); if (destroy) sp->sw_id = NULL; mtx_unlock(&sw_dev_mtx); if (destroy) swapgeom_close_ev(cp, 0); } static void swapgeom_close(struct thread *td, struct swdevt *sw) { struct g_consumer *cp; mtx_lock(&sw_dev_mtx); cp = sw->sw_id; sw->sw_id = NULL; mtx_unlock(&sw_dev_mtx); /* * swapgeom_close() may be called from the biodone context, * where we cannot perform topology changes. Delegate the * work to the events thread. */ if (cp != NULL) g_waitfor_event(swapgeom_close_ev, cp, M_WAITOK, NULL); } static int swapongeom_locked(struct cdev *dev, struct vnode *vp) { struct g_provider *pp; struct g_consumer *cp; static struct g_geom *gp; struct swdevt *sp; u_long nblks; int error; pp = g_dev_getprovider(dev); if (pp == NULL) return (ENODEV); mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { cp = sp->sw_id; if (cp != NULL && cp->provider == pp) { mtx_unlock(&sw_dev_mtx); return (EBUSY); } } mtx_unlock(&sw_dev_mtx); if (gp == NULL) gp = g_new_geomf(&g_swap_class, "swap"); cp = g_new_consumer(gp); cp->index = 1; /* Number of active I/Os, plus one for being active. */ cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; g_attach(cp, pp); /* * XXX: Every time you think you can improve the margin for * footshooting, somebody depends on the ability to do so: * savecore(8) wants to write to our swapdev so we cannot * set an exclusive count :-( */ error = g_access(cp, 1, 1, 0); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); return (error); } nblks = pp->mediasize / DEV_BSIZE; swaponsomething(vp, cp, nblks, swapgeom_strategy, swapgeom_close, dev2udev(dev), (pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 ? SW_UNMAPPED : 0); return (0); } static int swapongeom(struct vnode *vp) { int error; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_type != VCHR || (vp->v_iflag & VI_DOOMED) != 0) { error = ENOENT; } else { g_topology_lock(); error = swapongeom_locked(vp->v_rdev, vp); g_topology_unlock(); } VOP_UNLOCK(vp, 0); return (error); } /* * VNODE backend * * This is used mainly for network filesystem (read: probably only tested * with NFS) swapfiles. * */ static void swapdev_strategy(struct buf *bp, struct swdevt *sp) { struct vnode *vp2; bp->b_blkno = ctodb(bp->b_blkno - sp->sw_first); vp2 = sp->sw_id; vhold(vp2); if (bp->b_iocmd == BIO_WRITE) { if (bp->b_bufobj) bufobj_wdrop(bp->b_bufobj); bufobj_wref(&vp2->v_bufobj); } if (bp->b_bufobj != &vp2->v_bufobj) bp->b_bufobj = &vp2->v_bufobj; bp->b_vp = vp2; bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); return; } static void swapdev_close(struct thread *td, struct swdevt *sp) { VOP_CLOSE(sp->sw_vp, FREAD | FWRITE, td->td_ucred, td); vrele(sp->sw_vp); } static int swaponvp(struct thread *td, struct vnode *vp, u_long nblks) { struct swdevt *sp; int error; if (nblks == 0) return (ENXIO); mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (sp->sw_id == vp) { mtx_unlock(&sw_dev_mtx); return (EBUSY); } } mtx_unlock(&sw_dev_mtx); (void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); #ifdef MAC error = mac_system_check_swapon(td->td_ucred, vp); if (error == 0) #endif error = VOP_OPEN(vp, FREAD | FWRITE, td->td_ucred, td, NULL); (void) VOP_UNLOCK(vp, 0); if (error) return (error); swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close, NODEV, 0); return (0); } static int sysctl_swap_async_max(SYSCTL_HANDLER_ARGS) { int error, new, n; new = nsw_wcount_async_max; error = sysctl_handle_int(oidp, &new, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (new > nswbuf / 2 || new < 1) return (EINVAL); mtx_lock(&pbuf_mtx); while (nsw_wcount_async_max != new) { /* * Adjust difference. If the current async count is too low, * we will need to sqeeze our update slowly in. Sleep with a * higher priority than getpbuf() to finish faster. */ n = new - nsw_wcount_async_max; if (nsw_wcount_async + n >= 0) { nsw_wcount_async += n; nsw_wcount_async_max += n; wakeup(&nsw_wcount_async); } else { nsw_wcount_async_max -= nsw_wcount_async; nsw_wcount_async = 0; msleep(&nsw_wcount_async, &pbuf_mtx, PSWP, "swpsysctl", 0); } } mtx_unlock(&pbuf_mtx); return (0); } Index: head/sys/vm/vm_object.c =================================================================== --- head/sys/vm/vm_object.c (revision 318735) +++ head/sys/vm/vm_object.c (revision 318736) @@ -1,2644 +1,2647 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Virtual memory object module. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include /* for curproc, pageproc */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int old_msync; SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0, "Use old (insecure) msync behavior"); static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags, int flags, boolean_t *clearobjflags, boolean_t *eio); static boolean_t vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *clearobjflags); static void vm_object_qcollapse(vm_object_t object); static void vm_object_vndeallocate(vm_object_t object); /* * Virtual memory objects maintain the actual data * associated with allocated virtual memory. A given * page of memory exists within exactly one object. * * An object is only deallocated when all "references" * are given up. Only one "reference" to a given * region of an object should be writeable. * * Associated with each object is a list of all resident * memory pages belonging to that object; this list is * maintained by the "vm_page" module, and locked by the object's * lock. * * Each object also records a "pager" routine which is * used to retrieve (and store) pages to the proper backing * storage. In addition, objects may be backed by other * objects from which they were virtual-copied. * * The only items within the object structure which are * modified after time of creation are: * reference count locked by object's lock * pager routine locked by object's lock * */ struct object_q vm_object_list; struct mtx vm_object_list_mtx; /* lock for object list and count */ struct vm_object kernel_object_store; struct vm_object kmem_object_store; static SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0, "VM object stats"); static long object_collapses; SYSCTL_LONG(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD, &object_collapses, 0, "VM object collapses"); static long object_bypasses; SYSCTL_LONG(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD, &object_bypasses, 0, "VM object bypasses"); static uma_zone_t obj_zone; static int vm_object_zinit(void *mem, int size, int flags); #ifdef INVARIANTS static void vm_object_zdtor(void *mem, int size, void *arg); static void vm_object_zdtor(void *mem, int size, void *arg) { vm_object_t object; object = (vm_object_t)mem; KASSERT(object->ref_count == 0, ("object %p ref_count = %d", object, object->ref_count)); KASSERT(TAILQ_EMPTY(&object->memq), ("object %p has resident pages in its memq", object)); KASSERT(vm_radix_is_empty(&object->rtree), ("object %p has resident pages in its trie", object)); #if VM_NRESERVLEVEL > 0 KASSERT(LIST_EMPTY(&object->rvq), ("object %p has reservations", object)); #endif KASSERT(object->paging_in_progress == 0, ("object %p paging_in_progress = %d", object, object->paging_in_progress)); KASSERT(object->resident_page_count == 0, ("object %p resident_page_count = %d", object, object->resident_page_count)); KASSERT(object->shadow_count == 0, ("object %p shadow_count = %d", object, object->shadow_count)); KASSERT(object->type == OBJT_DEAD, ("object %p has non-dead type %d", object, object->type)); } #endif static int vm_object_zinit(void *mem, int size, int flags) { vm_object_t object; object = (vm_object_t)mem; rw_init_flags(&object->lock, "vm object", RW_DUPOK | RW_NEW); /* These are true for any object that has been freed */ object->type = OBJT_DEAD; object->ref_count = 0; object->rtree.rt_root = 0; object->paging_in_progress = 0; object->resident_page_count = 0; object->shadow_count = 0; mtx_lock(&vm_object_list_mtx); TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); mtx_unlock(&vm_object_list_mtx); return (0); } static void _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object) { TAILQ_INIT(&object->memq); LIST_INIT(&object->shadow_head); object->type = type; switch (type) { case OBJT_DEAD: panic("_vm_object_allocate: can't create OBJT_DEAD"); case OBJT_DEFAULT: case OBJT_SWAP: object->flags = OBJ_ONEMAPPING; break; case OBJT_DEVICE: case OBJT_SG: object->flags = OBJ_FICTITIOUS | OBJ_UNMANAGED; break; case OBJT_MGTDEVICE: object->flags = OBJ_FICTITIOUS; break; case OBJT_PHYS: object->flags = OBJ_UNMANAGED; break; case OBJT_VNODE: object->flags = 0; break; default: panic("_vm_object_allocate: type %d is undefined", type); } object->size = size; object->generation = 1; object->ref_count = 1; object->memattr = VM_MEMATTR_DEFAULT; object->cred = NULL; object->charge = 0; object->handle = NULL; object->backing_object = NULL; object->backing_object_offset = (vm_ooffset_t) 0; #if VM_NRESERVLEVEL > 0 LIST_INIT(&object->rvq); #endif umtx_shm_object_init(object); } /* * vm_object_init: * * Initialize the VM objects module. */ void vm_object_init(void) { TAILQ_INIT(&vm_object_list); mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF); rw_init(&kernel_object->lock, "kernel vm object"); _vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), kernel_object); #if VM_NRESERVLEVEL > 0 kernel_object->flags |= OBJ_COLORED; kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS); #endif rw_init(&kmem_object->lock, "kmem vm object"); _vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), kmem_object); #if VM_NRESERVLEVEL > 0 kmem_object->flags |= OBJ_COLORED; kmem_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS); #endif /* * The lock portion of struct vm_object must be type stable due * to vm_pageout_fallback_object_lock locking a vm object * without holding any references to it. */ obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL, #ifdef INVARIANTS vm_object_zdtor, #else NULL, #endif vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); vm_radix_init(); } void vm_object_clear_flag(vm_object_t object, u_short bits) { VM_OBJECT_ASSERT_WLOCKED(object); object->flags &= ~bits; } /* * Sets the default memory attribute for the specified object. Pages * that are allocated to this object are by default assigned this memory * attribute. * * Presently, this function must be called before any pages are allocated * to the object. In the future, this requirement may be relaxed for * "default" and "swap" objects. */ int vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr) { VM_OBJECT_ASSERT_WLOCKED(object); switch (object->type) { case OBJT_DEFAULT: case OBJT_DEVICE: case OBJT_MGTDEVICE: case OBJT_PHYS: case OBJT_SG: case OBJT_SWAP: case OBJT_VNODE: if (!TAILQ_EMPTY(&object->memq)) return (KERN_FAILURE); break; case OBJT_DEAD: return (KERN_INVALID_ARGUMENT); default: panic("vm_object_set_memattr: object %p is of undefined type", object); } object->memattr = memattr; return (KERN_SUCCESS); } void vm_object_pip_add(vm_object_t object, short i) { VM_OBJECT_ASSERT_WLOCKED(object); object->paging_in_progress += i; } void vm_object_pip_subtract(vm_object_t object, short i) { VM_OBJECT_ASSERT_WLOCKED(object); object->paging_in_progress -= i; } void vm_object_pip_wakeup(vm_object_t object) { VM_OBJECT_ASSERT_WLOCKED(object); object->paging_in_progress--; if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) { vm_object_clear_flag(object, OBJ_PIPWNT); wakeup(object); } } void vm_object_pip_wakeupn(vm_object_t object, short i) { VM_OBJECT_ASSERT_WLOCKED(object); if (i) object->paging_in_progress -= i; if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) { vm_object_clear_flag(object, OBJ_PIPWNT); wakeup(object); } } void vm_object_pip_wait(vm_object_t object, char *waitid) { VM_OBJECT_ASSERT_WLOCKED(object); while (object->paging_in_progress) { object->flags |= OBJ_PIPWNT; VM_OBJECT_SLEEP(object, object, PVM, waitid, 0); } } /* * vm_object_allocate: * * Returns a new object with the given size. */ vm_object_t vm_object_allocate(objtype_t type, vm_pindex_t size) { vm_object_t object; object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK); _vm_object_allocate(type, size, object); return (object); } /* * vm_object_reference: * * Gets another reference to the given object. Note: OBJ_DEAD * objects can be referenced during final cleaning. */ void vm_object_reference(vm_object_t object) { if (object == NULL) return; VM_OBJECT_WLOCK(object); vm_object_reference_locked(object); VM_OBJECT_WUNLOCK(object); } /* * vm_object_reference_locked: * * Gets another reference to the given object. * * The object must be locked. */ void vm_object_reference_locked(vm_object_t object) { struct vnode *vp; VM_OBJECT_ASSERT_WLOCKED(object); object->ref_count++; if (object->type == OBJT_VNODE) { vp = object->handle; vref(vp); } } /* * Handle deallocating an object of type OBJT_VNODE. */ static void vm_object_vndeallocate(vm_object_t object) { struct vnode *vp = (struct vnode *) object->handle; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_VNODE, ("vm_object_vndeallocate: not a vnode object")); KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp")); #ifdef INVARIANTS if (object->ref_count == 0) { vn_printf(vp, "vm_object_vndeallocate "); panic("vm_object_vndeallocate: bad object reference count"); } #endif if (!umtx_shm_vnobj_persistent && object->ref_count == 1) umtx_shm_object_terminated(object); /* * The test for text of vp vnode does not need a bypass to * reach right VV_TEXT there, since it is obtained from * object->handle. */ if (object->ref_count > 1 || (vp->v_vflag & VV_TEXT) == 0) { object->ref_count--; VM_OBJECT_WUNLOCK(object); /* vrele may need the vnode lock. */ vrele(vp); } else { vhold(vp); VM_OBJECT_WUNLOCK(object); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vdrop(vp); VM_OBJECT_WLOCK(object); object->ref_count--; if (object->type == OBJT_DEAD) { VM_OBJECT_WUNLOCK(object); VOP_UNLOCK(vp, 0); } else { if (object->ref_count == 0) VOP_UNSET_TEXT(vp); VM_OBJECT_WUNLOCK(object); vput(vp); } } } /* * vm_object_deallocate: * * Release a reference to the specified object, * gained either through a vm_object_allocate * or a vm_object_reference call. When all references * are gone, storage associated with this object * may be relinquished. * * No object may be locked. */ void vm_object_deallocate(vm_object_t object) { vm_object_t temp; struct vnode *vp; while (object != NULL) { VM_OBJECT_WLOCK(object); if (object->type == OBJT_VNODE) { vm_object_vndeallocate(object); return; } KASSERT(object->ref_count != 0, ("vm_object_deallocate: object deallocated too many times: %d", object->type)); /* * If the reference count goes to 0 we start calling * vm_object_terminate() on the object chain. * A ref count of 1 may be a special case depending on the * shadow count being 0 or 1. */ object->ref_count--; if (object->ref_count > 1) { VM_OBJECT_WUNLOCK(object); return; } else if (object->ref_count == 1) { if (object->type == OBJT_SWAP && (object->flags & OBJ_TMPFS) != 0) { vp = object->un_pager.swp.swp_tmpfs; vhold(vp); VM_OBJECT_WUNLOCK(object); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VM_OBJECT_WLOCK(object); if (object->type == OBJT_DEAD || object->ref_count != 1) { VM_OBJECT_WUNLOCK(object); VOP_UNLOCK(vp, 0); vdrop(vp); return; } if ((object->flags & OBJ_TMPFS) != 0) VOP_UNSET_TEXT(vp); VOP_UNLOCK(vp, 0); vdrop(vp); } if (object->shadow_count == 0 && object->handle == NULL && (object->type == OBJT_DEFAULT || (object->type == OBJT_SWAP && (object->flags & OBJ_TMPFS_NODE) == 0))) { vm_object_set_flag(object, OBJ_ONEMAPPING); } else if ((object->shadow_count == 1) && (object->handle == NULL) && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { vm_object_t robject; robject = LIST_FIRST(&object->shadow_head); KASSERT(robject != NULL, ("vm_object_deallocate: ref_count: %d, shadow_count: %d", object->ref_count, object->shadow_count)); KASSERT((robject->flags & OBJ_TMPFS_NODE) == 0, ("shadowed tmpfs v_object %p", object)); if (!VM_OBJECT_TRYWLOCK(robject)) { /* * Avoid a potential deadlock. */ object->ref_count++; VM_OBJECT_WUNLOCK(object); /* * More likely than not the thread * holding robject's lock has lower * priority than the current thread. * Let the lower priority thread run. */ pause("vmo_de", 1); continue; } /* * Collapse object into its shadow unless its * shadow is dead. In that case, object will * be deallocated by the thread that is * deallocating its shadow. */ if ((robject->flags & OBJ_DEAD) == 0 && (robject->handle == NULL) && (robject->type == OBJT_DEFAULT || robject->type == OBJT_SWAP)) { robject->ref_count++; retry: if (robject->paging_in_progress) { VM_OBJECT_WUNLOCK(object); vm_object_pip_wait(robject, "objde1"); temp = robject->backing_object; if (object == temp) { VM_OBJECT_WLOCK(object); goto retry; } } else if (object->paging_in_progress) { VM_OBJECT_WUNLOCK(robject); object->flags |= OBJ_PIPWNT; VM_OBJECT_SLEEP(object, object, PDROP | PVM, "objde2", 0); VM_OBJECT_WLOCK(robject); temp = robject->backing_object; if (object == temp) { VM_OBJECT_WLOCK(object); goto retry; } } else VM_OBJECT_WUNLOCK(object); if (robject->ref_count == 1) { robject->ref_count--; object = robject; goto doterm; } object = robject; vm_object_collapse(object); VM_OBJECT_WUNLOCK(object); continue; } VM_OBJECT_WUNLOCK(robject); } VM_OBJECT_WUNLOCK(object); return; } doterm: umtx_shm_object_terminated(object); temp = object->backing_object; if (temp != NULL) { KASSERT((object->flags & OBJ_TMPFS_NODE) == 0, ("shadowed tmpfs v_object 2 %p", object)); VM_OBJECT_WLOCK(temp); LIST_REMOVE(object, shadow_list); temp->shadow_count--; VM_OBJECT_WUNLOCK(temp); object->backing_object = NULL; } /* * Don't double-terminate, we could be in a termination * recursion due to the terminate having to sync data * to disk. */ if ((object->flags & OBJ_DEAD) == 0) vm_object_terminate(object); else VM_OBJECT_WUNLOCK(object); object = temp; } } /* * vm_object_destroy removes the object from the global object list * and frees the space for the object. */ void vm_object_destroy(vm_object_t object) { /* * Release the allocation charge. */ if (object->cred != NULL) { swap_release_by_cred(object->charge, object->cred); object->charge = 0; crfree(object->cred); object->cred = NULL; } /* * Free the space for the object. */ uma_zfree(obj_zone, object); } /* * vm_object_terminate actually destroys the specified object, freeing * up all previously used resources. * * The object must be locked. * This routine may block. */ void vm_object_terminate(vm_object_t object) { vm_page_t p, p_next; VM_OBJECT_ASSERT_WLOCKED(object); /* * Make sure no one uses us. */ vm_object_set_flag(object, OBJ_DEAD); /* * wait for the pageout daemon to be done with the object */ vm_object_pip_wait(object, "objtrm"); KASSERT(!object->paging_in_progress, ("vm_object_terminate: pageout in progress")); /* * Clean and free the pages, as appropriate. All references to the * object are gone, so we don't need to lock it. */ if (object->type == OBJT_VNODE) { struct vnode *vp = (struct vnode *)object->handle; /* * Clean pages and flush buffers. */ vm_object_page_clean(object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(object); vinvalbuf(vp, V_SAVE, 0, 0); BO_LOCK(&vp->v_bufobj); vp->v_bufobj.bo_flag |= BO_DEAD; BO_UNLOCK(&vp->v_bufobj); VM_OBJECT_WLOCK(object); } KASSERT(object->ref_count == 0, ("vm_object_terminate: object with references, ref_count=%d", object->ref_count)); /* * Free any remaining pageable pages. This also removes them from the * paging queues. However, don't free wired pages, just remove them * from the object. Rather than incrementally removing each page from * the object, the page and object are reset to any empty state. */ TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { vm_page_assert_unbusied(p); vm_page_lock(p); /* * Optimize the page's removal from the object by resetting * its "object" field. Specifically, if the page is not * wired, then the effect of this assignment is that * vm_page_free()'s call to vm_page_remove() will return * immediately without modifying the page or the object. */ p->object = NULL; if (p->wire_count == 0) { vm_page_free(p); VM_CNT_INC(v_pfree); } vm_page_unlock(p); } /* * If the object contained any pages, then reset it to an empty state. * None of the object's fields, including "resident_page_count", were * modified by the preceding loop. */ if (object->resident_page_count != 0) { vm_radix_reclaim_allnodes(&object->rtree); TAILQ_INIT(&object->memq); object->resident_page_count = 0; if (object->type == OBJT_VNODE) vdrop(object->handle); } #if VM_NRESERVLEVEL > 0 if (__predict_false(!LIST_EMPTY(&object->rvq))) vm_reserv_break_all(object); #endif KASSERT(object->cred == NULL || object->type == OBJT_DEFAULT || object->type == OBJT_SWAP, ("%s: non-swap obj %p has cred", __func__, object)); /* * Let the pager know object is dead. */ vm_pager_deallocate(object); VM_OBJECT_WUNLOCK(object); vm_object_destroy(object); } /* * Make the page read-only so that we can clear the object flags. However, if * this is a nosync mmap then the object is likely to stay dirty so do not * mess with the page and do not clear the object flags. Returns TRUE if the * page should be flushed, and FALSE otherwise. */ static boolean_t vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *clearobjflags) { /* * If we have been asked to skip nosync pages and this is a * nosync page, skip it. Note that the object flags were not * cleared in this case so we do not have to set them. */ if ((flags & OBJPC_NOSYNC) != 0 && (p->oflags & VPO_NOSYNC) != 0) { *clearobjflags = FALSE; return (FALSE); } else { pmap_remove_write(p); return (p->dirty != 0); } } /* * vm_object_page_clean * * Clean all dirty pages in the specified range of object. Leaves page * on whatever queue it is currently on. If NOSYNC is set then do not * write out pages with VPO_NOSYNC set (originally comes from MAP_NOSYNC), * leaving the object dirty. * * When stuffing pages asynchronously, allow clustering. XXX we need a * synchronous clustering mode implementation. * * Odd semantics: if start == end, we clean everything. * * The object must be locked. * * Returns FALSE if some page from the range was not written, as * reported by the pager, and TRUE otherwise. */ boolean_t vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end, int flags) { vm_page_t np, p; vm_pindex_t pi, tend, tstart; int curgeneration, n, pagerflags; boolean_t clearobjflags, eio, res; VM_OBJECT_ASSERT_WLOCKED(object); /* * The OBJ_MIGHTBEDIRTY flag is only set for OBJT_VNODE * objects. The check below prevents the function from * operating on non-vnode objects. */ if ((object->flags & OBJ_MIGHTBEDIRTY) == 0 || object->resident_page_count == 0) return (TRUE); pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) != 0 ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK; pagerflags |= (flags & OBJPC_INVAL) != 0 ? VM_PAGER_PUT_INVAL : 0; tstart = OFF_TO_IDX(start); tend = (end == 0) ? object->size : OFF_TO_IDX(end + PAGE_MASK); clearobjflags = tstart == 0 && tend >= object->size; res = TRUE; rescan: curgeneration = object->generation; for (p = vm_page_find_least(object, tstart); p != NULL; p = np) { pi = p->pindex; if (pi >= tend) break; np = TAILQ_NEXT(p, listq); if (p->valid == 0) continue; if (vm_page_sleep_if_busy(p, "vpcwai")) { if (object->generation != curgeneration) { if ((flags & OBJPC_SYNC) != 0) goto rescan; else clearobjflags = FALSE; } np = vm_page_find_least(object, pi); continue; } if (!vm_object_page_remove_write(p, flags, &clearobjflags)) continue; n = vm_object_page_collect_flush(object, p, pagerflags, flags, &clearobjflags, &eio); if (eio) { res = FALSE; clearobjflags = FALSE; } if (object->generation != curgeneration) { if ((flags & OBJPC_SYNC) != 0) goto rescan; else clearobjflags = FALSE; } /* * If the VOP_PUTPAGES() did a truncated write, so * that even the first page of the run is not fully * written, vm_pageout_flush() returns 0 as the run * length. Since the condition that caused truncated * write may be permanent, e.g. exhausted free space, * accepting n == 0 would cause an infinite loop. * * Forwarding the iterator leaves the unwritten page * behind, but there is not much we can do there if * filesystem refuses to write it. */ if (n == 0) { n = 1; clearobjflags = FALSE; } np = vm_page_find_least(object, pi + n); } #if 0 VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0); #endif if (clearobjflags) vm_object_clear_flag(object, OBJ_MIGHTBEDIRTY); return (res); } static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags, int flags, boolean_t *clearobjflags, boolean_t *eio) { vm_page_t ma[vm_pageout_page_count], p_first, tp; int count, i, mreq, runlen; vm_page_lock_assert(p, MA_NOTOWNED); VM_OBJECT_ASSERT_WLOCKED(object); count = 1; mreq = 0; for (tp = p; count < vm_pageout_page_count; count++) { tp = vm_page_next(tp); if (tp == NULL || vm_page_busied(tp)) break; if (!vm_object_page_remove_write(tp, flags, clearobjflags)) break; } for (p_first = p; count < vm_pageout_page_count; count++) { tp = vm_page_prev(p_first); if (tp == NULL || vm_page_busied(tp)) break; if (!vm_object_page_remove_write(tp, flags, clearobjflags)) break; p_first = tp; mreq++; } for (tp = p_first, i = 0; i < count; tp = TAILQ_NEXT(tp, listq), i++) ma[i] = tp; vm_pageout_flush(ma, count, pagerflags, mreq, &runlen, eio); return (runlen); } /* * Note that there is absolutely no sense in writing out * anonymous objects, so we track down the vnode object * to write out. * We invalidate (remove) all pages from the address space * for semantic correctness. * * If the backing object is a device object with unmanaged pages, then any * mappings to the specified range of pages must be removed before this * function is called. * * Note: certain anonymous maps, such as MAP_NOSYNC maps, * may start out with a NULL object. */ boolean_t vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size, boolean_t syncio, boolean_t invalidate) { vm_object_t backing_object; struct vnode *vp; struct mount *mp; int error, flags, fsync_after; boolean_t res; if (object == NULL) return (TRUE); res = TRUE; error = 0; VM_OBJECT_WLOCK(object); while ((backing_object = object->backing_object) != NULL) { VM_OBJECT_WLOCK(backing_object); offset += object->backing_object_offset; VM_OBJECT_WUNLOCK(object); object = backing_object; if (object->size < OFF_TO_IDX(offset + size)) size = IDX_TO_OFF(object->size) - offset; } /* * Flush pages if writing is allowed, invalidate them * if invalidation requested. Pages undergoing I/O * will be ignored by vm_object_page_remove(). * * We cannot lock the vnode and then wait for paging * to complete without deadlocking against vm_fault. * Instead we simply call vm_object_page_remove() and * allow it to block internally on a page-by-page * basis when it encounters pages undergoing async * I/O. */ if (object->type == OBJT_VNODE && (object->flags & OBJ_MIGHTBEDIRTY) != 0) { vp = object->handle; VM_OBJECT_WUNLOCK(object); (void) vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (syncio && !invalidate && offset == 0 && atop(size) == object->size) { /* * If syncing the whole mapping of the file, * it is faster to schedule all the writes in * async mode, also allowing the clustering, * and then wait for i/o to complete. */ flags = 0; fsync_after = TRUE; } else { flags = (syncio || invalidate) ? OBJPC_SYNC : 0; flags |= invalidate ? (OBJPC_SYNC | OBJPC_INVAL) : 0; fsync_after = FALSE; } VM_OBJECT_WLOCK(object); res = vm_object_page_clean(object, offset, offset + size, flags); VM_OBJECT_WUNLOCK(object); if (fsync_after) error = VOP_FSYNC(vp, MNT_WAIT, curthread); VOP_UNLOCK(vp, 0); vn_finished_write(mp); if (error != 0) res = FALSE; VM_OBJECT_WLOCK(object); } if ((object->type == OBJT_VNODE || object->type == OBJT_DEVICE) && invalidate) { if (object->type == OBJT_DEVICE) /* * The option OBJPR_NOTMAPPED must be passed here * because vm_object_page_remove() cannot remove * unmanaged mappings. */ flags = OBJPR_NOTMAPPED; else if (old_msync) flags = 0; else flags = OBJPR_CLEANONLY; vm_object_page_remove(object, OFF_TO_IDX(offset), OFF_TO_IDX(offset + size + PAGE_MASK), flags); } VM_OBJECT_WUNLOCK(object); return (res); } /* * Determine whether the given advice can be applied to the object. Advice is * not applied to unmanaged pages since they never belong to page queues, and * since MADV_FREE is destructive, it can apply only to anonymous pages that * have been mapped at most once. */ static bool vm_object_advice_applies(vm_object_t object, int advice) { if ((object->flags & OBJ_UNMANAGED) != 0) return (false); if (advice != MADV_FREE) return (true); return ((object->type == OBJT_DEFAULT || object->type == OBJT_SWAP) && (object->flags & OBJ_ONEMAPPING) != 0); } static void vm_object_madvise_freespace(vm_object_t object, int advice, vm_pindex_t pindex, vm_size_t size) { if (advice == MADV_FREE && object->type == OBJT_SWAP) swap_pager_freespace(object, pindex, size); } /* * vm_object_madvise: * * Implements the madvise function at the object/page level. * * MADV_WILLNEED (any object) * * Activate the specified pages if they are resident. * * MADV_DONTNEED (any object) * * Deactivate the specified pages if they are resident. * * MADV_FREE (OBJT_DEFAULT/OBJT_SWAP objects, * OBJ_ONEMAPPING only) * * Deactivate and clean the specified pages if they are * resident. This permits the process to reuse the pages * without faulting or the kernel to reclaim the pages * without I/O. */ void vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end, int advice) { vm_pindex_t tpindex; vm_object_t backing_object, tobject; vm_page_t m, tm; if (object == NULL) return; relookup: VM_OBJECT_WLOCK(object); if (!vm_object_advice_applies(object, advice)) { VM_OBJECT_WUNLOCK(object); return; } for (m = vm_page_find_least(object, pindex); pindex < end; pindex++) { tobject = object; /* * If the next page isn't resident in the top-level object, we * need to search the shadow chain. When applying MADV_FREE, we * take care to release any swap space used to store * non-resident pages. */ if (m == NULL || pindex < m->pindex) { /* * Optimize a common case: if the top-level object has * no backing object, we can skip over the non-resident * range in constant time. */ if (object->backing_object == NULL) { tpindex = (m != NULL && m->pindex < end) ? m->pindex : end; vm_object_madvise_freespace(object, advice, pindex, tpindex - pindex); if ((pindex = tpindex) == end) break; goto next_page; } tpindex = pindex; do { vm_object_madvise_freespace(tobject, advice, tpindex, 1); /* * Prepare to search the next object in the * chain. */ backing_object = tobject->backing_object; if (backing_object == NULL) goto next_pindex; VM_OBJECT_WLOCK(backing_object); tpindex += OFF_TO_IDX(tobject->backing_object_offset); if (tobject != object) VM_OBJECT_WUNLOCK(tobject); tobject = backing_object; if (!vm_object_advice_applies(tobject, advice)) goto next_pindex; } while ((tm = vm_page_lookup(tobject, tpindex)) == NULL); } else { next_page: tm = m; m = TAILQ_NEXT(m, listq); } /* * If the page is not in a normal state, skip it. */ if (tm->valid != VM_PAGE_BITS_ALL) goto next_pindex; vm_page_lock(tm); if (tm->hold_count != 0 || tm->wire_count != 0) { vm_page_unlock(tm); goto next_pindex; } KASSERT((tm->flags & PG_FICTITIOUS) == 0, ("vm_object_madvise: page %p is fictitious", tm)); KASSERT((tm->oflags & VPO_UNMANAGED) == 0, ("vm_object_madvise: page %p is not managed", tm)); if (vm_page_busied(tm)) { if (object != tobject) VM_OBJECT_WUNLOCK(tobject); VM_OBJECT_WUNLOCK(object); if (advice == MADV_WILLNEED) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_aflag_set(tm, PGA_REFERENCED); } vm_page_busy_sleep(tm, "madvpo", false); goto relookup; } vm_page_advise(tm, advice); vm_page_unlock(tm); vm_object_madvise_freespace(tobject, advice, tm->pindex, 1); next_pindex: if (tobject != object) VM_OBJECT_WUNLOCK(tobject); } VM_OBJECT_WUNLOCK(object); } /* * vm_object_shadow: * * Create a new object which is backed by the * specified existing object range. The source * object reference is deallocated. * * The new object and offset into that object * are returned in the source parameters. */ void vm_object_shadow( vm_object_t *object, /* IN/OUT */ vm_ooffset_t *offset, /* IN/OUT */ vm_size_t length) { vm_object_t source; vm_object_t result; source = *object; /* * Don't create the new object if the old object isn't shared. */ if (source != NULL) { VM_OBJECT_WLOCK(source); if (source->ref_count == 1 && source->handle == NULL && (source->type == OBJT_DEFAULT || source->type == OBJT_SWAP)) { VM_OBJECT_WUNLOCK(source); return; } VM_OBJECT_WUNLOCK(source); } /* * Allocate a new object with the given length. */ result = vm_object_allocate(OBJT_DEFAULT, atop(length)); /* * The new object shadows the source object, adding a reference to it. * Our caller changes his reference to point to the new object, * removing a reference to the source object. Net result: no change * of reference count. * * Try to optimize the result object's page color when shadowing * in order to maintain page coloring consistency in the combined * shadowed object. */ result->backing_object = source; /* * Store the offset into the source object, and fix up the offset into * the new object. */ result->backing_object_offset = *offset; if (source != NULL) { VM_OBJECT_WLOCK(source); LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list); source->shadow_count++; #if VM_NRESERVLEVEL > 0 result->flags |= source->flags & OBJ_COLORED; result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) & ((1 << (VM_NFREEORDER - 1)) - 1); #endif VM_OBJECT_WUNLOCK(source); } /* * Return the new things */ *offset = 0; *object = result; } /* * vm_object_split: * * Split the pages in a map entry into a new object. This affords * easier removal of unused pages, and keeps object inheritance from * being a negative impact on memory usage. */ void vm_object_split(vm_map_entry_t entry) { vm_page_t m, m_next; vm_object_t orig_object, new_object, source; vm_pindex_t idx, offidxstart; vm_size_t size; orig_object = entry->object.vm_object; if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP) return; if (orig_object->ref_count <= 1) return; VM_OBJECT_WUNLOCK(orig_object); offidxstart = OFF_TO_IDX(entry->offset); size = atop(entry->end - entry->start); /* * If swap_pager_copy() is later called, it will convert new_object * into a swap object. */ new_object = vm_object_allocate(OBJT_DEFAULT, size); /* * At this point, the new object is still private, so the order in * which the original and new objects are locked does not matter. */ VM_OBJECT_WLOCK(new_object); VM_OBJECT_WLOCK(orig_object); source = orig_object->backing_object; if (source != NULL) { VM_OBJECT_WLOCK(source); if ((source->flags & OBJ_DEAD) != 0) { VM_OBJECT_WUNLOCK(source); VM_OBJECT_WUNLOCK(orig_object); VM_OBJECT_WUNLOCK(new_object); vm_object_deallocate(new_object); VM_OBJECT_WLOCK(orig_object); return; } LIST_INSERT_HEAD(&source->shadow_head, new_object, shadow_list); source->shadow_count++; vm_object_reference_locked(source); /* for new_object */ vm_object_clear_flag(source, OBJ_ONEMAPPING); VM_OBJECT_WUNLOCK(source); new_object->backing_object_offset = orig_object->backing_object_offset + entry->offset; new_object->backing_object = source; } if (orig_object->cred != NULL) { new_object->cred = orig_object->cred; crhold(orig_object->cred); new_object->charge = ptoa(size); KASSERT(orig_object->charge >= ptoa(size), ("orig_object->charge < 0")); orig_object->charge -= ptoa(size); } retry: m = vm_page_find_least(orig_object, offidxstart); for (; m != NULL && (idx = m->pindex - offidxstart) < size; m = m_next) { m_next = TAILQ_NEXT(m, listq); /* * We must wait for pending I/O to complete before we can * rename the page. * * We do not have to VM_PROT_NONE the page as mappings should * not be changed by this operation. */ if (vm_page_busied(m)) { VM_OBJECT_WUNLOCK(new_object); vm_page_lock(m); VM_OBJECT_WUNLOCK(orig_object); vm_page_busy_sleep(m, "spltwt", false); VM_OBJECT_WLOCK(orig_object); VM_OBJECT_WLOCK(new_object); goto retry; } /* vm_page_rename() will dirty the page. */ if (vm_page_rename(m, new_object, idx)) { VM_OBJECT_WUNLOCK(new_object); VM_OBJECT_WUNLOCK(orig_object); VM_WAIT; VM_OBJECT_WLOCK(orig_object); VM_OBJECT_WLOCK(new_object); goto retry; } #if VM_NRESERVLEVEL > 0 /* * If some of the reservation's allocated pages remain with * the original object, then transferring the reservation to * the new object is neither particularly beneficial nor * particularly harmful as compared to leaving the reservation * with the original object. If, however, all of the * reservation's allocated pages are transferred to the new * object, then transferring the reservation is typically * beneficial. Determining which of these two cases applies * would be more costly than unconditionally renaming the * reservation. */ vm_reserv_rename(m, new_object, orig_object, offidxstart); #endif if (orig_object->type == OBJT_SWAP) vm_page_xbusy(m); } if (orig_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case the orig_object's * and new_object's locks are released and reacquired. */ swap_pager_copy(orig_object, new_object, offidxstart, 0); TAILQ_FOREACH(m, &new_object->memq, listq) vm_page_xunbusy(m); } VM_OBJECT_WUNLOCK(orig_object); VM_OBJECT_WUNLOCK(new_object); entry->object.vm_object = new_object; entry->offset = 0LL; vm_object_deallocate(orig_object); VM_OBJECT_WLOCK(new_object); } #define OBSC_COLLAPSE_NOWAIT 0x0002 #define OBSC_COLLAPSE_WAIT 0x0004 static vm_page_t vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p, vm_page_t next, int op) { vm_object_t backing_object; VM_OBJECT_ASSERT_WLOCKED(object); backing_object = object->backing_object; VM_OBJECT_ASSERT_WLOCKED(backing_object); KASSERT(p == NULL || vm_page_busied(p), ("unbusy page %p", p)); KASSERT(p == NULL || p->object == object || p->object == backing_object, ("invalid ownership %p %p %p", p, object, backing_object)); if ((op & OBSC_COLLAPSE_NOWAIT) != 0) return (next); if (p != NULL) vm_page_lock(p); VM_OBJECT_WUNLOCK(object); VM_OBJECT_WUNLOCK(backing_object); if (p == NULL) VM_WAIT; else vm_page_busy_sleep(p, "vmocol", false); VM_OBJECT_WLOCK(object); VM_OBJECT_WLOCK(backing_object); return (TAILQ_FIRST(&backing_object->memq)); } static bool vm_object_scan_all_shadowed(vm_object_t object) { vm_object_t backing_object; vm_page_t p, pp; vm_pindex_t backing_offset_index, new_pindex, pi, ps; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(object->backing_object); backing_object = object->backing_object; if (backing_object->type != OBJT_DEFAULT && backing_object->type != OBJT_SWAP) return (false); pi = backing_offset_index = OFF_TO_IDX(object->backing_object_offset); p = vm_page_find_least(backing_object, pi); ps = swap_pager_find_least(backing_object, pi); /* * Only check pages inside the parent object's range and * inside the parent object's mapping of the backing object. */ for (;; pi++) { if (p != NULL && p->pindex < pi) p = TAILQ_NEXT(p, listq); if (ps < pi) ps = swap_pager_find_least(backing_object, pi); if (p == NULL && ps >= backing_object->size) break; else if (p == NULL) pi = ps; else pi = MIN(p->pindex, ps); new_pindex = pi - backing_offset_index; if (new_pindex >= object->size) break; /* * See if the parent has the page or if the parent's object * pager has the page. If the parent has the page but the page * is not valid, the parent's object pager must have the page. * * If this fails, the parent does not completely shadow the * object and we might as well give up now. */ pp = vm_page_lookup(object, new_pindex); if ((pp == NULL || pp->valid == 0) && !vm_pager_has_page(object, new_pindex, NULL, NULL)) return (false); } return (true); } static bool vm_object_collapse_scan(vm_object_t object, int op) { vm_object_t backing_object; vm_page_t next, p, pp; vm_pindex_t backing_offset_index, new_pindex; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(object->backing_object); backing_object = object->backing_object; backing_offset_index = OFF_TO_IDX(object->backing_object_offset); /* * Initial conditions */ if ((op & OBSC_COLLAPSE_WAIT) != 0) vm_object_set_flag(backing_object, OBJ_DEAD); /* * Our scan */ for (p = TAILQ_FIRST(&backing_object->memq); p != NULL; p = next) { next = TAILQ_NEXT(p, listq); new_pindex = p->pindex - backing_offset_index; /* * Check for busy page */ if (vm_page_busied(p)) { next = vm_object_collapse_scan_wait(object, p, next, op); continue; } KASSERT(p->object == backing_object, ("vm_object_collapse_scan: object mismatch")); if (p->pindex < backing_offset_index || new_pindex >= object->size) { if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); /* * Page is out of the parent object's range, we can * simply destroy it. */ vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (p->wire_count == 0) vm_page_free(p); else vm_page_remove(p); vm_page_unlock(p); continue; } pp = vm_page_lookup(object, new_pindex); if (pp != NULL && vm_page_busied(pp)) { /* * The page in the parent is busy and possibly not * (yet) valid. Until its state is finalized by the * busy bit owner, we can't tell whether it shadows the * original page. Therefore, we must either skip it * and the original (backing_object) page or wait for * its state to be finalized. * * This is due to a race with vm_fault() where we must * unbusy the original (backing_obj) page before we can * (re)lock the parent. Hence we can get here. */ next = vm_object_collapse_scan_wait(object, pp, next, op); continue; } KASSERT(pp == NULL || pp->valid != 0, ("unbusy invalid page %p", pp)); if (pp != NULL || vm_pager_has_page(object, new_pindex, NULL, NULL)) { /* * The page already exists in the parent OR swap exists * for this location in the parent. Leave the parent's * page alone. Destroy the original page from the * backing object. */ if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (p->wire_count == 0) vm_page_free(p); else vm_page_remove(p); vm_page_unlock(p); continue; } /* * Page does not exist in parent, rename the page from the * backing object to the main object. * * If the page was mapped to a process, it can remain mapped * through the rename. vm_page_rename() will dirty the page. */ if (vm_page_rename(p, object, new_pindex)) { next = vm_object_collapse_scan_wait(object, NULL, next, op); continue; } /* Use the old pindex to free the right page. */ if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, new_pindex + backing_offset_index, 1); #if VM_NRESERVLEVEL > 0 /* * Rename the reservation. */ vm_reserv_rename(p, object, backing_object, backing_offset_index); #endif } return (true); } /* * this version of collapse allows the operation to occur earlier and * when paging_in_progress is true for an object... This is not a complete * operation, but should plug 99.9% of the rest of the leaks. */ static void vm_object_qcollapse(vm_object_t object) { vm_object_t backing_object = object->backing_object; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(backing_object); if (backing_object->ref_count != 1) return; vm_object_collapse_scan(object, OBSC_COLLAPSE_NOWAIT); } /* * vm_object_collapse: * * Collapse an object with the object backing it. * Pages in the backing object are moved into the * parent, and the backing object is deallocated. */ void vm_object_collapse(vm_object_t object) { vm_object_t backing_object, new_backing_object; VM_OBJECT_ASSERT_WLOCKED(object); while (TRUE) { /* * Verify that the conditions are right for collapse: * * The object exists and the backing object exists. */ if ((backing_object = object->backing_object) == NULL) break; /* * we check the backing object first, because it is most likely * not collapsable. */ VM_OBJECT_WLOCK(backing_object); if (backing_object->handle != NULL || (backing_object->type != OBJT_DEFAULT && backing_object->type != OBJT_SWAP) || (backing_object->flags & OBJ_DEAD) || object->handle != NULL || (object->type != OBJT_DEFAULT && object->type != OBJT_SWAP) || (object->flags & OBJ_DEAD)) { VM_OBJECT_WUNLOCK(backing_object); break; } if (object->paging_in_progress != 0 || backing_object->paging_in_progress != 0) { vm_object_qcollapse(object); VM_OBJECT_WUNLOCK(backing_object); break; } /* * We know that we can either collapse the backing object (if * the parent is the only reference to it) or (perhaps) have * the parent bypass the object if the parent happens to shadow * all the resident pages in the entire backing object. * * This is ignoring pager-backed pages such as swap pages. * vm_object_collapse_scan fails the shadowing test in this * case. */ if (backing_object->ref_count == 1) { vm_object_pip_add(object, 1); vm_object_pip_add(backing_object, 1); /* * If there is exactly one reference to the backing * object, we can collapse it into the parent. */ vm_object_collapse_scan(object, OBSC_COLLAPSE_WAIT); #if VM_NRESERVLEVEL > 0 /* * Break any reservations from backing_object. */ if (__predict_false(!LIST_EMPTY(&backing_object->rvq))) vm_reserv_break_all(backing_object); #endif /* * Move the pager from backing_object to object. */ if (backing_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case * the backing_object's and object's locks are * released and reacquired. * Since swap_pager_copy() is being asked to * destroy the source, it will change the * backing_object's type to OBJT_DEFAULT. */ swap_pager_copy( backing_object, object, OFF_TO_IDX(object->backing_object_offset), TRUE); } /* * Object now shadows whatever backing_object did. * Note that the reference to * backing_object->backing_object moves from within * backing_object to within object. */ LIST_REMOVE(object, shadow_list); backing_object->shadow_count--; if (backing_object->backing_object) { VM_OBJECT_WLOCK(backing_object->backing_object); LIST_REMOVE(backing_object, shadow_list); LIST_INSERT_HEAD( &backing_object->backing_object->shadow_head, object, shadow_list); /* * The shadow_count has not changed. */ VM_OBJECT_WUNLOCK(backing_object->backing_object); } object->backing_object = backing_object->backing_object; object->backing_object_offset += backing_object->backing_object_offset; /* * Discard backing_object. * * Since the backing object has no pages, no pager left, * and no object references within it, all that is * necessary is to dispose of it. */ KASSERT(backing_object->ref_count == 1, ( "backing_object %p was somehow re-referenced during collapse!", backing_object)); vm_object_pip_wakeup(backing_object); backing_object->type = OBJT_DEAD; backing_object->ref_count = 0; VM_OBJECT_WUNLOCK(backing_object); vm_object_destroy(backing_object); vm_object_pip_wakeup(object); object_collapses++; } else { /* * If we do not entirely shadow the backing object, * there is nothing we can do so we give up. */ if (object->resident_page_count != object->size && !vm_object_scan_all_shadowed(object)) { VM_OBJECT_WUNLOCK(backing_object); break; } /* * Make the parent shadow the next object in the * chain. Deallocating backing_object will not remove * it, since its reference count is at least 2. */ LIST_REMOVE(object, shadow_list); backing_object->shadow_count--; new_backing_object = backing_object->backing_object; if ((object->backing_object = new_backing_object) != NULL) { VM_OBJECT_WLOCK(new_backing_object); LIST_INSERT_HEAD( &new_backing_object->shadow_head, object, shadow_list ); new_backing_object->shadow_count++; vm_object_reference_locked(new_backing_object); VM_OBJECT_WUNLOCK(new_backing_object); object->backing_object_offset += backing_object->backing_object_offset; } /* * Drop the reference count on backing_object. Since * its ref_count was at least 2, it will not vanish. */ backing_object->ref_count--; VM_OBJECT_WUNLOCK(backing_object); object_bypasses++; } /* * Try again with this object's new backing object. */ } } /* * vm_object_page_remove: * * For the given object, either frees or invalidates each of the * specified pages. In general, a page is freed. However, if a page is * wired for any reason other than the existence of a managed, wired * mapping, then it may be invalidated but not removed from the object. * Pages are specified by the given range ["start", "end") and the option * OBJPR_CLEANONLY. As a special case, if "end" is zero, then the range * extends from "start" to the end of the object. If the option * OBJPR_CLEANONLY is specified, then only the non-dirty pages within the * specified range are affected. If the option OBJPR_NOTMAPPED is * specified, then the pages within the specified range must have no * mappings. Otherwise, if this option is not specified, any mappings to * the specified pages are removed before the pages are freed or * invalidated. * * In general, this operation should only be performed on objects that * contain managed pages. There are, however, two exceptions. First, it * is performed on the kernel and kmem objects by vm_map_entry_delete(). * Second, it is used by msync(..., MS_INVALIDATE) to invalidate device- * backed pages. In both of these cases, the option OBJPR_CLEANONLY must * not be specified and the option OBJPR_NOTMAPPED must be specified. * * The object must be locked. */ void vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int options) { vm_page_t p, next; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_UNMANAGED) == 0 || (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED, ("vm_object_page_remove: illegal options for object %p", object)); if (object->resident_page_count == 0) return; vm_object_pip_add(object, 1); again: p = vm_page_find_least(object, start); /* * Here, the variable "p" is either (1) the page with the least pindex * greater than or equal to the parameter "start" or (2) NULL. */ for (; p != NULL && (p->pindex < end || end == 0); p = next) { next = TAILQ_NEXT(p, listq); /* * If the page is wired for any reason besides the existence * of managed, wired mappings, then it cannot be freed. For * example, fictitious pages, which represent device memory, * are inherently wired and cannot be freed. They can, * however, be invalidated if the option OBJPR_CLEANONLY is * not specified. */ vm_page_lock(p); if (vm_page_xbusied(p)) { VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(p, "vmopax", true); VM_OBJECT_WLOCK(object); goto again; } if (p->wire_count != 0) { if ((options & OBJPR_NOTMAPPED) == 0) pmap_remove_all(p); if ((options & OBJPR_CLEANONLY) == 0) { p->valid = 0; vm_page_undirty(p); } goto next; } if (vm_page_busied(p)) { VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(p, "vmopar", false); VM_OBJECT_WLOCK(object); goto again; } KASSERT((p->flags & PG_FICTITIOUS) == 0, ("vm_object_page_remove: page %p is fictitious", p)); if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { if ((options & OBJPR_NOTMAPPED) == 0) pmap_remove_write(p); if (p->dirty) goto next; } if ((options & OBJPR_NOTMAPPED) == 0) pmap_remove_all(p); vm_page_free(p); next: vm_page_unlock(p); } vm_object_pip_wakeup(object); } /* * vm_object_page_noreuse: * * For the given object, attempt to move the specified pages to * the head of the inactive queue. This bypasses regular LRU * operation and allows the pages to be reused quickly under memory * pressure. If a page is wired for any reason, then it will not * be queued. Pages are specified by the range ["start", "end"). * As a special case, if "end" is zero, then the range extends from * "start" to the end of the object. * * This operation should only be performed on objects that * contain non-fictitious, managed pages. * * The object must be locked. */ void vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { struct mtx *mtx, *new_mtx; vm_page_t p, next; VM_OBJECT_ASSERT_LOCKED(object); KASSERT((object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0, ("vm_object_page_noreuse: illegal object %p", object)); if (object->resident_page_count == 0) return; p = vm_page_find_least(object, start); /* * Here, the variable "p" is either (1) the page with the least pindex * greater than or equal to the parameter "start" or (2) NULL. */ mtx = NULL; for (; p != NULL && (p->pindex < end || end == 0); p = next) { next = TAILQ_NEXT(p, listq); /* * Avoid releasing and reacquiring the same page lock. */ new_mtx = vm_page_lockptr(p); if (mtx != new_mtx) { if (mtx != NULL) mtx_unlock(mtx); mtx = new_mtx; mtx_lock(mtx); } vm_page_deactivate_noreuse(p); } if (mtx != NULL) mtx_unlock(mtx); } /* * Populate the specified range of the object with valid pages. Returns * TRUE if the range is successfully populated and FALSE otherwise. * * Note: This function should be optimized to pass a larger array of * pages to vm_pager_get_pages() before it is applied to a non- * OBJT_DEVICE object. * * The object must be locked. */ boolean_t vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { vm_page_t m; vm_pindex_t pindex; int rv; VM_OBJECT_ASSERT_WLOCKED(object); for (pindex = start; pindex < end; pindex++) { m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL); if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(m); vm_page_free(m); vm_page_unlock(m); break; } } /* * Keep "m" busy because a subsequent iteration may unlock * the object. */ } if (pindex > start) { m = vm_page_lookup(object, start); while (m != NULL && m->pindex < pindex) { vm_page_xunbusy(m); m = TAILQ_NEXT(m, listq); } } return (pindex == end); } /* * Routine: vm_object_coalesce * Function: Coalesces two objects backing up adjoining * regions of memory into a single object. * * returns TRUE if objects were combined. * * NOTE: Only works at the moment if the second object is NULL - * if it's not, which object do we lock first? * * Parameters: * prev_object First object to coalesce * prev_offset Offset into prev_object * prev_size Size of reference to prev_object * next_size Size of reference to the second object * reserved Indicator that extension region has * swap accounted for * * Conditions: * The object must *not* be locked. */ boolean_t vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, vm_size_t prev_size, vm_size_t next_size, boolean_t reserved) { vm_pindex_t next_pindex; if (prev_object == NULL) return (TRUE); VM_OBJECT_WLOCK(prev_object); if ((prev_object->type != OBJT_DEFAULT && prev_object->type != OBJT_SWAP) || (prev_object->flags & OBJ_TMPFS_NODE) != 0) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } /* * Try to collapse the object first */ vm_object_collapse(prev_object); /* * Can't coalesce if: . more than one reference . paged out . shadows * another object . has a copy elsewhere (any of which mean that the * pages not mapped to prev_entry may be in use anyway) */ if (prev_object->backing_object != NULL) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } prev_size >>= PAGE_SHIFT; next_size >>= PAGE_SHIFT; next_pindex = OFF_TO_IDX(prev_offset) + prev_size; if ((prev_object->ref_count > 1) && (prev_object->size != next_pindex)) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } /* * Account for the charge. */ if (prev_object->cred != NULL) { /* * If prev_object was charged, then this mapping, * although not charged now, may become writable * later. Non-NULL cred in the object would prevent * swap reservation during enabling of the write * access, so reserve swap now. Failed reservation * cause allocation of the separate object for the map * entry, and swap reservation for this entry is * managed in appropriate time. */ if (!reserved && !swap_reserve_by_cred(ptoa(next_size), prev_object->cred)) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } prev_object->charge += ptoa(next_size); } /* * Remove any pages that may still be in the object from a previous * deallocation. */ if (next_pindex < prev_object->size) { vm_object_page_remove(prev_object, next_pindex, next_pindex + next_size, 0); if (prev_object->type == OBJT_SWAP) swap_pager_freespace(prev_object, next_pindex, next_size); #if 0 if (prev_object->cred != NULL) { KASSERT(prev_object->charge >= ptoa(prev_object->size - next_pindex), ("object %p overcharged 1 %jx %jx", prev_object, (uintmax_t)next_pindex, (uintmax_t)next_size)); prev_object->charge -= ptoa(prev_object->size - next_pindex); } #endif } /* * Extend the object if necessary. */ if (next_pindex + next_size > prev_object->size) prev_object->size = next_pindex + next_size; VM_OBJECT_WUNLOCK(prev_object); return (TRUE); } void vm_object_set_writeable_dirty(vm_object_t object) { VM_OBJECT_ASSERT_WLOCKED(object); if (object->type != OBJT_VNODE) { if ((object->flags & OBJ_TMPFS_NODE) != 0) { KASSERT(object->type == OBJT_SWAP, ("non-swap tmpfs")); vm_object_set_flag(object, OBJ_TMPFS_DIRTY); } return; } object->generation++; if ((object->flags & OBJ_MIGHTBEDIRTY) != 0) return; vm_object_set_flag(object, OBJ_MIGHTBEDIRTY); } /* * vm_object_unwire: * * For each page offset within the specified range of the given object, * find the highest-level page in the shadow chain and unwire it. A page * must exist at every page offset, and the highest-level page must be * wired. */ void vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length, uint8_t queue) { vm_object_t tobject; vm_page_t m, tm; vm_pindex_t end_pindex, pindex, tpindex; int depth, locked_depth; KASSERT((offset & PAGE_MASK) == 0, ("vm_object_unwire: offset is not page aligned")); KASSERT((length & PAGE_MASK) == 0, ("vm_object_unwire: length is not a multiple of PAGE_SIZE")); /* The wired count of a fictitious page never changes. */ if ((object->flags & OBJ_FICTITIOUS) != 0) return; pindex = OFF_TO_IDX(offset); end_pindex = pindex + atop(length); locked_depth = 1; VM_OBJECT_RLOCK(object); m = vm_page_find_least(object, pindex); while (pindex < end_pindex) { if (m == NULL || pindex < m->pindex) { /* * The first object in the shadow chain doesn't * contain a page at the current index. Therefore, * the page must exist in a backing object. */ tobject = object; tpindex = pindex; depth = 0; do { tpindex += OFF_TO_IDX(tobject->backing_object_offset); tobject = tobject->backing_object; KASSERT(tobject != NULL, ("vm_object_unwire: missing page")); if ((tobject->flags & OBJ_FICTITIOUS) != 0) goto next_page; depth++; if (depth == locked_depth) { locked_depth++; VM_OBJECT_RLOCK(tobject); } } while ((tm = vm_page_lookup(tobject, tpindex)) == NULL); } else { tm = m; m = TAILQ_NEXT(m, listq); } vm_page_lock(tm); vm_page_unwire(tm, queue); vm_page_unlock(tm); next_page: pindex++; } /* Release the accumulated object locks. */ for (depth = 0; depth < locked_depth; depth++) { tobject = object->backing_object; VM_OBJECT_RUNLOCK(object); object = tobject; } } struct vnode * vm_object_vnode(vm_object_t object) { VM_OBJECT_ASSERT_LOCKED(object); if (object->type == OBJT_VNODE) return (object->handle); if (object->type == OBJT_SWAP && (object->flags & OBJ_TMPFS) != 0) return (object->un_pager.swp.swp_tmpfs); return (NULL); } static int sysctl_vm_object_list(SYSCTL_HANDLER_ARGS) { struct kinfo_vmobject kvo; char *fullpath, *freepath; struct vnode *vp; struct vattr va; vm_object_t obj; vm_page_t m; int count, error; if (req->oldptr == NULL) { /* * If an old buffer has not been provided, generate an * estimate of the space needed for a subsequent call. */ mtx_lock(&vm_object_list_mtx); count = 0; TAILQ_FOREACH(obj, &vm_object_list, object_list) { if (obj->type == OBJT_DEAD) continue; count++; } mtx_unlock(&vm_object_list_mtx); return (SYSCTL_OUT(req, NULL, sizeof(struct kinfo_vmobject) * count * 11 / 10)); } error = 0; /* * VM objects are type stable and are never removed from the * list once added. This allows us to safely read obj->object_list * after reacquiring the VM object lock. */ mtx_lock(&vm_object_list_mtx); TAILQ_FOREACH(obj, &vm_object_list, object_list) { if (obj->type == OBJT_DEAD) continue; VM_OBJECT_RLOCK(obj); if (obj->type == OBJT_DEAD) { VM_OBJECT_RUNLOCK(obj); continue; } mtx_unlock(&vm_object_list_mtx); kvo.kvo_size = ptoa(obj->size); kvo.kvo_resident = obj->resident_page_count; kvo.kvo_ref_count = obj->ref_count; kvo.kvo_shadow_count = obj->shadow_count; kvo.kvo_memattr = obj->memattr; kvo.kvo_active = 0; kvo.kvo_inactive = 0; TAILQ_FOREACH(m, &obj->memq, listq) { /* * A page may belong to the object but be * dequeued and set to PQ_NONE while the * object lock is not held. This makes the * reads of m->queue below racy, and we do not * count pages set to PQ_NONE. However, this * sysctl is only meant to give an * approximation of the system anyway. */ if (vm_page_active(m)) kvo.kvo_active++; else if (vm_page_inactive(m)) kvo.kvo_inactive++; } kvo.kvo_vn_fileid = 0; kvo.kvo_vn_fsid = 0; + kvo.kvo_vn_fsid_freebsd11 = 0; freepath = NULL; fullpath = ""; vp = NULL; switch (obj->type) { case OBJT_DEFAULT: kvo.kvo_type = KVME_TYPE_DEFAULT; break; case OBJT_VNODE: kvo.kvo_type = KVME_TYPE_VNODE; vp = obj->handle; vref(vp); break; case OBJT_SWAP: kvo.kvo_type = KVME_TYPE_SWAP; break; case OBJT_DEVICE: kvo.kvo_type = KVME_TYPE_DEVICE; break; case OBJT_PHYS: kvo.kvo_type = KVME_TYPE_PHYS; break; case OBJT_DEAD: kvo.kvo_type = KVME_TYPE_DEAD; break; case OBJT_SG: kvo.kvo_type = KVME_TYPE_SG; break; case OBJT_MGTDEVICE: kvo.kvo_type = KVME_TYPE_MGTDEVICE; break; default: kvo.kvo_type = KVME_TYPE_UNKNOWN; break; } VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { vn_fullpath(curthread, vp, &fullpath, &freepath); vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) { kvo.kvo_vn_fileid = va.va_fileid; kvo.kvo_vn_fsid = va.va_fsid; + kvo.kvo_vn_fsid_freebsd11 = va.va_fsid; + /* truncate */ } vput(vp); } strlcpy(kvo.kvo_path, fullpath, sizeof(kvo.kvo_path)); if (freepath != NULL) free(freepath, M_TEMP); /* Pack record size down */ kvo.kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path) + strlen(kvo.kvo_path) + 1; kvo.kvo_structsize = roundup(kvo.kvo_structsize, sizeof(uint64_t)); error = SYSCTL_OUT(req, &kvo, kvo.kvo_structsize); mtx_lock(&vm_object_list_mtx); if (error) break; } mtx_unlock(&vm_object_list_mtx); return (error); } SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject", "List of VM objects"); #include "opt_ddb.h" #ifdef DDB #include #include #include static int _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry) { vm_map_t tmpm; vm_map_entry_t tmpe; vm_object_t obj; int entcount; if (map == 0) return 0; if (entry == 0) { tmpe = map->header.next; entcount = map->nentries; while (entcount-- && (tmpe != &map->header)) { if (_vm_object_in_map(map, object, tmpe)) { return 1; } tmpe = tmpe->next; } } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { tmpm = entry->object.sub_map; tmpe = tmpm->header.next; entcount = tmpm->nentries; while (entcount-- && tmpe != &tmpm->header) { if (_vm_object_in_map(tmpm, object, tmpe)) { return 1; } tmpe = tmpe->next; } } else if ((obj = entry->object.vm_object) != NULL) { for (; obj; obj = obj->backing_object) if (obj == object) { return 1; } } return 0; } static int vm_object_in_map(vm_object_t object) { struct proc *p; /* sx_slock(&allproc_lock); */ FOREACH_PROC_IN_SYSTEM(p) { if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) continue; if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) { /* sx_sunlock(&allproc_lock); */ return 1; } } /* sx_sunlock(&allproc_lock); */ if (_vm_object_in_map(kernel_map, object, 0)) return 1; return 0; } DB_SHOW_COMMAND(vmochk, vm_object_check) { vm_object_t object; /* * make sure that internal objs are in a map somewhere * and none have zero ref counts. */ TAILQ_FOREACH(object, &vm_object_list, object_list) { if (object->handle == NULL && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { if (object->ref_count == 0) { db_printf("vmochk: internal obj has zero ref count: %ld\n", (long)object->size); } if (!vm_object_in_map(object)) { db_printf( "vmochk: internal obj is not in a map: " "ref: %d, size: %lu: 0x%lx, backing_object: %p\n", object->ref_count, (u_long)object->size, (u_long)object->size, (void *)object->backing_object); } } } } /* * vm_object_print: [ debug ] */ DB_SHOW_COMMAND(object, vm_object_print_static) { /* XXX convert args. */ vm_object_t object = (vm_object_t)addr; boolean_t full = have_addr; vm_page_t p; /* XXX count is an (unused) arg. Avoid shadowing it. */ #define count was_count int count; if (object == NULL) return; db_iprintf( "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x ruid %d charge %jx\n", object, (int)object->type, (uintmax_t)object->size, object->resident_page_count, object->ref_count, object->flags, object->cred ? object->cred->cr_ruid : -1, (uintmax_t)object->charge); db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n", object->shadow_count, object->backing_object ? object->backing_object->ref_count : 0, object->backing_object, (uintmax_t)object->backing_object_offset); if (!full) return; db_indent += 2; count = 0; TAILQ_FOREACH(p, &object->memq, listq) { if (count == 0) db_iprintf("memory:="); else if (count == 6) { db_printf("\n"); db_iprintf(" ..."); count = 0; } else db_printf(","); count++; db_printf("(off=0x%jx,page=0x%jx)", (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p)); } if (count != 0) db_printf("\n"); db_indent -= 2; } /* XXX. */ #undef count /* XXX need this non-static entry for calling from vm_map_print. */ void vm_object_print( /* db_expr_t */ long addr, boolean_t have_addr, /* db_expr_t */ long count, char *modif) { vm_object_print_static(addr, have_addr, count, modif); } DB_SHOW_COMMAND(vmopag, vm_object_print_pages) { vm_object_t object; vm_pindex_t fidx; vm_paddr_t pa; vm_page_t m, prev_m; int rcount, nl, c; nl = 0; TAILQ_FOREACH(object, &vm_object_list, object_list) { db_printf("new object: %p\n", (void *)object); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; rcount = 0; fidx = 0; pa = -1; TAILQ_FOREACH(m, &object->memq, listq) { if (m->pindex > 128) break; if ((prev_m = TAILQ_PREV(m, pglist, listq)) != NULL && prev_m->pindex + 1 != m->pindex) { if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; rcount = 0; } } if (rcount && (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) { ++rcount; continue; } if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; } fidx = m->pindex; pa = VM_PAGE_TO_PHYS(m); rcount = 1; } if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; } } } #endif /* DDB */ Index: head/sys/vm/vm_param.h =================================================================== --- head/sys/vm/vm_param.h (revision 318735) +++ head/sys/vm/vm_param.h (revision 318736) @@ -1,134 +1,134 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_param.h 8.1 (Berkeley) 6/11/93 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * * $FreeBSD$ */ /* * Machine independent virtual memory parameters. */ #ifndef _VM_PARAM_ #define _VM_PARAM_ #include /* * CTL_VM identifiers */ #define VM_TOTAL 1 /* struct vmtotal */ #define VM_METER VM_TOTAL/* deprecated, use VM_TOTAL */ #define VM_LOADAVG 2 /* struct loadavg */ #define VM_V_FREE_MIN 3 /* vm_cnt.v_free_min */ #define VM_V_FREE_TARGET 4 /* vm_cnt.v_free_target */ #define VM_V_FREE_RESERVED 5 /* vm_cnt.v_free_reserved */ #define VM_V_INACTIVE_TARGET 6 /* vm_cnt.v_inactive_target */ #define VM_OBSOLETE_7 7 /* unused, formerly v_cache_min */ #define VM_OBSOLETE_8 8 /* unused, formerly v_cache_max */ #define VM_V_PAGEOUT_FREE_MIN 9 /* vm_cnt.v_pageout_free_min */ #define VM_OBSOLETE_10 10 /* pageout algorithm */ #define VM_SWAPPING_ENABLED 11 /* swapping enabled */ #define VM_MAXID 12 /* number of valid vm ids */ /* * Structure for swap device statistics */ -#define XSWDEV_VERSION 1 +#define XSWDEV_VERSION 2 struct xswdev { u_int xsw_version; dev_t xsw_dev; int xsw_flags; int xsw_nblks; int xsw_used; }; /* * Return values from the VM routines. */ #define KERN_SUCCESS 0 #define KERN_INVALID_ADDRESS 1 #define KERN_PROTECTION_FAILURE 2 #define KERN_NO_SPACE 3 #define KERN_INVALID_ARGUMENT 4 #define KERN_FAILURE 5 #define KERN_RESOURCE_SHORTAGE 6 #define KERN_NOT_RECEIVER 7 #define KERN_NO_ACCESS 8 #ifndef PA_LOCK_COUNT #ifdef SMP #define PA_LOCK_COUNT 32 #else #define PA_LOCK_COUNT 1 #endif /* !SMP */ #endif /* !PA_LOCK_COUNT */ #ifndef ASSEMBLER #ifdef _KERNEL #define num_pages(x) \ ((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) >> PAGE_SHIFT)) extern unsigned long maxtsiz; extern unsigned long dfldsiz; extern unsigned long maxdsiz; extern unsigned long dflssiz; extern unsigned long maxssiz; extern unsigned long sgrowsiz; #endif /* _KERNEL */ #endif /* ASSEMBLER */ #endif /* _VM_PARAM_ */ Index: head/usr.bin/kdump/kdump.c =================================================================== --- head/usr.bin/kdump/kdump.c (revision 318735) +++ head/usr.bin/kdump/kdump.c (revision 318736) @@ -1,2026 +1,2025 @@ /*- * Copyright (c) 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1988, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)kdump.c 8.1 (Berkeley) 6/6/93"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #define _WANT_KERNEL_ERRNO #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_LIBCASPER #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ktrace.h" #ifdef HAVE_LIBCASPER #include #include #include #endif u_int abidump(struct ktr_header *); int fetchprocinfo(struct ktr_header *, u_int *); int fread_tail(void *, int, int); void dumpheader(struct ktr_header *); void ktrsyscall(struct ktr_syscall *, u_int); void ktrsysret(struct ktr_sysret *, u_int); void ktrnamei(char *, int); void hexdump(char *, int, int); void visdump(char *, int, int); void ktrgenio(struct ktr_genio *, int); void ktrpsig(struct ktr_psig *); void ktrcsw(struct ktr_csw *); void ktrcsw_old(struct ktr_csw_old *); void ktruser(int, void *); void ktrcaprights(cap_rights_t *); void ktritimerval(struct itimerval *it); void ktrsockaddr(struct sockaddr *); void ktrstat(struct stat *); void ktrstruct(char *, size_t); void ktrcapfail(struct ktr_cap_fail *); void ktrfault(struct ktr_fault *); void ktrfaultend(struct ktr_faultend *); void usage(void); #define TIMESTAMP_NONE 0x0 #define TIMESTAMP_ABSOLUTE 0x1 #define TIMESTAMP_ELAPSED 0x2 #define TIMESTAMP_RELATIVE 0x4 static int timestamp, decimal, fancy = 1, suppressdata, tail, threads, maxdata, resolv = 0, abiflag = 0, syscallno = 0; static const char *tracefile = DEF_TRACEFILE; static struct ktr_header ktr_header; #define TIME_FORMAT "%b %e %T %Y" #define eqs(s1, s2) (strcmp((s1), (s2)) == 0) #define print_number64(first,i,n,c) do { \ uint64_t __v; \ \ if (quad_align && (((ptrdiff_t)((i) - (first))) & 1) == 1) { \ (i)++; \ (n)--; \ } \ if (quad_slots == 2) \ __v = (uint64_t)(uint32_t)(i)[0] | \ ((uint64_t)(uint32_t)(i)[1]) << 32; \ else \ __v = (uint64_t)*(i); \ if (decimal) \ printf("%c%jd", (c), (intmax_t)__v); \ else \ printf("%c%#jx", (c), (uintmax_t)__v); \ (i) += quad_slots; \ (n) -= quad_slots; \ (c) = ','; \ } while (0) #define print_number(i,n,c) do { \ if (decimal) \ printf("%c%jd", c, (intmax_t)*i); \ else \ printf("%c%#jx", c, (uintmax_t)(u_register_t)*i); \ i++; \ n--; \ c = ','; \ } while (0) struct proc_info { TAILQ_ENTRY(proc_info) info; u_int sv_flags; pid_t pid; }; static TAILQ_HEAD(trace_procs, proc_info) trace_procs; #ifdef HAVE_LIBCASPER static cap_channel_t *cappwd, *capgrp; #endif static void strerror_init(void) { /* * Cache NLS data before entering capability mode. * XXXPJD: There should be strerror_init() and strsignal_init() in libc. */ (void)catopen("libc", NL_CAT_LOCALE); } static void localtime_init(void) { time_t ltime; /* * Allow localtime(3) to cache /etc/localtime content before entering * capability mode. * XXXPJD: There should be localtime_init() in libc. */ (void)time(<ime); (void)localtime(<ime); } #ifdef HAVE_LIBCASPER static int cappwdgrp_setup(cap_channel_t **cappwdp, cap_channel_t **capgrpp) { cap_channel_t *capcas, *cappwdloc, *capgrploc; const char *cmds[1], *fields[1]; capcas = cap_init(); if (capcas == NULL) { err(1, "unable to create casper process"); exit(1); } cappwdloc = cap_service_open(capcas, "system.pwd"); capgrploc = cap_service_open(capcas, "system.grp"); /* Casper capability no longer needed. */ cap_close(capcas); if (cappwdloc == NULL || capgrploc == NULL) { if (cappwdloc == NULL) warn("unable to open system.pwd service"); if (capgrploc == NULL) warn("unable to open system.grp service"); exit(1); } /* Limit system.pwd to only getpwuid() function and pw_name field. */ cmds[0] = "getpwuid"; if (cap_pwd_limit_cmds(cappwdloc, cmds, 1) < 0) err(1, "unable to limit system.pwd service"); fields[0] = "pw_name"; if (cap_pwd_limit_fields(cappwdloc, fields, 1) < 0) err(1, "unable to limit system.pwd service"); /* Limit system.grp to only getgrgid() function and gr_name field. */ cmds[0] = "getgrgid"; if (cap_grp_limit_cmds(capgrploc, cmds, 1) < 0) err(1, "unable to limit system.grp service"); fields[0] = "gr_name"; if (cap_grp_limit_fields(capgrploc, fields, 1) < 0) err(1, "unable to limit system.grp service"); *cappwdp = cappwdloc; *capgrpp = capgrploc; return (0); } #endif /* HAVE_LIBCASPER */ static void print_integer_arg(const char *(*decoder)(int), int value) { const char *str; str = decoder(value); if (str != NULL) printf("%s", str); else { if (decimal) printf("", value); else printf("", value); } } /* Like print_integer_arg but unknown values are treated as valid. */ static void print_integer_arg_valid(const char *(*decoder)(int), int value) { const char *str; str = decoder(value); if (str != NULL) printf("%s", str); else { if (decimal) printf("%d", value); else printf("%#x", value); } } static void print_mask_arg(bool (*decoder)(FILE *, int, int *), int value) { bool invalid; int rem; printf("%#x<", value); invalid = !decoder(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void print_mask_arg0(bool (*decoder)(FILE *, int, int *), int value) { bool invalid; int rem; if (value == 0) { printf("0"); return; } printf("%#x<", value); invalid = !decoder(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void decode_fileflags(fflags_t value) { bool invalid; fflags_t rem; if (value == 0) { printf("0"); return; } printf("%#x<", value); invalid = !sysdecode_fileflags(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void decode_filemode(int value) { bool invalid; int rem; if (value == 0) { printf("0"); return; } printf("%#o<", value); invalid = !sysdecode_filemode(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void print_mask_arg32(bool (*decoder)(FILE *, uint32_t, uint32_t *), uint32_t value) { bool invalid; uint32_t rem; printf("%#x<", value); invalid = !decoder(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void print_mask_argul(bool (*decoder)(FILE *, u_long, u_long *), u_long value) { bool invalid; u_long rem; if (value == 0) { printf("0"); return; } printf("%#lx<", value); invalid = !decoder(stdout, value, &rem); printf(">"); if (invalid) printf("%lu", rem); } int main(int argc, char *argv[]) { int ch, ktrlen, size; void *m; int trpoints = ALL_POINTS; int drop_logged; pid_t pid = 0; u_int sv_flags; setlocale(LC_CTYPE, ""); timestamp = TIMESTAMP_NONE; while ((ch = getopt(argc,argv,"f:dElm:np:AHRrSsTt:")) != -1) switch (ch) { case 'A': abiflag = 1; break; case 'f': tracefile = optarg; break; case 'd': decimal = 1; break; case 'l': tail = 1; break; case 'm': maxdata = atoi(optarg); break; case 'n': fancy = 0; break; case 'p': pid = atoi(optarg); break; case 'r': resolv = 1; break; case 'S': syscallno = 1; break; case 's': suppressdata = 1; break; case 'E': timestamp |= TIMESTAMP_ELAPSED; break; case 'H': threads = 1; break; case 'R': timestamp |= TIMESTAMP_RELATIVE; break; case 'T': timestamp |= TIMESTAMP_ABSOLUTE; break; case 't': trpoints = getpoints(optarg); if (trpoints < 0) errx(1, "unknown trace point in %s", optarg); break; default: usage(); } if (argc > optind) usage(); m = malloc(size = 1025); if (m == NULL) errx(1, "%s", strerror(ENOMEM)); if (strcmp(tracefile, "-") != 0) if (!freopen(tracefile, "r", stdin)) err(1, "%s", tracefile); strerror_init(); localtime_init(); #ifdef HAVE_LIBCASPER if (resolv != 0) { if (cappwdgrp_setup(&cappwd, &capgrp) < 0) { cappwd = NULL; capgrp = NULL; } } if (resolv == 0 || (cappwd != NULL && capgrp != NULL)) { if (cap_enter() < 0 && errno != ENOSYS) err(1, "unable to enter capability mode"); } #else if (resolv == 0) { if (cap_enter() < 0 && errno != ENOSYS) err(1, "unable to enter capability mode"); } #endif if (caph_limit_stdio() == -1) err(1, "unable to limit stdio"); TAILQ_INIT(&trace_procs); drop_logged = 0; while (fread_tail(&ktr_header, sizeof(struct ktr_header), 1)) { if (ktr_header.ktr_type & KTR_DROP) { ktr_header.ktr_type &= ~KTR_DROP; if (!drop_logged && threads) { printf( "%6jd %6jd %-8.*s Events dropped.\n", (intmax_t)ktr_header.ktr_pid, ktr_header.ktr_tid > 0 ? (intmax_t)ktr_header.ktr_tid : 0, MAXCOMLEN, ktr_header.ktr_comm); drop_logged = 1; } else if (!drop_logged) { printf("%6jd %-8.*s Events dropped.\n", (intmax_t)ktr_header.ktr_pid, MAXCOMLEN, ktr_header.ktr_comm); drop_logged = 1; } } if (trpoints & (1< size) { m = realloc(m, ktrlen+1); if (m == NULL) errx(1, "%s", strerror(ENOMEM)); size = ktrlen; } if (ktrlen && fread_tail(m, ktrlen, 1) == 0) errx(1, "data too short"); if (fetchprocinfo(&ktr_header, (u_int *)m) != 0) continue; sv_flags = abidump(&ktr_header); if (pid && ktr_header.ktr_pid != pid && ktr_header.ktr_tid != pid) continue; if ((trpoints & (1<ktr_type) { case KTR_PROCCTOR: TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { TAILQ_REMOVE(&trace_procs, pi, info); break; } } pi = malloc(sizeof(struct proc_info)); if (pi == NULL) errx(1, "%s", strerror(ENOMEM)); pi->sv_flags = *flags; pi->pid = kth->ktr_pid; TAILQ_INSERT_TAIL(&trace_procs, pi, info); return (1); case KTR_PROCDTOR: TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { TAILQ_REMOVE(&trace_procs, pi, info); free(pi); break; } } return (1); } return (0); } u_int abidump(struct ktr_header *kth) { struct proc_info *pi; const char *abi; const char *arch; u_int flags = 0; TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { flags = pi->sv_flags; break; } } if (abiflag == 0) return (flags); switch (flags & SV_ABI_MASK) { case SV_ABI_LINUX: abi = "L"; break; case SV_ABI_FREEBSD: abi = "F"; break; case SV_ABI_CLOUDABI: abi = "C"; break; default: abi = "U"; break; } if (flags & SV_LP64) arch = "64"; else if (flags & SV_ILP32) arch = "32"; else arch = "00"; printf("%s%s ", abi, arch); return (flags); } void dumpheader(struct ktr_header *kth) { static char unknown[64]; static struct timeval prevtime, prevtime_e; struct timeval temp; const char *type; const char *sign; switch (kth->ktr_type) { case KTR_SYSCALL: type = "CALL"; break; case KTR_SYSRET: type = "RET "; break; case KTR_NAMEI: type = "NAMI"; break; case KTR_GENIO: type = "GIO "; break; case KTR_PSIG: type = "PSIG"; break; case KTR_CSW: type = "CSW "; break; case KTR_USER: type = "USER"; break; case KTR_STRUCT: type = "STRU"; break; case KTR_SYSCTL: type = "SCTL"; break; case KTR_PROCCTOR: /* FALLTHROUGH */ case KTR_PROCDTOR: return; case KTR_CAPFAIL: type = "CAP "; break; case KTR_FAULT: type = "PFLT"; break; case KTR_FAULTEND: type = "PRET"; break; default: sprintf(unknown, "UNKNOWN(%d)", kth->ktr_type); type = unknown; } /* * The ktr_tid field was previously the ktr_buffer field, which held * the kernel pointer value for the buffer associated with data * following the record header. It now holds a threadid, but only * for trace files after the change. Older trace files still contain * kernel pointers. Detect this and suppress the results by printing * negative tid's as 0. */ if (threads) printf("%6jd %6jd %-8.*s ", (intmax_t)kth->ktr_pid, kth->ktr_tid > 0 ? (intmax_t)kth->ktr_tid : 0, MAXCOMLEN, kth->ktr_comm); else printf("%6jd %-8.*s ", (intmax_t)kth->ktr_pid, MAXCOMLEN, kth->ktr_comm); if (timestamp) { if (timestamp & TIMESTAMP_ABSOLUTE) { printf("%jd.%06ld ", (intmax_t)kth->ktr_time.tv_sec, kth->ktr_time.tv_usec); } if (timestamp & TIMESTAMP_ELAPSED) { if (prevtime_e.tv_sec == 0) prevtime_e = kth->ktr_time; timersub(&kth->ktr_time, &prevtime_e, &temp); printf("%jd.%06ld ", (intmax_t)temp.tv_sec, temp.tv_usec); } if (timestamp & TIMESTAMP_RELATIVE) { if (prevtime.tv_sec == 0) prevtime = kth->ktr_time; if (timercmp(&kth->ktr_time, &prevtime, <)) { timersub(&prevtime, &kth->ktr_time, &temp); sign = "-"; } else { timersub(&kth->ktr_time, &prevtime, &temp); sign = ""; } prevtime = kth->ktr_time; printf("%s%jd.%06ld ", sign, (intmax_t)temp.tv_sec, temp.tv_usec); } } printf("%s ", type); } #include static void ioctlname(unsigned long val) { const char *str; str = sysdecode_ioctlname(val); if (str != NULL) printf("%s", str); else if (decimal) printf("%lu", val); else printf("%#lx", val); } static enum sysdecode_abi syscallabi(u_int sv_flags) { if (sv_flags == 0) return (SYSDECODE_ABI_FREEBSD); switch (sv_flags & SV_ABI_MASK) { case SV_ABI_FREEBSD: return (SYSDECODE_ABI_FREEBSD); #if defined(__amd64__) || defined(__i386__) case SV_ABI_LINUX: #ifdef __amd64__ if (sv_flags & SV_ILP32) return (SYSDECODE_ABI_LINUX32); #endif return (SYSDECODE_ABI_LINUX); #endif #if defined(__aarch64__) || defined(__amd64__) case SV_ABI_CLOUDABI: return (SYSDECODE_ABI_CLOUDABI64); #endif default: return (SYSDECODE_ABI_UNKNOWN); } } static void syscallname(u_int code, u_int sv_flags) { const char *name; name = sysdecode_syscallname(syscallabi(sv_flags), code); if (name == NULL) printf("[%d]", code); else { printf("%s", name); if (syscallno) printf("[%d]", code); } } static void print_signal(int signo) { const char *signame; signame = sysdecode_signal(signo); if (signame != NULL) printf("%s", signame); else printf("SIG %d", signo); } void ktrsyscall(struct ktr_syscall *ktr, u_int sv_flags) { int narg = ktr->ktr_narg; register_t *ip, *first; intmax_t arg; int quad_align, quad_slots; syscallname(ktr->ktr_code, sv_flags); ip = first = &ktr->ktr_args[0]; if (narg) { char c = '('; if (fancy && (sv_flags == 0 || (sv_flags & SV_ABI_MASK) == SV_ABI_FREEBSD)) { quad_align = 0; if (sv_flags & SV_ILP32) { #ifdef __powerpc__ quad_align = 1; #endif quad_slots = 2; } else quad_slots = 1; switch (ktr->ktr_code) { case SYS_bindat: case SYS_chflagsat: case SYS_connectat: case SYS_faccessat: case SYS_fchmodat: case SYS_fchownat: case SYS_fstatat: case SYS_futimesat: case SYS_linkat: case SYS_mkdirat: case SYS_mkfifoat: case SYS_mknodat: case SYS_openat: case SYS_readlinkat: case SYS_renameat: case SYS_unlinkat: case SYS_utimensat: putchar('('); print_integer_arg_valid(sysdecode_atfd, *ip); c = ','; ip++; narg--; break; } switch (ktr->ktr_code) { case SYS_ioctl: { print_number(ip, narg, c); putchar(c); ioctlname(*ip); c = ','; ip++; narg--; break; } case SYS_ptrace: putchar('('); print_integer_arg(sysdecode_ptrace_request, *ip); c = ','; ip++; narg--; break; case SYS_access: case SYS_eaccess: case SYS_faccessat: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_access_mode, *ip); ip++; narg--; break; case SYS_open: case SYS_openat: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_open_flags, ip[0]); if ((ip[0] & O_CREAT) == O_CREAT) { putchar(','); decode_filemode(ip[1]); } ip += 2; narg -= 2; break; case SYS_wait4: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_wait4_options, *ip); ip++; narg--; break; case SYS_wait6: putchar('('); print_integer_arg(sysdecode_idtype, *ip); c = ','; ip++; narg--; print_number64(first, ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_wait6_options, *ip); ip++; narg--; break; case SYS_chmod: case SYS_fchmod: case SYS_lchmod: case SYS_fchmodat: print_number(ip, narg, c); putchar(','); decode_filemode(*ip); ip++; narg--; break; - case SYS_mknod: case SYS_mknodat: print_number(ip, narg, c); putchar(','); decode_filemode(*ip); ip++; narg--; break; case SYS_getfsstat: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_getfsstat_mode, *ip); ip++; narg--; break; case SYS_mount: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mount_flags, *ip); ip++; narg--; break; case SYS_unmount: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mount_flags, *ip); ip++; narg--; break; case SYS_recvmsg: case SYS_sendmsg: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_msg_flags, *ip); ip++; narg--; break; case SYS_recvfrom: case SYS_sendto: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_msg_flags, *ip); ip++; narg--; break; case SYS_chflags: case SYS_chflagsat: case SYS_fchflags: case SYS_lchflags: print_number(ip, narg, c); putchar(','); decode_fileflags(*ip); ip++; narg--; break; case SYS_kill: print_number(ip, narg, c); putchar(','); print_signal(*ip); ip++; narg--; break; case SYS_reboot: putchar('('); print_mask_arg(sysdecode_reboot_howto, *ip); ip++; narg--; break; case SYS_umask: putchar('('); decode_filemode(*ip); ip++; narg--; break; case SYS_msync: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_msync_flags, *ip); ip++; narg--; break; #ifdef SYS_freebsd6_mmap case SYS_freebsd6_mmap: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mmap_prot, *ip); putchar(','); ip++; narg--; print_mask_arg(sysdecode_mmap_flags, *ip); ip++; narg--; break; #endif case SYS_mmap: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mmap_prot, *ip); putchar(','); ip++; narg--; print_mask_arg(sysdecode_mmap_flags, *ip); ip++; narg--; break; case SYS_mprotect: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mmap_prot, *ip); ip++; narg--; break; case SYS_madvise: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_madvice, *ip); ip++; narg--; break; case SYS_setpriority: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_prio_which, *ip); ip++; narg--; break; case SYS_fcntl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_fcntl_cmd, ip[0]); if (sysdecode_fcntl_arg_p(ip[0])) { putchar(','); if (ip[0] == F_SETFL) print_mask_arg( sysdecode_fcntl_fileflags, ip[1]); else sysdecode_fcntl_arg(stdout, ip[0], ip[1], decimal ? 10 : 16); } ip += 2; narg -= 2; break; case SYS_socket: { int sockdomain; putchar('('); sockdomain = *ip; print_integer_arg(sysdecode_socketdomain, sockdomain); ip++; narg--; putchar(','); print_mask_arg(sysdecode_socket_type, *ip); ip++; narg--; if (sockdomain == PF_INET || sockdomain == PF_INET6) { putchar(','); print_integer_arg(sysdecode_ipproto, *ip); ip++; narg--; } c = ','; break; } case SYS_setsockopt: case SYS_getsockopt: { const char *str; print_number(ip, narg, c); putchar(','); print_integer_arg_valid(sysdecode_sockopt_level, *ip); str = sysdecode_sockopt_name(ip[0], ip[1]); if (str != NULL) { printf(",%s", str); ip++; narg--; } ip++; narg--; break; } #ifdef SYS_freebsd6_lseek case SYS_freebsd6_lseek: print_number(ip, narg, c); /* Hidden 'pad' argument, not in lseek(2) */ print_number(ip, narg, c); print_number64(first, ip, narg, c); putchar(','); print_integer_arg(sysdecode_whence, *ip); ip++; narg--; break; #endif case SYS_lseek: print_number(ip, narg, c); print_number64(first, ip, narg, c); putchar(','); print_integer_arg(sysdecode_whence, *ip); ip++; narg--; break; case SYS_flock: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_flock_operation, *ip); ip++; narg--; break; case SYS_mkfifo: case SYS_mkfifoat: case SYS_mkdir: case SYS_mkdirat: print_number(ip, narg, c); putchar(','); decode_filemode(*ip); ip++; narg--; break; case SYS_shutdown: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_shutdown_how, *ip); ip++; narg--; break; case SYS_socketpair: putchar('('); print_integer_arg(sysdecode_socketdomain, *ip); ip++; narg--; putchar(','); print_mask_arg(sysdecode_socket_type, *ip); ip++; narg--; c = ','; break; case SYS_getrlimit: case SYS_setrlimit: putchar('('); print_integer_arg(sysdecode_rlimit, *ip); ip++; narg--; c = ','; break; case SYS_quotactl: print_number(ip, narg, c); putchar(','); if (!sysdecode_quotactl_cmd(stdout, *ip)) { if (decimal) printf("", (int)*ip); else printf("", (int)*ip); } ip++; narg--; c = ','; break; case SYS_nfssvc: putchar('('); print_integer_arg(sysdecode_nfssvc_flags, *ip); ip++; narg--; c = ','; break; case SYS_rtprio: putchar('('); print_integer_arg(sysdecode_rtprio_function, *ip); ip++; narg--; c = ','; break; case SYS___semctl: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_semctl_cmd, *ip); ip++; narg--; break; case SYS_semget: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_semget_flags, *ip); ip++; narg--; break; case SYS_msgctl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_msgctl_cmd, *ip); ip++; narg--; break; case SYS_shmat: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_shmat_flags, *ip); ip++; narg--; break; case SYS_shmctl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_shmctl_cmd, *ip); ip++; narg--; break; case SYS_shm_open: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_open_flags, ip[0]); putchar(','); decode_filemode(ip[1]); ip += 2; narg -= 2; break; case SYS_minherit: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_minherit_inherit, *ip); ip++; narg--; break; case SYS_rfork: putchar('('); print_mask_arg(sysdecode_rfork_flags, *ip); ip++; narg--; c = ','; break; case SYS_lio_listio: putchar('('); print_integer_arg(sysdecode_lio_listio_mode, *ip); ip++; narg--; c = ','; break; case SYS_mlockall: putchar('('); print_mask_arg(sysdecode_mlockall_flags, *ip); ip++; narg--; break; case SYS_sched_setscheduler: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_scheduler_policy, *ip); ip++; narg--; break; case SYS_sched_get_priority_max: case SYS_sched_get_priority_min: putchar('('); print_integer_arg(sysdecode_scheduler_policy, *ip); ip++; narg--; break; case SYS_sendfile: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_sendfile_flags, *ip); ip++; narg--; break; case SYS_kldsym: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_kldsym_cmd, *ip); ip++; narg--; break; case SYS_sigprocmask: putchar('('); print_integer_arg(sysdecode_sigprocmask_how, *ip); ip++; narg--; c = ','; break; case SYS___acl_get_file: case SYS___acl_set_file: case SYS___acl_get_fd: case SYS___acl_set_fd: case SYS___acl_delete_file: case SYS___acl_delete_fd: case SYS___acl_aclcheck_file: case SYS___acl_aclcheck_fd: case SYS___acl_get_link: case SYS___acl_set_link: case SYS___acl_delete_link: case SYS___acl_aclcheck_link: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_acltype, *ip); ip++; narg--; break; case SYS_sigaction: putchar('('); print_signal(*ip); ip++; narg--; c = ','; break; case SYS_extattrctl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_extattrnamespace, *ip); ip++; narg--; break; case SYS_nmount: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mount_flags, *ip); ip++; narg--; break; case SYS_thr_create: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_thr_create_flags, *ip); ip++; narg--; break; case SYS_thr_kill: print_number(ip, narg, c); putchar(','); print_signal(*ip); ip++; narg--; break; case SYS_kldunloadf: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_kldunload_flags, *ip); ip++; narg--; break; case SYS_linkat: case SYS_renameat: case SYS_symlinkat: print_number(ip, narg, c); putchar(','); print_integer_arg_valid(sysdecode_atfd, *ip); ip++; narg--; break; case SYS_cap_fcntls_limit: print_number(ip, narg, c); putchar(','); arg = *ip; ip++; narg--; print_mask_arg32(sysdecode_cap_fcntlrights, arg); break; case SYS_posix_fadvise: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); (void)putchar(','); print_integer_arg(sysdecode_fadvice, *ip); ip++; narg--; break; case SYS_procctl: putchar('('); print_integer_arg(sysdecode_idtype, *ip); c = ','; ip++; narg--; print_number64(first, ip, narg, c); putchar(','); print_integer_arg(sysdecode_procctl_cmd, *ip); ip++; narg--; break; case SYS__umtx_op: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_umtx_op, *ip); switch (*ip) { case UMTX_OP_CV_WAIT: ip++; narg--; putchar(','); print_mask_argul( sysdecode_umtx_cvwait_flags, *ip); break; case UMTX_OP_RW_RDLOCK: ip++; narg--; putchar(','); print_mask_argul( sysdecode_umtx_rwlock_flags, *ip); break; } ip++; narg--; break; case SYS_ftruncate: case SYS_truncate: print_number(ip, narg, c); print_number64(first, ip, narg, c); break; } } while (narg > 0) { print_number(ip, narg, c); } putchar(')'); } putchar('\n'); } void ktrsysret(struct ktr_sysret *ktr, u_int sv_flags) { register_t ret = ktr->ktr_retval; int error = ktr->ktr_error; syscallname(ktr->ktr_code, sv_flags); printf(" "); if (error == 0) { if (fancy) { printf("%ld", (long)ret); if (ret < 0 || ret > 9) printf("/%#lx", (unsigned long)ret); } else { if (decimal) printf("%ld", (long)ret); else printf("%#lx", (unsigned long)ret); } } else if (error == ERESTART) printf("RESTART"); else if (error == EJUSTRETURN) printf("JUSTRETURN"); else { printf("-1 errno %d", sysdecode_freebsd_to_abi_errno( syscallabi(sv_flags), error)); if (fancy) printf(" %s", strerror(ktr->ktr_error)); } putchar('\n'); } void ktrnamei(char *cp, int len) { printf("\"%.*s\"\n", len, cp); } void hexdump(char *p, int len, int screenwidth) { int n, i; int width; width = 0; do { width += 2; i = 13; /* base offset */ i += (width / 2) + 1; /* spaces every second byte */ i += (width * 2); /* width of bytes */ i += 3; /* " |" */ i += width; /* each byte */ i += 1; /* "|" */ } while (i < screenwidth); width -= 2; for (n = 0; n < len; n += width) { for (i = n; i < n + width; i++) { if ((i % width) == 0) { /* beginning of line */ printf(" 0x%04x", i); } if ((i % 2) == 0) { printf(" "); } if (i < len) printf("%02x", p[i] & 0xff); else printf(" "); } printf(" |"); for (i = n; i < n + width; i++) { if (i >= len) break; if (p[i] >= ' ' && p[i] <= '~') printf("%c", p[i]); else printf("."); } printf("|\n"); } if ((i % width) != 0) printf("\n"); } void visdump(char *dp, int datalen, int screenwidth) { int col = 0; char *cp; int width; char visbuf[5]; printf(" \""); col = 8; for (;datalen > 0; datalen--, dp++) { vis(visbuf, *dp, VIS_CSTYLE, *(dp+1)); cp = visbuf; /* * Keep track of printables and * space chars (like fold(1)). */ if (col == 0) { putchar('\t'); col = 8; } switch(*cp) { case '\n': col = 0; putchar('\n'); continue; case '\t': width = 8 - (col&07); break; default: width = strlen(cp); } if (col + width > (screenwidth-2)) { printf("\\\n\t"); col = 8; } col += width; do { putchar(*cp++); } while (*cp); } if (col == 0) printf(" "); printf("\"\n"); } void ktrgenio(struct ktr_genio *ktr, int len) { int datalen = len - sizeof (struct ktr_genio); char *dp = (char *)ktr + sizeof (struct ktr_genio); static int screenwidth = 0; int i, binary; printf("fd %d %s %d byte%s\n", ktr->ktr_fd, ktr->ktr_rw == UIO_READ ? "read" : "wrote", datalen, datalen == 1 ? "" : "s"); if (suppressdata) return; if (screenwidth == 0) { struct winsize ws; if (fancy && ioctl(fileno(stderr), TIOCGWINSZ, &ws) != -1 && ws.ws_col > 8) screenwidth = ws.ws_col; else screenwidth = 80; } if (maxdata && datalen > maxdata) datalen = maxdata; for (i = 0, binary = 0; i < datalen && binary == 0; i++) { if (dp[i] >= 32 && dp[i] < 127) continue; if (dp[i] == 10 || dp[i] == 13 || dp[i] == 0 || dp[i] == 9) continue; binary = 1; } if (binary) hexdump(dp, datalen, screenwidth); else visdump(dp, datalen, screenwidth); } void ktrpsig(struct ktr_psig *psig) { const char *str; print_signal(psig->signo); if (psig->action == SIG_DFL) { printf(" SIG_DFL"); } else { printf(" caught handler=0x%lx mask=0x%x", (u_long)psig->action, psig->mask.__bits[0]); } printf(" code="); str = sysdecode_sigcode(psig->signo, psig->code); if (str != NULL) printf("%s", str); else printf("", psig->code); putchar('\n'); } void ktrcsw_old(struct ktr_csw_old *cs) { printf("%s %s\n", cs->out ? "stop" : "resume", cs->user ? "user" : "kernel"); } void ktrcsw(struct ktr_csw *cs) { printf("%s %s \"%s\"\n", cs->out ? "stop" : "resume", cs->user ? "user" : "kernel", cs->wmesg); } void ktruser(int len, void *p) { unsigned char *cp; if (sysdecode_utrace(stdout, p, len)) { printf("\n"); return; } printf("%d ", len); cp = p; while (len--) if (decimal) printf(" %d", *cp++); else printf(" %02x", *cp++); printf("\n"); } void ktrcaprights(cap_rights_t *rightsp) { printf("cap_rights_t "); sysdecode_cap_rights(stdout, rightsp); printf("\n"); } static void ktrtimeval(struct timeval *tv) { printf("{%ld, %ld}", (long)tv->tv_sec, tv->tv_usec); } void ktritimerval(struct itimerval *it) { printf("itimerval { .interval = "); ktrtimeval(&it->it_interval); printf(", .value = "); ktrtimeval(&it->it_value); printf(" }\n"); } void ktrsockaddr(struct sockaddr *sa) { /* TODO: Support additional address families #include struct sockaddr_nb *nb; */ const char *str; char addr[64]; /* * note: ktrstruct() has already verified that sa points to a * buffer at least sizeof(struct sockaddr) bytes long and exactly * sa->sa_len bytes long. */ printf("struct sockaddr { "); str = sysdecode_sockaddr_family(sa->sa_family); if (str != NULL) printf("%s", str); else printf("", sa->sa_family); printf(", "); #define check_sockaddr_len(n) \ if (sa_##n.s##n##_len < sizeof(struct sockaddr_##n)) { \ printf("invalid"); \ break; \ } switch(sa->sa_family) { case AF_INET: { struct sockaddr_in sa_in; memset(&sa_in, 0, sizeof(sa_in)); memcpy(&sa_in, sa, sa->sa_len); check_sockaddr_len(in); inet_ntop(AF_INET, &sa_in.sin_addr, addr, sizeof addr); printf("%s:%u", addr, ntohs(sa_in.sin_port)); break; } case AF_INET6: { struct sockaddr_in6 sa_in6; memset(&sa_in6, 0, sizeof(sa_in6)); memcpy(&sa_in6, sa, sa->sa_len); check_sockaddr_len(in6); getnameinfo((struct sockaddr *)&sa_in6, sizeof(sa_in6), addr, sizeof(addr), NULL, 0, NI_NUMERICHOST); printf("[%s]:%u", addr, htons(sa_in6.sin6_port)); break; } case AF_UNIX: { struct sockaddr_un sa_un; memset(&sa_un, 0, sizeof(sa_un)); memcpy(&sa_un, sa, sa->sa_len); printf("%.*s", (int)sizeof(sa_un.sun_path), sa_un.sun_path); break; } default: printf("unknown address family"); } printf(" }\n"); } void ktrstat(struct stat *statp) { char mode[12], timestr[PATH_MAX + 4]; struct passwd *pwd; struct group *grp; struct tm *tm; /* * note: ktrstruct() has already verified that statp points to a * buffer exactly sizeof(struct stat) bytes long. */ printf("struct stat {"); printf("dev=%ju, ino=%ju, ", (uintmax_t)statp->st_dev, (uintmax_t)statp->st_ino); if (resolv == 0) printf("mode=0%jo, ", (uintmax_t)statp->st_mode); else { strmode(statp->st_mode, mode); printf("mode=%s, ", mode); } printf("nlink=%ju, ", (uintmax_t)statp->st_nlink); if (resolv == 0) { pwd = NULL; } else { #ifdef HAVE_LIBCASPER if (cappwd != NULL) pwd = cap_getpwuid(cappwd, statp->st_uid); else #endif pwd = getpwuid(statp->st_uid); } if (pwd == NULL) printf("uid=%ju, ", (uintmax_t)statp->st_uid); else printf("uid=\"%s\", ", pwd->pw_name); if (resolv == 0) { grp = NULL; } else { #ifdef HAVE_LIBCASPER if (capgrp != NULL) grp = cap_getgrgid(capgrp, statp->st_gid); else #endif grp = getgrgid(statp->st_gid); } if (grp == NULL) printf("gid=%ju, ", (uintmax_t)statp->st_gid); else printf("gid=\"%s\", ", grp->gr_name); printf("rdev=%ju, ", (uintmax_t)statp->st_rdev); printf("atime="); if (resolv == 0) printf("%jd", (intmax_t)statp->st_atim.tv_sec); else { tm = localtime(&statp->st_atim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_atim.tv_nsec != 0) printf(".%09ld, ", statp->st_atim.tv_nsec); else printf(", "); printf("mtime="); if (resolv == 0) printf("%jd", (intmax_t)statp->st_mtim.tv_sec); else { tm = localtime(&statp->st_mtim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_mtim.tv_nsec != 0) printf(".%09ld, ", statp->st_mtim.tv_nsec); else printf(", "); printf("ctime="); if (resolv == 0) printf("%jd", (intmax_t)statp->st_ctim.tv_sec); else { tm = localtime(&statp->st_ctim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_ctim.tv_nsec != 0) printf(".%09ld, ", statp->st_ctim.tv_nsec); else printf(", "); printf("birthtime="); if (resolv == 0) printf("%jd", (intmax_t)statp->st_birthtim.tv_sec); else { tm = localtime(&statp->st_birthtim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_birthtim.tv_nsec != 0) printf(".%09ld, ", statp->st_birthtim.tv_nsec); else printf(", "); printf("size=%jd, blksize=%ju, blocks=%jd, flags=0x%x", (uintmax_t)statp->st_size, (uintmax_t)statp->st_blksize, (intmax_t)statp->st_blocks, statp->st_flags); printf(" }\n"); } void ktrstruct(char *buf, size_t buflen) { char *name, *data; size_t namelen, datalen; int i; cap_rights_t rights; struct itimerval it; struct stat sb; struct sockaddr_storage ss; for (name = buf, namelen = 0; namelen < buflen && name[namelen] != '\0'; ++namelen) /* nothing */; if (namelen == buflen) goto invalid; if (name[namelen] != '\0') goto invalid; data = buf + namelen + 1; datalen = buflen - namelen - 1; if (datalen == 0) goto invalid; /* sanity check */ for (i = 0; i < (int)namelen; ++i) if (!isalpha(name[i])) goto invalid; if (strcmp(name, "caprights") == 0) { if (datalen != sizeof(cap_rights_t)) goto invalid; memcpy(&rights, data, datalen); ktrcaprights(&rights); } else if (strcmp(name, "itimerval") == 0) { if (datalen != sizeof(struct itimerval)) goto invalid; memcpy(&it, data, datalen); ktritimerval(&it); } else if (strcmp(name, "stat") == 0) { if (datalen != sizeof(struct stat)) goto invalid; memcpy(&sb, data, datalen); ktrstat(&sb); } else if (strcmp(name, "sockaddr") == 0) { if (datalen > sizeof(ss)) goto invalid; memcpy(&ss, data, datalen); if (datalen != ss.ss_len) goto invalid; ktrsockaddr((struct sockaddr *)&ss); } else { printf("unknown structure\n"); } return; invalid: printf("invalid record\n"); } void ktrcapfail(struct ktr_cap_fail *ktr) { switch (ktr->cap_type) { case CAPFAIL_NOTCAPABLE: /* operation on fd with insufficient capabilities */ printf("operation requires "); sysdecode_cap_rights(stdout, &ktr->cap_needed); printf(", descriptor holds "); sysdecode_cap_rights(stdout, &ktr->cap_held); break; case CAPFAIL_INCREASE: /* requested more capabilities than fd already has */ printf("attempt to increase capabilities from "); sysdecode_cap_rights(stdout, &ktr->cap_held); printf(" to "); sysdecode_cap_rights(stdout, &ktr->cap_needed); break; case CAPFAIL_SYSCALL: /* called restricted syscall */ printf("disallowed system call"); break; case CAPFAIL_LOOKUP: /* used ".." in strict-relative mode */ printf("restricted VFS lookup"); break; default: printf("unknown capability failure: "); sysdecode_cap_rights(stdout, &ktr->cap_needed); printf(" "); sysdecode_cap_rights(stdout, &ktr->cap_held); break; } printf("\n"); } void ktrfault(struct ktr_fault *ktr) { printf("0x%jx ", (uintmax_t)ktr->vaddr); print_mask_arg(sysdecode_vmprot, ktr->type); printf("\n"); } void ktrfaultend(struct ktr_faultend *ktr) { const char *str; str = sysdecode_vmresult(ktr->result); if (str != NULL) printf("%s", str); else printf("", ktr->result); printf("\n"); } void usage(void) { fprintf(stderr, "usage: kdump [-dEnlHRrSsTA] [-f trfile] " "[-m maxdata] [-p pid] [-t trstr]\n"); exit(1); } Index: head/usr.bin/lastcomm/lastcomm.c =================================================================== --- head/usr.bin/lastcomm/lastcomm.c (revision 318735) +++ head/usr.bin/lastcomm/lastcomm.c (revision 318736) @@ -1,275 +1,275 @@ /* * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)lastcomm.c 8.1 (Berkeley) 6/6/93"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include "pathnames.h" /*XXX*/#include time_t expand(u_int); char *flagbits(int); const char *getdev(dev_t); -int readrec_forward(FILE *f, struct acctv2 *av2); -int readrec_backward(FILE *f, struct acctv2 *av2); -int requested(char *[], struct acctv2 *); +int readrec_forward(FILE *f, struct acctv3 *av3); +int readrec_backward(FILE *f, struct acctv3 *av3); +int requested(char *[], struct acctv3 *); static void usage(void); #define AC_UTIME 1 /* user */ #define AC_STIME 2 /* system */ #define AC_ETIME 4 /* elapsed */ #define AC_CTIME 8 /* user + system time, default */ #define AC_BTIME 16 /* starting time */ #define AC_FTIME 32 /* exit time (starting time + elapsed time )*/ int main(int argc, char *argv[]) { - struct acctv2 ab; + struct acctv3 ab; char *p; FILE *fp; - int (*readrec)(FILE *f, struct acctv2 *av2); + int (*readrec)(FILE *f, struct acctv3 *av3); time_t t; int ch, rv; const char *acctfile, *format; char buf[1024]; int flags = 0; acctfile = _PATH_ACCT; format = NULL; while ((ch = getopt(argc, argv, "f:usecSE")) != -1) switch((char)ch) { case 'f': acctfile = optarg; break; case 'u': flags |= AC_UTIME; /* user time */ break; case 's': flags |= AC_STIME; /* system time */ break; case 'e': flags |= AC_ETIME; /* elapsed time */ break; case 'c': flags |= AC_CTIME; /* user + system time */ break; case 'S': flags |= AC_BTIME; /* starting time */ break; case 'E': /* exit time (starting time + elapsed time )*/ flags |= AC_FTIME; break; case '?': default: usage(); } /* default user + system time and starting time */ if (!flags) { flags = AC_CTIME | AC_BTIME; } argc -= optind; argv += optind; if (argc > 0 && **argv == '+') { format = *argv + 1; /* skip + */ argc--; argv++; } if (strcmp(acctfile, "-") == 0) { fp = stdin; readrec = readrec_forward; } else { /* Open the file. */ if ((fp = fopen(acctfile, "r")) == NULL) err(1, "could not open %s", acctfile); if (fseek(fp, 0l, SEEK_END) == -1) err(1, "seek to end of %s failed", acctfile); readrec = readrec_backward; } while ((rv = readrec(fp, &ab)) == 1) { for (p = &ab.ac_comm[0]; p < &ab.ac_comm[AC_COMM_LEN] && *p; ++p) if (!isprint(*p)) *p = '?'; if (*argv && !requested(argv, &ab)) continue; (void)printf("%-*.*s %-7s %-*s %-8s", AC_COMM_LEN, AC_COMM_LEN, ab.ac_comm, flagbits(ab.ac_flagx), MAXLOGNAME - 1, user_from_uid(ab.ac_uid, 0), getdev(ab.ac_tty)); /* user + system time */ if (flags & AC_CTIME) { (void)printf(" %6.3f secs", (ab.ac_utime + ab.ac_stime) / 1000000); } /* usr time */ if (flags & AC_UTIME) { (void)printf(" %6.3f us", ab.ac_utime / 1000000); } /* system time */ if (flags & AC_STIME) { (void)printf(" %6.3f sy", ab.ac_stime / 1000000); } /* elapsed time */ if (flags & AC_ETIME) { (void)printf(" %8.3f es", ab.ac_etime / 1000000); } /* starting time */ if (flags & AC_BTIME) { if (format != NULL) { (void)strftime(buf, sizeof(buf), format, localtime(&ab.ac_btime)); (void)printf(" %s", buf); } else (void)printf(" %.16s", ctime(&ab.ac_btime)); } /* exit time (starting time + elapsed time )*/ if (flags & AC_FTIME) { t = ab.ac_btime; t += (time_t)(ab.ac_etime / 1000000); if (format != NULL) { (void)strftime(buf, sizeof(buf), format, localtime(&t)); (void)printf(" %s", buf); } else (void)printf(" %.16s", ctime(&t)); } printf("\n"); } if (rv == EOF) err(1, "read record from %s failed", acctfile); if (fflush(stdout)) err(1, "stdout"); exit(0); } char * flagbits(int f) { static char flags[20] = "-"; char *p; #define BIT(flag, ch) if (f & flag) *p++ = ch p = flags + 1; BIT(ASU, 'S'); BIT(AFORK, 'F'); BIT(ACOMPAT, 'C'); BIT(ACORE, 'D'); BIT(AXSIG, 'X'); *p = '\0'; return (flags); } int -requested(char *argv[], struct acctv2 *acp) +requested(char *argv[], struct acctv3 *acp) { const char *p; do { p = user_from_uid(acp->ac_uid, 0); if (!strcmp(p, *argv)) return (1); if ((p = getdev(acp->ac_tty)) && !strcmp(p, *argv)) return (1); if (!strncmp(acp->ac_comm, *argv, AC_COMM_LEN)) return (1); } while (*++argv); return (0); } const char * getdev(dev_t dev) { static dev_t lastdev = (dev_t)-1; static const char *lastname; if (dev == NODEV) /* Special case. */ return ("__"); if (dev == lastdev) /* One-element cache. */ return (lastname); lastdev = dev; lastname = devname(dev, S_IFCHR); return (lastname); } static void usage(void) { (void)fprintf(stderr, "usage: lastcomm [-EScesu] [-f file] [+format] [command ...] " "[user ...] [terminal ...]\n"); exit(1); } Index: head/usr.bin/lastcomm/readrec.c =================================================================== --- head/usr.bin/lastcomm/readrec.c (revision 318735) +++ head/usr.bin/lastcomm/readrec.c (revision 318736) @@ -1,227 +1,263 @@ /*- * Copyright (c) 2007 Diomidis Spinellis * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include -int readrec_forward(FILE *f, struct acctv2 *av2); -int readrec_backward(FILE *f, struct acctv2 *av2); +int readrec_forward(FILE *f, struct acctv3 *av2); +int readrec_backward(FILE *f, struct acctv3 *av2); /* * Reverse offsetof: return the offset of field f * from the end of the structure s. */ #define roffsetof(s, f) (sizeof(s) - offsetof(s, f)) /* * Read exactly one record of size size from stream f into ptr. * Failure to read the complete record is considered a file format error, * and will set errno to EFTYPE. * Return 0 on success, EOF on end of file or error. */ static int fread_record(void *ptr, size_t size, FILE *f) { size_t rv; if ((rv = fread(ptr, 1, size, f)) == size) return (0); else if (ferror(f) || rv == 0) return (EOF); else { /* Short read. */ errno = EFTYPE; return (EOF); } } /* * Return the value of a comp_t field. */ static float decode_comp(comp_t v) { int result, exp; result = v & 017777; for (exp = v >> 13; exp; exp--) result <<= 3; return ((double)result / AHZV1); } /* * Read a v1 accounting record stored at the current * position of stream f. * Convert the data to the current record format. * Return EOF on error or end-of-file. */ static int -readrec_v1(FILE *f, struct acctv2 *av2) +readrec_v1(FILE *f, struct acctv3 *av3) { struct acctv1 av1; int rv; if ((rv = fread_record(&av1, sizeof(av1), f)) == EOF) return (EOF); - av2->ac_zero = 0; - av2->ac_version = 2; - av2->ac_len = av2->ac_len2 = sizeof(*av2); - memcpy(av2->ac_comm, av1.ac_comm, AC_COMM_LEN); - av2->ac_utime = decode_comp(av1.ac_utime) * 1000000; - av2->ac_stime = decode_comp(av1.ac_stime) * 1000000; - av2->ac_etime = decode_comp(av1.ac_etime) * 1000000; - av2->ac_btime = av1.ac_btime; - av2->ac_uid = av1.ac_uid; - av2->ac_gid = av1.ac_gid; - av2->ac_mem = av1.ac_mem; - av2->ac_io = decode_comp(av1.ac_io); - av2->ac_tty = av1.ac_tty; - av2->ac_flagx = av1.ac_flag | ANVER; + av3->ac_zero = 0; + av3->ac_version = 3; + av3->ac_len = av3->ac_len2 = sizeof(*av3); + memcpy(av3->ac_comm, av1.ac_comm, AC_COMM_LEN); + av3->ac_utime = decode_comp(av1.ac_utime) * 1000000; + av3->ac_stime = decode_comp(av1.ac_stime) * 1000000; + av3->ac_etime = decode_comp(av1.ac_etime) * 1000000; + av3->ac_btime = av1.ac_btime; + av3->ac_uid = av1.ac_uid; + av3->ac_gid = av1.ac_gid; + av3->ac_mem = av1.ac_mem; + av3->ac_io = decode_comp(av1.ac_io); + av3->ac_tty = av1.ac_tty; + av3->ac_flagx = av1.ac_flag | ANVER; return (0); } /* * Read an v2 accounting record stored at the current * position of stream f. * Return EOF on error or end-of-file. */ static int -readrec_v2(FILE *f, struct acctv2 *av2) +readrec_v2(FILE *f, struct acctv3 *av3) { - return (fread_record(av2, sizeof(*av2), f)); + struct acctv2 av2; + int rv; + + if ((rv = fread_record(&av2, sizeof(av2), f)) == EOF) + return (EOF); + av3->ac_zero = 0; + av3->ac_version = 3; + av3->ac_len = av3->ac_len2 = sizeof(*av3); + memcpy(av3->ac_comm, av2.ac_comm, AC_COMM_LEN); + av3->ac_utime = av2.ac_utime; + av3->ac_stime = av2.ac_stime; + av3->ac_etime = av2.ac_etime; + av3->ac_btime = av2.ac_btime; + av3->ac_uid = av2.ac_uid; + av3->ac_gid = av2.ac_gid; + av3->ac_mem = av2.ac_mem; + av3->ac_io = av2.ac_io; + av3->ac_tty = av2.ac_tty; + av3->ac_flagx = av2.ac_flagx; + return (0); } /* + * Read an v2 accounting record stored at the current + * position of stream f. + * Return EOF on error or end-of-file. + */ +static int +readrec_v3(FILE *f, struct acctv3 *av3) +{ + + return (fread_record(av3, sizeof(*av3), f)); +} + +/* * Read a new-style (post-v1) accounting record stored at * the current position of stream f. * Convert the data to the current record format. * Return EOF on error or end-of-file. */ static int -readrec_vx(FILE *f, struct acctv2 *av2) +readrec_vx(FILE *f, struct acctv3 *av3) { uint8_t magic, version; if (fread_record(&magic, sizeof(magic), f) == EOF || fread_record(&version, sizeof(version), f) == EOF || ungetc(version, f) == EOF || ungetc(magic, f) == EOF) return (EOF); switch (version) { case 2: - return (readrec_v2(f, av2)); + return (readrec_v2(f, av3)); + case 3: + return (readrec_v3(f, av3)); /* Add handling for more versions here. */ default: errno = EFTYPE; return (EOF); } } /* * Read an accounting record stored at the current * position of stream f. * Old-format records are converted to the current record * format. * Return the number of records read (1 or 0 at the end-of-file), * or EOF on error. */ int -readrec_forward(FILE *f, struct acctv2 *av2) +readrec_forward(FILE *f, struct acctv3 *av3) { int magic, rv; if ((magic = getc(f)) == EOF) return (ferror(f) ? EOF : 0); if (ungetc(magic, f) == EOF) return (EOF); if (magic != 0) /* Old record format. */ - rv = readrec_v1(f, av2); + rv = readrec_v1(f, av3); else /* New record formats. */ - rv = readrec_vx(f, av2); + rv = readrec_vx(f, av3); return (rv == EOF ? EOF : 1); } /* * Read an accounting record ending at the current * position of stream f. * Old-format records are converted to the current record * format. * The file pointer is positioned at the beginning of the * record read. * Return the number of records read (1 or 0 at the end-of-file), * or EOF on error. */ int -readrec_backward(FILE *f, struct acctv2 *av2) +readrec_backward(FILE *f, struct acctv3 *av3) { off_t pos; int c; uint16_t len; if ((pos = ftell(f)) == -1) return (EOF); if (pos == 0) return (0); - if (fseek(f, -roffsetof(struct acctv2, ac_trailer), + if (fseek(f, -roffsetof(struct acctv3, ac_trailer), SEEK_CUR) == EOF || (c = getc(f)) == EOF) return (EOF); if (c & ANVER) { - /* New record formats. */ + /* + * New record formats. For v2 and v3 offset from the + * end for ac_len2 should be same. + */ if (fseeko(f, pos - roffsetof(struct acctv2, ac_len2), SEEK_SET) == EOF || fread_record(&len, sizeof(len), f) == EOF || fseeko(f, pos - len, SEEK_SET) == EOF || - readrec_vx(f, av2) == EOF || + readrec_vx(f, av3) == EOF || fseeko(f, pos - len, SEEK_SET) == EOF) return (EOF); else return (1); } else { /* Old record format. */ if (fseeko(f, pos - sizeof(struct acctv1), SEEK_SET) == EOF || - readrec_v1(f, av2) == EOF || + readrec_v1(f, av3) == EOF || fseeko(f, pos - sizeof(struct acctv1), SEEK_SET) == EOF) return (EOF); else return (1); } } Index: head/usr.sbin/pstat/pstat.c =================================================================== --- head/usr.sbin/pstat/pstat.c (revision 318735) +++ head/usr.sbin/pstat/pstat.c (revision 318736) @@ -1,595 +1,595 @@ /*- * Copyright (c) 1980, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * Copyright (c) 2002 Networks Associates Technologies, Inc. * All rights reserved. * * Portions of this software were developed for the FreeBSD Project by * ThinkSec AS and NAI Labs, the Security Research Division of Network * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 * ("CBOSS"), as part of the DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1991, 1993, 1994\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)pstat.c 8.16 (Berkeley) 5/9/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include enum { NL_CONSTTY, NL_MAXFILES, NL_NFILES, NL_TTY_LIST, NL_MARKER }; static struct { int order; const char *name; } namelist[] = { { NL_CONSTTY, "_constty" }, { NL_MAXFILES, "_maxfiles" }, { NL_NFILES, "_openfiles" }, { NL_TTY_LIST, "_tty_list" }, { NL_MARKER, "" }, }; #define NNAMES (sizeof(namelist) / sizeof(*namelist)) static struct nlist nl[NNAMES]; static int humanflag; static int usenumflag; static int totalflag; static int swapflag; static char *nlistf; static char *memf; static kvm_t *kd; static const char *usagestr; static void filemode(void); static int getfiles(struct xfile **, size_t *); static void swapmode(void); static void ttymode(void); static void ttyprt(struct xtty *); static void usage(void); int main(int argc, char *argv[]) { int ch, quit, ret; int fileflag, ttyflag; unsigned int i; char buf[_POSIX2_LINE_MAX]; const char *opts; fileflag = swapflag = ttyflag = 0; /* We will behave like good old swapinfo if thus invoked */ opts = strrchr(argv[0], '/'); if (opts) opts++; else opts = argv[0]; if (!strcmp(opts, "swapinfo")) { swapflag = 1; opts = "ghkmM:N:"; usagestr = "swapinfo [-ghkm] [-M core [-N system]]"; } else { opts = "TM:N:fghkmnst"; usagestr = "pstat [-Tfghkmnst] [-M core [-N system]]"; } while ((ch = getopt(argc, argv, opts)) != -1) switch (ch) { case 'f': fileflag = 1; break; case 'g': setenv("BLOCKSIZE", "1G", 1); break; case 'h': humanflag = 1; break; case 'k': setenv("BLOCKSIZE", "1K", 1); break; case 'm': setenv("BLOCKSIZE", "1M", 1); break; case 'M': memf = optarg; break; case 'N': nlistf = optarg; break; case 'n': usenumflag = 1; break; case 's': ++swapflag; break; case 'T': totalflag = 1; break; case 't': ttyflag = 1; break; default: usage(); } /* * Initialize symbol names list. */ for (i = 0; i < NNAMES; i++) nl[namelist[i].order].n_name = strdup(namelist[i].name); if (memf != NULL) { kd = kvm_openfiles(nlistf, memf, NULL, O_RDONLY, buf); if (kd == NULL) errx(1, "kvm_openfiles: %s", buf); if ((ret = kvm_nlist(kd, nl)) != 0) { if (ret == -1) errx(1, "kvm_nlist: %s", kvm_geterr(kd)); quit = 0; for (i = 0; nl[i].n_name[0] != '\0'; ++i) if (nl[i].n_value == 0) { quit = 1; warnx("undefined symbol: %s", nl[i].n_name); } if (quit) exit(1); } } if (!(fileflag | ttyflag | swapflag | totalflag)) usage(); if (fileflag || totalflag) filemode(); if (ttyflag) ttymode(); if (swapflag || totalflag) swapmode(); exit (0); } static void usage(void) { fprintf(stderr, "usage: %s\n", usagestr); exit (1); } static const char fhdr32[] = " LOC TYPE FLG CNT MSG DATA OFFSET\n"; /* c0000000 ------ RWAI 123 123 c0000000 1000000000000000 */ static const char fhdr64[] = " LOC TYPE FLG CNT MSG DATA OFFSET\n"; /* c000000000000000 ------ RWAI 123 123 c000000000000000 1000000000000000 */ static const char hdr[] = " LINE INQ CAN LIN LOW OUTQ USE LOW COL SESS PGID STATE\n"; static void ttymode_kvm(void) { TAILQ_HEAD(, tty) tl; struct tty *tp, tty; struct xtty xt; (void)printf("%s", hdr); bzero(&xt, sizeof xt); xt.xt_size = sizeof xt; if (kvm_read(kd, nl[NL_TTY_LIST].n_value, &tl, sizeof tl) != sizeof tl) errx(1, "kvm_read(): %s", kvm_geterr(kd)); tp = TAILQ_FIRST(&tl); while (tp != NULL) { if (kvm_read(kd, (u_long)tp, &tty, sizeof tty) != sizeof tty) errx(1, "kvm_read(): %s", kvm_geterr(kd)); xt.xt_insize = tty.t_inq.ti_nblocks * TTYINQ_DATASIZE; xt.xt_incc = tty.t_inq.ti_linestart - tty.t_inq.ti_begin; xt.xt_inlc = tty.t_inq.ti_end - tty.t_inq.ti_linestart; xt.xt_inlow = tty.t_inlow; xt.xt_outsize = tty.t_outq.to_nblocks * TTYOUTQ_DATASIZE; xt.xt_outcc = tty.t_outq.to_end - tty.t_outq.to_begin; xt.xt_outlow = tty.t_outlow; xt.xt_column = tty.t_column; /* xt.xt_pgid = ... */ /* xt.xt_sid = ... */ xt.xt_flags = tty.t_flags; - xt.xt_dev = NODEV; + xt.xt_dev = (uint32_t)NODEV; ttyprt(&xt); tp = TAILQ_NEXT(&tty, t_list); } } static void ttymode_sysctl(void) { struct xtty *xttys; size_t len; unsigned int i, n; (void)printf("%s", hdr); if ((xttys = malloc(len = sizeof(*xttys))) == NULL) err(1, "malloc()"); while (sysctlbyname("kern.ttys", xttys, &len, 0, 0) == -1) { if (errno != ENOMEM) err(1, "sysctlbyname()"); len *= 2; if ((xttys = realloc(xttys, len)) == NULL) err(1, "realloc()"); } n = len / sizeof(*xttys); for (i = 0; i < n; i++) ttyprt(&xttys[i]); } static void ttymode(void) { if (kd != NULL) ttymode_kvm(); else ttymode_sysctl(); } static struct { int flag; char val; } ttystates[] = { #if 0 { TF_NOPREFIX, 'N' }, #endif { TF_INITLOCK, 'I' }, { TF_CALLOUT, 'C' }, /* Keep these together -> 'Oi' and 'Oo'. */ { TF_OPENED, 'O' }, { TF_OPENED_IN, 'i' }, { TF_OPENED_OUT, 'o' }, { TF_OPENED_CONS, 'c' }, { TF_GONE, 'G' }, { TF_OPENCLOSE, 'B' }, { TF_ASYNC, 'Y' }, { TF_LITERAL, 'L' }, /* Keep these together -> 'Hi' and 'Ho'. */ { TF_HIWAT, 'H' }, { TF_HIWAT_IN, 'i' }, { TF_HIWAT_OUT, 'o' }, { TF_STOPPED, 'S' }, { TF_EXCLUDE, 'X' }, { TF_BYPASS, 'l' }, { TF_ZOMBIE, 'Z' }, { TF_HOOK, 's' }, /* Keep these together -> 'bi' and 'bo'. */ { TF_BUSY, 'b' }, { TF_BUSY_IN, 'i' }, { TF_BUSY_OUT, 'o' }, { 0, '\0'}, }; static void ttyprt(struct xtty *xt) { int i, j; const char *name; if (xt->xt_size != sizeof *xt) errx(1, "struct xtty size mismatch"); if (usenumflag || xt->xt_dev == 0 || (name = devname(xt->xt_dev, S_IFCHR)) == NULL) printf("%#10jx ", (uintmax_t)xt->xt_dev); else printf("%10s ", name); printf("%5zu %4zu %4zu %4zu %5zu %4zu %4zu %5u %5d %5d ", xt->xt_insize, xt->xt_incc, xt->xt_inlc, (xt->xt_insize - xt->xt_inlow), xt->xt_outsize, xt->xt_outcc, (xt->xt_outsize - xt->xt_outlow), MIN(xt->xt_column, 99999), xt->xt_sid, xt->xt_pgid); for (i = j = 0; ttystates[i].flag; i++) if (xt->xt_flags & ttystates[i].flag) { putchar(ttystates[i].val); j++; } if (j == 0) putchar('-'); putchar('\n'); } static void filemode(void) { struct xfile *fp, *buf; char flagbuf[16], *fbp; int maxf, openf; size_t len; static char const * const dtypes[] = { "???", "inode", "socket", "pipe", "fifo", "kqueue", "crypto" }; int i; int wid; if (kd != NULL) { if (kvm_read(kd, nl[NL_MAXFILES].n_value, &maxf, sizeof maxf) != sizeof maxf || kvm_read(kd, nl[NL_NFILES].n_value, &openf, sizeof openf) != sizeof openf) errx(1, "kvm_read(): %s", kvm_geterr(kd)); } else { len = sizeof(int); if (sysctlbyname("kern.maxfiles", &maxf, &len, 0, 0) == -1 || sysctlbyname("kern.openfiles", &openf, &len, 0, 0) == -1) err(1, "sysctlbyname()"); } if (totalflag) { (void)printf("%3d/%3d files\n", openf, maxf); return; } if (getfiles(&buf, &len) == -1) return; openf = len / sizeof *fp; (void)printf("%d/%d open files\n", openf, maxf); printf(sizeof(uintptr_t) == 4 ? fhdr32 : fhdr64); wid = (int)sizeof(uintptr_t) * 2; for (fp = (struct xfile *)buf, i = 0; i < openf; ++fp, ++i) { if ((size_t)fp->xf_type >= sizeof(dtypes) / sizeof(dtypes[0])) continue; (void)printf("%*jx", wid, (uintmax_t)(uintptr_t)fp->xf_file); (void)printf(" %-6.6s", dtypes[fp->xf_type]); fbp = flagbuf; if (fp->xf_flag & FREAD) *fbp++ = 'R'; if (fp->xf_flag & FWRITE) *fbp++ = 'W'; if (fp->xf_flag & FAPPEND) *fbp++ = 'A'; if (fp->xf_flag & FASYNC) *fbp++ = 'I'; *fbp = '\0'; (void)printf(" %4s %3d", flagbuf, fp->xf_count); (void)printf(" %3d", fp->xf_msgcount); (void)printf(" %*jx", wid, (uintmax_t)(uintptr_t)fp->xf_data); (void)printf(" %*jx\n", (int)sizeof(fp->xf_offset) * 2, (uintmax_t)fp->xf_offset); } free(buf); } static int getfiles(struct xfile **abuf, size_t *alen) { struct xfile *buf; size_t len; int mib[2]; /* * XXX * Add emulation of KINFO_FILE here. */ if (kd != NULL) errx(1, "files on dead kernel, not implemented"); mib[0] = CTL_KERN; mib[1] = KERN_FILE; if (sysctl(mib, 2, NULL, &len, NULL, 0) == -1) { warn("sysctl: KERN_FILE"); return (-1); } if ((buf = malloc(len)) == NULL) errx(1, "malloc"); if (sysctl(mib, 2, buf, &len, NULL, 0) == -1) { warn("sysctl: KERN_FILE"); return (-1); } *abuf = buf; *alen = len; return (0); } /* * swapmode is based on a program called swapinfo written * by Kevin Lahey . */ #define CONVERT(v) ((int64_t)(v) * pagesize / blocksize) #define CONVERT_BLOCKS(v) ((int64_t)(v) * pagesize) static struct kvm_swap swtot; static int nswdev; static void print_swap_header(void) { int hlen; long blocksize; const char *header; header = getbsize(&hlen, &blocksize); if (totalflag == 0) (void)printf("%-15s %*s %8s %8s %8s\n", "Device", hlen, header, "Used", "Avail", "Capacity"); } static void print_swap_line(const char *swdevname, intmax_t nblks, intmax_t bused, intmax_t bavail, float bpercent) { char usedbuf[5]; char availbuf[5]; int hlen, pagesize; long blocksize; pagesize = getpagesize(); getbsize(&hlen, &blocksize); printf("%-15s %*jd ", swdevname, hlen, CONVERT(nblks)); if (humanflag) { humanize_number(usedbuf, sizeof(usedbuf), CONVERT_BLOCKS(bused), "", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); humanize_number(availbuf, sizeof(availbuf), CONVERT_BLOCKS(bavail), "", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); printf("%8s %8s %5.0f%%\n", usedbuf, availbuf, bpercent); } else { printf("%8jd %8jd %5.0f%%\n", (intmax_t)CONVERT(bused), (intmax_t)CONVERT(bavail), bpercent); } } static void print_swap(struct kvm_swap *ksw) { swtot.ksw_total += ksw->ksw_total; swtot.ksw_used += ksw->ksw_used; ++nswdev; if (totalflag == 0) print_swap_line(ksw->ksw_devname, ksw->ksw_total, ksw->ksw_used, ksw->ksw_total - ksw->ksw_used, (ksw->ksw_used * 100.0) / ksw->ksw_total); } static void print_swap_total(void) { int hlen, pagesize; long blocksize; pagesize = getpagesize(); getbsize(&hlen, &blocksize); if (totalflag) { blocksize = 1024 * 1024; (void)printf("%jdM/%jdM swap space\n", CONVERT(swtot.ksw_used), CONVERT(swtot.ksw_total)); } else if (nswdev > 1) { print_swap_line("Total", swtot.ksw_total, swtot.ksw_used, swtot.ksw_total - swtot.ksw_used, (swtot.ksw_used * 100.0) / swtot.ksw_total); } } static void swapmode_kvm(void) { struct kvm_swap kswap[16]; int i, n; n = kvm_getswapinfo(kd, kswap, sizeof kswap / sizeof kswap[0], SWIF_DEV_PREFIX); print_swap_header(); for (i = 0; i < n; ++i) print_swap(&kswap[i]); print_swap_total(); } static void swapmode_sysctl(void) { struct kvm_swap ksw; struct xswdev xsw; size_t mibsize, size; int mib[16], n; print_swap_header(); mibsize = sizeof mib / sizeof mib[0]; if (sysctlnametomib("vm.swap_info", mib, &mibsize) == -1) err(1, "sysctlnametomib()"); for (n = 0; ; ++n) { mib[mibsize] = n; size = sizeof xsw; if (sysctl(mib, mibsize + 1, &xsw, &size, NULL, 0) == -1) break; if (xsw.xsw_version != XSWDEV_VERSION) errx(1, "xswdev version mismatch"); if (xsw.xsw_dev == NODEV) snprintf(ksw.ksw_devname, sizeof ksw.ksw_devname, ""); else snprintf(ksw.ksw_devname, sizeof ksw.ksw_devname, "/dev/%s", devname(xsw.xsw_dev, S_IFCHR)); ksw.ksw_used = xsw.xsw_used; ksw.ksw_total = xsw.xsw_nblks; ksw.ksw_flags = xsw.xsw_flags; print_swap(&ksw); } if (errno != ENOENT) err(1, "sysctl()"); print_swap_total(); } static void swapmode(void) { if (kd != NULL) swapmode_kvm(); else swapmode_sysctl(); } Index: head/usr.sbin/sa/extern.h =================================================================== --- head/usr.sbin/sa/extern.h (revision 318735) +++ head/usr.sbin/sa/extern.h (revision 318736) @@ -1,110 +1,110 @@ /* * Copyright (c) 1994 Christopher G. Demetriou * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include /* structures */ /* All times are stored in 1e-6s units. */ struct cmdinfo { char ci_comm[MAXCOMLEN+2]; /* command name (+ '*') */ uid_t ci_uid; /* user id */ u_quad_t ci_calls; /* number of calls */ double ci_etime; /* elapsed time */ double ci_utime; /* user time */ double ci_stime; /* system time */ double ci_mem; /* memory use */ double ci_io; /* number of disk i/o ops */ u_int ci_flags; /* flags; see below */ }; #define CI_UNPRINTABLE 0x0001 /* unprintable chars in name */ struct userinfo { uid_t ui_uid; /* user id; for consistency */ u_quad_t ui_calls; /* number of invocations */ double ui_utime; /* user time */ double ui_stime; /* system time */ double ui_mem; /* memory use */ double ui_io; /* number of disk i/o ops */ }; /* typedefs */ typedef int (*cmpf_t)(const DBT *, const DBT *); /* external functions in db.c */ int db_copy_in(DB **mdb, const char *dbname, const char *name, BTREEINFO *bti, int (*v1_to_v2)(DBT *key, DBT *data)); int db_copy_out(DB *mdb, const char *dbname, const char *name, BTREEINFO *bti); void db_destroy(DB *db, const char *uname); /* external functions in pdb.c */ int pacct_init(void); void pacct_destroy(void); int pacct_add(const struct cmdinfo *); int pacct_update(void); void pacct_print(void); /* external functions in readrec.c */ -int readrec_forward(FILE *f, struct acctv2 *av2); +int readrec_forward(FILE *f, struct acctv3 *av2); /* external functions in usrdb.c */ int usracct_init(void); void usracct_destroy(void); int usracct_add(const struct cmdinfo *); int usracct_update(void); void usracct_print(void); /* variables */ extern int aflag, bflag, cflag, dflag, Dflag, fflag, iflag, jflag, kflag; extern int Kflag, lflag, mflag, qflag, rflag, sflag, tflag, uflag, vflag; extern u_quad_t cutoff; extern cmpf_t sa_cmp; extern const char *pdb_file, *usrdb_file; /* some #defines to help with db's stupidity */ #define DB_CLOSE(db) \ ((*(db)->close)(db)) #define DB_GET(db, key, data, flags) \ ((*(db)->get)((db), (key), (data), (flags))) #define DB_PUT(db, key, data, flags) \ ((*(db)->put)((db), (key), (data), (flags))) #define DB_SYNC(db, flags) \ ((*(db)->sync)((db), (flags))) #define DB_SEQ(db, key, data, flags) \ ((*(db)->seq)((db), (key), (data), (flags))) Index: head/usr.sbin/sa/main.c =================================================================== --- head/usr.sbin/sa/main.c (revision 318735) +++ head/usr.sbin/sa/main.c (revision 318736) @@ -1,533 +1,533 @@ /* * Copyright (c) 1994 Christopher G. Demetriou * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #if 0 #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1994 Christopher G. Demetriou\n\ All rights reserved.\n"; #endif #endif #include __FBSDID("$FreeBSD$"); /* * sa: system accounting */ #include #include #include #include #include #include #include #include #include #include #include #include #include "extern.h" #include "pathnames.h" static FILE *acct_load(const char *, int); static int cmp_comm(const char *, const char *); static int cmp_usrsys(const DBT *, const DBT *); static int cmp_avgusrsys(const DBT *, const DBT *); static int cmp_dkio(const DBT *, const DBT *); static int cmp_avgdkio(const DBT *, const DBT *); static int cmp_cpumem(const DBT *, const DBT *); static int cmp_avgcpumem(const DBT *, const DBT *); static int cmp_calls(const DBT *, const DBT *); static void usage(void); int aflag, bflag, cflag, dflag, Dflag, fflag, iflag, jflag, kflag; int Kflag, lflag, mflag, qflag, rflag, sflag, tflag, uflag, vflag; u_quad_t cutoff = 1; const char *pdb_file = _PATH_SAVACCT; const char *usrdb_file = _PATH_USRACCT; static char *dfltargv[] = { NULL }; static int dfltargc = (sizeof dfltargv/sizeof(char *)); /* default to comparing by sum of user + system time */ cmpf_t sa_cmp = cmp_usrsys; int main(int argc, char **argv) { FILE *f; char pathacct[] = _PATH_ACCT; int ch, error = 0; dfltargv[0] = pathacct; while ((ch = getopt(argc, argv, "abcdDfijkKlmnP:qrstuU:v:")) != -1) switch (ch) { case 'a': /* print all commands */ aflag = 1; break; case 'b': /* sort by per-call user/system time average */ bflag = 1; sa_cmp = cmp_avgusrsys; break; case 'c': /* print percentage total time */ cflag = 1; break; case 'd': /* sort by averge number of disk I/O ops */ dflag = 1; sa_cmp = cmp_avgdkio; break; case 'D': /* print and sort by total disk I/O ops */ Dflag = 1; sa_cmp = cmp_dkio; break; case 'f': /* force no interactive threshold comprison */ fflag = 1; break; case 'i': /* do not read in summary file */ iflag = 1; break; case 'j': /* instead of total minutes, give sec/call */ jflag = 1; break; case 'k': /* sort by cpu-time average memory usage */ kflag = 1; sa_cmp = cmp_avgcpumem; break; case 'K': /* print and sort by cpu-storage integral */ sa_cmp = cmp_cpumem; Kflag = 1; break; case 'l': /* separate system and user time */ lflag = 1; break; case 'm': /* print procs and time per-user */ mflag = 1; break; case 'n': /* sort by number of calls */ sa_cmp = cmp_calls; break; case 'P': /* specify program database summary file */ pdb_file = optarg; break; case 'q': /* quiet; error messages only */ qflag = 1; break; case 'r': /* reverse order of sort */ rflag = 1; break; case 's': /* merge accounting file into summaries */ sflag = 1; break; case 't': /* report ratio of user and system times */ tflag = 1; break; case 'u': /* first, print uid and command name */ uflag = 1; break; case 'U': /* specify user database summary file */ usrdb_file = optarg; break; case 'v': /* cull junk */ vflag = 1; cutoff = atoi(optarg); break; case '?': default: usage(); } argc -= optind; argv += optind; /* various argument checking */ if (fflag && !vflag) errx(1, "only one of -f requires -v"); if (fflag && aflag) errx(1, "only one of -a and -v may be specified"); /* XXX need more argument checking */ if (!uflag) { /* initialize tables */ if ((sflag || (!mflag && !qflag)) && pacct_init() != 0) errx(1, "process accounting initialization failed"); if ((sflag || (mflag && !qflag)) && usracct_init() != 0) errx(1, "user accounting initialization failed"); } if (argc == 0) { argc = dfltargc; argv = dfltargv; } /* for each file specified */ for (; argc > 0; argc--, argv++) { /* * load the accounting data from the file. * if it fails, go on to the next file. */ f = acct_load(argv[0], sflag); if (f == NULL) continue; if (!uflag && sflag) { #ifndef DEBUG sigset_t nmask, omask; int unmask = 1; /* * block most signals so we aren't interrupted during * the update. */ if (sigfillset(&nmask) == -1) { warn("sigfillset"); unmask = 0; error = 1; } if (unmask && (sigprocmask(SIG_BLOCK, &nmask, &omask) == -1)) { warn("couldn't set signal mask"); unmask = 0; error = 1; } #endif /* DEBUG */ /* * truncate the accounting data file ASAP, to avoid * losing data. don't worry about errors in updating * the saved stats; better to underbill than overbill, * but we want every accounting record intact. */ if (ftruncate(fileno(f), 0) == -1) { warn("couldn't truncate %s", *argv); error = 1; } /* * update saved user and process accounting data. * note errors for later. */ if (pacct_update() != 0 || usracct_update() != 0) error = 1; #ifndef DEBUG /* * restore signals */ if (unmask && (sigprocmask(SIG_SETMASK, &omask, NULL) == -1)) { warn("couldn't restore signal mask"); error = 1; } #endif /* DEBUG */ } /* * close the opened accounting file */ if (fclose(f) == EOF) { warn("fclose %s", *argv); error = 1; } } if (!uflag && !qflag) { /* print any results we may have obtained. */ if (!mflag) pacct_print(); else usracct_print(); } if (!uflag) { /* finally, deallocate databases */ if (sflag || (!mflag && !qflag)) pacct_destroy(); if (sflag || (mflag && !qflag)) usracct_destroy(); } exit(error); } static void usage(void) { (void)fprintf(stderr, "usage: sa [-abcdDfijkKlmnqrstu] [-P file] [-U file] [-v cutoff] [file ...]\n"); exit(1); } static FILE * acct_load(const char *pn, int wr) { - struct acctv2 ac; + struct acctv3 ac; struct cmdinfo ci; ssize_t rv; FILE *f; int i; /* * open the file */ f = fopen(pn, wr ? "r+" : "r"); if (f == NULL) { warn("open %s %s", pn, wr ? "for read/write" : "read-only"); return (NULL); } /* * read all we can; don't stat and open because more processes * could exit, and we'd miss them */ while (1) { /* get one accounting entry and punt if there's an error */ rv = readrec_forward(f, &ac); if (rv != 1) { if (rv == EOF) warn("error reading %s", pn); break; } /* decode it */ ci.ci_calls = 1; for (i = 0; i < (int)sizeof ac.ac_comm && ac.ac_comm[i] != '\0'; i++) { char c = ac.ac_comm[i]; if (!isascii(c) || iscntrl(c)) { ci.ci_comm[i] = '?'; ci.ci_flags |= CI_UNPRINTABLE; } else ci.ci_comm[i] = c; } if (ac.ac_flagx & AFORK) ci.ci_comm[i++] = '*'; ci.ci_comm[i++] = '\0'; ci.ci_etime = ac.ac_etime; ci.ci_utime = ac.ac_utime; ci.ci_stime = ac.ac_stime; ci.ci_uid = ac.ac_uid; ci.ci_mem = ac.ac_mem; ci.ci_io = ac.ac_io; if (!uflag) { /* and enter it into the usracct and pacct databases */ if (sflag || (!mflag && !qflag)) pacct_add(&ci); if (sflag || (mflag && !qflag)) usracct_add(&ci); } else if (!qflag) printf("%6u %12.3lf cpu %12.0lfk mem %12.0lf io %s\n", ci.ci_uid, (ci.ci_utime + ci.ci_stime) / 1000000, ci.ci_mem, ci.ci_io, ci.ci_comm); } /* Finally, return the file stream for possible truncation. */ return (f); } /* sort commands, doing the right thing in terms of reversals */ static int cmp_comm(const char *s1, const char *s2) { int rv; rv = strcmp(s1, s2); if (rv == 0) rv = -1; return (rflag ? rv : -rv); } /* sort by total user and system time */ static int cmp_usrsys(const DBT *d1, const DBT *d2) { struct cmdinfo c1, c2; double t1, t2; memcpy(&c1, d1->data, sizeof(c1)); memcpy(&c2, d2->data, sizeof(c2)); t1 = c1.ci_utime + c1.ci_stime; t2 = c2.ci_utime + c2.ci_stime; if (t1 < t2) return -1; else if (t1 == t2) return (cmp_comm(c1.ci_comm, c2.ci_comm)); else return 1; } /* sort by average user and system time */ static int cmp_avgusrsys(const DBT *d1, const DBT *d2) { struct cmdinfo c1, c2; double t1, t2; memcpy(&c1, d1->data, sizeof(c1)); memcpy(&c2, d2->data, sizeof(c2)); t1 = c1.ci_utime + c1.ci_stime; t1 /= (double) (c1.ci_calls ? c1.ci_calls : 1); t2 = c2.ci_utime + c2.ci_stime; t2 /= (double) (c2.ci_calls ? c2.ci_calls : 1); if (t1 < t2) return -1; else if (t1 == t2) return (cmp_comm(c1.ci_comm, c2.ci_comm)); else return 1; } /* sort by total number of disk I/O operations */ static int cmp_dkio(const DBT *d1, const DBT *d2) { struct cmdinfo c1, c2; memcpy(&c1, d1->data, sizeof(c1)); memcpy(&c2, d2->data, sizeof(c2)); if (c1.ci_io < c2.ci_io) return -1; else if (c1.ci_io == c2.ci_io) return (cmp_comm(c1.ci_comm, c2.ci_comm)); else return 1; } /* sort by average number of disk I/O operations */ static int cmp_avgdkio(const DBT *d1, const DBT *d2) { struct cmdinfo c1, c2; double n1, n2; memcpy(&c1, d1->data, sizeof(c1)); memcpy(&c2, d2->data, sizeof(c2)); n1 = c1.ci_io / (double) (c1.ci_calls ? c1.ci_calls : 1); n2 = c2.ci_io / (double) (c2.ci_calls ? c2.ci_calls : 1); if (n1 < n2) return -1; else if (n1 == n2) return (cmp_comm(c1.ci_comm, c2.ci_comm)); else return 1; } /* sort by the cpu-storage integral */ static int cmp_cpumem(const DBT *d1, const DBT *d2) { struct cmdinfo c1, c2; memcpy(&c1, d1->data, sizeof(c1)); memcpy(&c2, d2->data, sizeof(c2)); if (c1.ci_mem < c2.ci_mem) return -1; else if (c1.ci_mem == c2.ci_mem) return (cmp_comm(c1.ci_comm, c2.ci_comm)); else return 1; } /* sort by the cpu-time average memory usage */ static int cmp_avgcpumem(const DBT *d1, const DBT *d2) { struct cmdinfo c1, c2; double t1, t2; double n1, n2; memcpy(&c1, d1->data, sizeof(c1)); memcpy(&c2, d2->data, sizeof(c2)); t1 = c1.ci_utime + c1.ci_stime; t2 = c2.ci_utime + c2.ci_stime; n1 = c1.ci_mem / (t1 ? t1 : 1); n2 = c2.ci_mem / (t2 ? t2 : 1); if (n1 < n2) return -1; else if (n1 == n2) return (cmp_comm(c1.ci_comm, c2.ci_comm)); else return 1; } /* sort by the number of invocations */ static int cmp_calls(const DBT *d1, const DBT *d2) { struct cmdinfo c1, c2; memcpy(&c1, d1->data, sizeof(c1)); memcpy(&c2, d2->data, sizeof(c2)); if (c1.ci_calls < c2.ci_calls) return -1; else if (c1.ci_calls == c2.ci_calls) return (cmp_comm(c1.ci_comm, c2.ci_comm)); else return 1; }