diff --git a/lib/libc/sys/closefrom.2 b/lib/libc/sys/closefrom.2 index db41c617dc7f..64c210f7897a 100644 --- a/lib/libc/sys/closefrom.2 +++ b/lib/libc/sys/closefrom.2 @@ -1,90 +1,94 @@ .\" Copyright (c) 2009 Hudson River Trading LLC .\" Written by: John H. Baldwin .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd April 12, 2020 +.Dd March 3, 2022 .Dt CLOSEFROM 2 .Os .Sh NAME .Nm closefrom , .Nm close_range .Nd delete open file descriptors .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In unistd.h .Ft void .Fn closefrom "int lowfd" .Ft int .Fn close_range "u_int lowfd" "u_int highfd" "int flags" .Sh DESCRIPTION The .Fn closefrom system call deletes all open file descriptors greater than or equal to .Fa lowfd from the per-process object reference table. Any errors encountered while closing file descriptors are ignored. .Pp The .Fn close_range system call deletes all open file descriptors between .Fa lowfd and .Fa highfd inclusive, clamped to the range of open file descriptors. Any errors encountered while closing file descriptors are ignored. -There are currently no defined -.Fa flags . +Supported +.Fa flags : +.Bl -tag -width ".Dv CLOSE_RANGE_CLOEXEC" +.It Dv CLOSE_RANGE_CLOEXEC +Set the close-on-exec flag on descriptors in the range instead of closing them. +.El .Sh RETURN VALUES Upon successful completion, .Fn close_range returns a value of 0. Otherwise, a value of -1 is returned and the global variable .Va errno is set to indicate the error. .Sh ERRORS The .Fn close_range system call will fail if: .Bl -tag -width Er .It Bq Er EINVAL The .Fa highfd argument is lower than the .Fa lowfd argument. .It Bq Er EINVAL An invalid flag was set. .El .Sh SEE ALSO .Xr close 2 .Sh HISTORY The .Fn closefrom function first appeared in .Fx 8.0 . diff --git a/lib/libsysdecode/flags.c b/lib/libsysdecode/flags.c index df461cf0dca1..35bce1ff77f9 100644 --- a/lib/libsysdecode/flags.c +++ b/lib/libsysdecode/flags.c @@ -1,1300 +1,1307 @@ /* * Copyright (c) 2006 "David Kirchner" . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define L2CAP_SOCKET_CHECKED #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * This is taken from the xlat tables originally in truss which were * in turn taken from strace. */ struct name_table { uintmax_t val; const char *str; }; #define X(a) { a, #a }, #define XEND { 0, NULL } #define TABLE_START(n) static struct name_table n[] = { #define TABLE_ENTRY X #define TABLE_END XEND }; #include "tables.h" #undef TABLE_START #undef TABLE_ENTRY #undef TABLE_END /* * These are simple support macros. print_or utilizes a variable * defined in the calling function to track whether or not it should * print a logical-OR character ('|') before a string. if_print_or * simply handles the necessary "if" statement used in many lines * of this file. */ #define print_or(fp,str,orflag) do { \ if (orflag) fputc(fp, '|'); else orflag = true; \ fprintf(fp, str); } \ while (0) #define if_print_or(fp,i,flag,orflag) do { \ if ((i & flag) == flag) \ print_or(fp,#flag,orflag); } \ while (0) static const char * lookup_value(struct name_table *table, uintmax_t val) { for (; table->str != NULL; table++) if (table->val == val) return (table->str); return (NULL); } /* * Used when the value maps to a bitmask of #definition values in the * table. This is a helper routine which outputs a symbolic mask of * matched masks. Multiple masks are separated by a pipe ('|'). * The value is modified on return to only hold unmatched bits. */ static void print_mask_part(FILE *fp, struct name_table *table, uintmax_t *valp, bool *printed) { uintmax_t rem; rem = *valp; for (; table->str != NULL; table++) { if ((table->val & rem) == table->val) { /* * Only print a zero mask if the raw value is * zero. */ if (table->val == 0 && *valp != 0) continue; fprintf(fp, "%s%s", *printed ? "|" : "", table->str); *printed = true; rem &= ~table->val; } } *valp = rem; } /* * Used when the value maps to a bitmask of #definition values in the * table. The return value is true if something was printed. If * rem is not NULL, *rem holds any bits not decoded if something was * printed. If nothing was printed and rem is not NULL, *rem holds * the original value. */ static bool print_mask_int(FILE *fp, struct name_table *table, int ival, int *rem) { uintmax_t val; bool printed; printed = false; val = (unsigned)ival; print_mask_part(fp, table, &val, &printed); if (rem != NULL) *rem = val; return (printed); } /* * Used for a mask of optional flags where a value of 0 is valid. */ static bool print_mask_0(FILE *fp, struct name_table *table, int val, int *rem) { if (val == 0) { fputs("0", fp); if (rem != NULL) *rem = 0; return (true); } return (print_mask_int(fp, table, val, rem)); } /* * Like print_mask_0 but for a unsigned long instead of an int. */ static bool print_mask_0ul(FILE *fp, struct name_table *table, u_long lval, u_long *rem) { uintmax_t val; bool printed; if (lval == 0) { fputs("0", fp); if (rem != NULL) *rem = 0; return (true); } printed = false; val = lval; print_mask_part(fp, table, &val, &printed); if (rem != NULL) *rem = val; return (printed); } static void print_integer(FILE *fp, int val, int base) { switch (base) { case 8: fprintf(fp, "0%o", val); break; case 10: fprintf(fp, "%d", val); break; case 16: fprintf(fp, "0x%x", val); break; default: abort2("bad base", 0, NULL); break; } } static bool print_value(FILE *fp, struct name_table *table, uintmax_t val) { const char *str; str = lookup_value(table, val); if (str != NULL) { fputs(str, fp); return (true); } return (false); } const char * sysdecode_atfd(int fd) { if (fd == AT_FDCWD) return ("AT_FDCWD"); return (NULL); } bool sysdecode_atflags(FILE *fp, int flag, int *rem) { return (print_mask_int(fp, atflags, flag, rem)); } static struct name_table semctlops[] = { X(GETNCNT) X(GETPID) X(GETVAL) X(GETALL) X(GETZCNT) X(SETVAL) X(SETALL) X(IPC_RMID) X(IPC_SET) X(IPC_STAT) XEND }; const char * sysdecode_semctl_cmd(int cmd) { return (lookup_value(semctlops, cmd)); } static struct name_table shmctlops[] = { X(IPC_RMID) X(IPC_SET) X(IPC_STAT) XEND }; const char * sysdecode_shmctl_cmd(int cmd) { return (lookup_value(shmctlops, cmd)); } const char * sysdecode_msgctl_cmd(int cmd) { return (sysdecode_shmctl_cmd(cmd)); } static struct name_table semgetflags[] = { X(IPC_CREAT) X(IPC_EXCL) X(SEM_R) X(SEM_A) X((SEM_R>>3)) X((SEM_A>>3)) X((SEM_R>>6)) X((SEM_A>>6)) XEND }; bool sysdecode_semget_flags(FILE *fp, int flag, int *rem) { return (print_mask_int(fp, semgetflags, flag, rem)); } static struct name_table idtypes[] = { X(P_PID) X(P_PPID) X(P_PGID) X(P_SID) X(P_CID) X(P_UID) X(P_GID) X(P_ALL) X(P_LWPID) X(P_TASKID) X(P_PROJID) X(P_POOLID) X(P_JAILID) X(P_CTID) X(P_CPUID) X(P_PSETID) XEND }; /* XXX: idtype is really an idtype_t */ const char * sysdecode_idtype(int idtype) { return (lookup_value(idtypes, idtype)); } /* * [g|s]etsockopt's level argument can either be SOL_SOCKET or a * protocol-specific value. */ const char * sysdecode_sockopt_level(int level) { const char *str; if (level == SOL_SOCKET) return ("SOL_SOCKET"); /* SOL_* constants for Bluetooth sockets. */ str = lookup_value(ngbtsolevel, level); if (str != NULL) return (str); /* * IP and Infiniband sockets use IP protocols as levels. Not all * protocols are valid but it is simpler to just allow all of them. * * XXX: IPPROTO_IP == 0, but UNIX domain sockets use a level of 0 * for private options. */ str = sysdecode_ipproto(level); if (str != NULL) return (str); return (NULL); } bool sysdecode_vmprot(FILE *fp, int type, int *rem) { return (print_mask_int(fp, vmprot, type, rem)); } static struct name_table sockflags[] = { X(SOCK_CLOEXEC) X(SOCK_NONBLOCK) XEND }; bool sysdecode_socket_type(FILE *fp, int type, int *rem) { const char *str; uintmax_t val; bool printed; str = lookup_value(socktype, type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)); if (str != NULL) { fputs(str, fp); *rem = 0; printed = true; } else { *rem = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK); printed = false; } val = type & (SOCK_CLOEXEC | SOCK_NONBLOCK); print_mask_part(fp, sockflags, &val, &printed); return (printed); } bool sysdecode_access_mode(FILE *fp, int mode, int *rem) { return (print_mask_int(fp, accessmode, mode, rem)); } /* XXX: 'type' is really an acl_type_t. */ const char * sysdecode_acltype(int type) { return (lookup_value(acltype, type)); } bool sysdecode_cap_fcntlrights(FILE *fp, uint32_t rights, uint32_t *rem) { return (print_mask_int(fp, capfcntl, rights, rem)); } +bool +sysdecode_close_range_flags(FILE *fp, int flags, int *rem) +{ + + return (print_mask_int(fp, closerangeflags, flags, rem)); +} + const char * sysdecode_extattrnamespace(int namespace) { return (lookup_value(extattrns, namespace)); } const char * sysdecode_fadvice(int advice) { return (lookup_value(fadvisebehav, advice)); } bool sysdecode_open_flags(FILE *fp, int flags, int *rem) { bool printed; int mode; uintmax_t val; mode = flags & O_ACCMODE; flags &= ~O_ACCMODE; switch (mode) { case O_RDONLY: if (flags & O_EXEC) { flags &= ~O_EXEC; fputs("O_EXEC", fp); } else fputs("O_RDONLY", fp); printed = true; mode = 0; break; case O_WRONLY: fputs("O_WRONLY", fp); printed = true; mode = 0; break; case O_RDWR: fputs("O_RDWR", fp); printed = true; mode = 0; break; default: printed = false; } val = (unsigned)flags; print_mask_part(fp, openflags, &val, &printed); if (rem != NULL) *rem = val | mode; return (printed); } bool sysdecode_fcntl_fileflags(FILE *fp, int flags, int *rem) { bool printed; int oflags; /* * The file flags used with F_GETFL/F_SETFL mostly match the * flags passed to open(2). However, a few open-only flag * bits have been repurposed for fcntl-only flags. */ oflags = flags & ~(O_NOFOLLOW | FRDAHEAD); printed = sysdecode_open_flags(fp, oflags, rem); if (flags & O_NOFOLLOW) { fprintf(fp, "%sFPOIXSHM", printed ? "|" : ""); printed = true; } if (flags & FRDAHEAD) { fprintf(fp, "%sFRDAHEAD", printed ? "|" : ""); printed = true; } return (printed); } bool sysdecode_flock_operation(FILE *fp, int operation, int *rem) { return (print_mask_int(fp, flockops, operation, rem)); } static struct name_table getfsstatmode[] = { X(MNT_WAIT) X(MNT_NOWAIT) XEND }; const char * sysdecode_getfsstat_mode(int mode) { return (lookup_value(getfsstatmode, mode)); } const char * sysdecode_getrusage_who(int who) { return (lookup_value(rusage, who)); } static struct name_table kevent_user_ffctrl[] = { X(NOTE_FFNOP) X(NOTE_FFAND) X(NOTE_FFOR) X(NOTE_FFCOPY) XEND }; static struct name_table kevent_rdwr_fflags[] = { X(NOTE_LOWAT) X(NOTE_FILE_POLL) XEND }; static struct name_table kevent_vnode_fflags[] = { X(NOTE_DELETE) X(NOTE_WRITE) X(NOTE_EXTEND) X(NOTE_ATTRIB) X(NOTE_LINK) X(NOTE_RENAME) X(NOTE_REVOKE) X(NOTE_OPEN) X(NOTE_CLOSE) X(NOTE_CLOSE_WRITE) X(NOTE_READ) XEND }; static struct name_table kevent_proc_fflags[] = { X(NOTE_EXIT) X(NOTE_FORK) X(NOTE_EXEC) X(NOTE_TRACK) X(NOTE_TRACKERR) X(NOTE_CHILD) XEND }; static struct name_table kevent_timer_fflags[] = { X(NOTE_SECONDS) X(NOTE_MSECONDS) X(NOTE_USECONDS) X(NOTE_NSECONDS) X(NOTE_ABSTIME) XEND }; void sysdecode_kevent_fflags(FILE *fp, short filter, int fflags, int base) { int rem; if (fflags == 0) { fputs("0", fp); return; } switch (filter) { case EVFILT_READ: case EVFILT_WRITE: if (!print_mask_int(fp, kevent_rdwr_fflags, fflags, &rem)) fprintf(fp, "%#x", rem); else if (rem != 0) fprintf(fp, "|%#x", rem); break; case EVFILT_VNODE: if (!print_mask_int(fp, kevent_vnode_fflags, fflags, &rem)) fprintf(fp, "%#x", rem); else if (rem != 0) fprintf(fp, "|%#x", rem); break; case EVFILT_PROC: case EVFILT_PROCDESC: if (!print_mask_int(fp, kevent_proc_fflags, fflags, &rem)) fprintf(fp, "%#x", rem); else if (rem != 0) fprintf(fp, "|%#x", rem); break; case EVFILT_TIMER: if (!print_mask_int(fp, kevent_timer_fflags, fflags, &rem)) fprintf(fp, "%#x", rem); else if (rem != 0) fprintf(fp, "|%#x", rem); break; case EVFILT_USER: { unsigned int ctrl, data; ctrl = fflags & NOTE_FFCTRLMASK; data = fflags & NOTE_FFLAGSMASK; if (fflags & NOTE_TRIGGER) { fputs("NOTE_TRIGGER", fp); if (fflags == NOTE_TRIGGER) return; fputc('|', fp); } /* * An event with 'ctrl' == NOTE_FFNOP is either a reported * (output) event for which only 'data' should be output * or a pointless input event. Assume that pointless * input events don't occur in practice. An event with * NOTE_TRIGGER is always an input event. */ if (ctrl != NOTE_FFNOP || fflags & NOTE_TRIGGER) { fprintf(fp, "%s|%#x", lookup_value(kevent_user_ffctrl, ctrl), data); } else { print_integer(fp, data, base); } break; } default: print_integer(fp, fflags, base); break; } } bool sysdecode_kevent_flags(FILE *fp, int flags, int *rem) { return (print_mask_int(fp, keventflags, flags, rem)); } const char * sysdecode_kevent_filter(int filter) { return (lookup_value(keventfilters, filter)); } const char * sysdecode_kldsym_cmd(int cmd) { return (lookup_value(kldsymcmd, cmd)); } const char * sysdecode_kldunload_flags(int flags) { return (lookup_value(kldunloadfflags, flags)); } const char * sysdecode_lio_listio_mode(int mode) { return (lookup_value(lio_listiomodes, mode)); } const char * sysdecode_madvice(int advice) { return (lookup_value(madvisebehav, advice)); } const char * sysdecode_minherit_inherit(int inherit) { return (lookup_value(minheritflags, inherit)); } bool sysdecode_mlockall_flags(FILE *fp, int flags, int *rem) { return (print_mask_int(fp, mlockallflags, flags, rem)); } bool sysdecode_mmap_prot(FILE *fp, int prot, int *rem) { int protm; bool printed; printed = false; protm = PROT_MAX_EXTRACT(prot); prot &= ~PROT_MAX(protm); if (protm != 0) { fputs("PROT_MAX(", fp); printed = print_mask_int(fp, mmapprot, protm, rem); fputs(")|", fp); } return (print_mask_int(fp, mmapprot, prot, rem) || printed); } bool sysdecode_fileflags(FILE *fp, fflags_t flags, fflags_t *rem) { return (print_mask_0(fp, fileflags, flags, rem)); } bool sysdecode_filemode(FILE *fp, int mode, int *rem) { return (print_mask_0(fp, filemode, mode, rem)); } bool sysdecode_mount_flags(FILE *fp, int flags, int *rem) { return (print_mask_int(fp, mountflags, flags, rem)); } bool sysdecode_msync_flags(FILE *fp, int flags, int *rem) { return (print_mask_int(fp, msyncflags, flags, rem)); } const char * sysdecode_nfssvc_flags(int flags) { return (lookup_value(nfssvcflags, flags)); } static struct name_table pipe2flags[] = { X(O_CLOEXEC) X(O_NONBLOCK) XEND }; bool sysdecode_pipe2_flags(FILE *fp, int flags, int *rem) { return (print_mask_0(fp, pipe2flags, flags, rem)); } const char * sysdecode_prio_which(int which) { return (lookup_value(prio, which)); } const char * sysdecode_procctl_cmd(int cmd) { return (lookup_value(procctlcmd, cmd)); } const char * sysdecode_ptrace_request(int request) { return (lookup_value(ptraceop, request)); } static struct name_table quotatypes[] = { X(GRPQUOTA) X(USRQUOTA) XEND }; bool sysdecode_quotactl_cmd(FILE *fp, int cmd) { const char *primary, *type; primary = lookup_value(quotactlcmds, cmd >> SUBCMDSHIFT); if (primary == NULL) return (false); fprintf(fp, "QCMD(%s,", primary); type = lookup_value(quotatypes, cmd & SUBCMDMASK); if (type != NULL) fprintf(fp, "%s", type); else fprintf(fp, "%#x", cmd & SUBCMDMASK); fprintf(fp, ")"); return (true); } bool sysdecode_reboot_howto(FILE *fp, int howto, int *rem) { bool printed; /* * RB_AUTOBOOT is special in that its value is zero, but it is * also an implied argument if a different operation is not * requested via RB_HALT, RB_POWERCYCLE, RB_POWEROFF, or * RB_REROOT. */ if (howto != 0 && (howto & (RB_HALT | RB_POWEROFF | RB_REROOT | RB_POWERCYCLE)) == 0) { fputs("RB_AUTOBOOT|", fp); printed = true; } else printed = false; return (print_mask_int(fp, rebootopt, howto, rem) || printed); } bool sysdecode_rfork_flags(FILE *fp, int flags, int *rem) { return (print_mask_int(fp, rforkflags, flags, rem)); } const char * sysdecode_rlimit(int resource) { return (lookup_value(rlimit, resource)); } const char * sysdecode_scheduler_policy(int policy) { return (lookup_value(schedpolicy, policy)); } bool sysdecode_sendfile_flags(FILE *fp, int flags, int *rem) { return (print_mask_int(fp, sendfileflags, flags, rem)); } bool sysdecode_shmat_flags(FILE *fp, int flags, int *rem) { return (print_mask_int(fp, shmatflags, flags, rem)); } const char * sysdecode_shutdown_how(int how) { return (lookup_value(shutdownhow, how)); } const char * sysdecode_sigbus_code(int si_code) { return (lookup_value(sigbuscode, si_code)); } const char * sysdecode_sigchld_code(int si_code) { return (lookup_value(sigchldcode, si_code)); } const char * sysdecode_sigfpe_code(int si_code) { return (lookup_value(sigfpecode, si_code)); } const char * sysdecode_sigill_code(int si_code) { return (lookup_value(sigillcode, si_code)); } const char * sysdecode_sigsegv_code(int si_code) { return (lookup_value(sigsegvcode, si_code)); } const char * sysdecode_sigtrap_code(int si_code) { return (lookup_value(sigtrapcode, si_code)); } const char * sysdecode_sigprocmask_how(int how) { return (lookup_value(sigprocmaskhow, how)); } const char * sysdecode_socketdomain(int domain) { return (lookup_value(sockdomain, domain)); } const char * sysdecode_socket_protocol(int domain, int protocol) { switch (domain) { case PF_INET: case PF_INET6: return (lookup_value(sockipproto, protocol)); default: return (NULL); } } const char * sysdecode_sockaddr_family(int sa_family) { return (lookup_value(sockfamily, sa_family)); } const char * sysdecode_ipproto(int protocol) { return (lookup_value(sockipproto, protocol)); } const char * sysdecode_sockopt_name(int level, int optname) { if (level == SOL_SOCKET) return (lookup_value(sockopt, optname)); if (level == IPPROTO_IP) /* XXX: UNIX domain socket options use a level of 0 also. */ return (lookup_value(sockoptip, optname)); if (level == IPPROTO_IPV6) return (lookup_value(sockoptipv6, optname)); if (level == IPPROTO_SCTP) return (lookup_value(sockoptsctp, optname)); if (level == IPPROTO_TCP) return (lookup_value(sockopttcp, optname)); if (level == IPPROTO_UDP) return (lookup_value(sockoptudp, optname)); if (level == IPPROTO_UDPLITE) return (lookup_value(sockoptudplite, optname)); return (NULL); } bool sysdecode_thr_create_flags(FILE *fp, int flags, int *rem) { return (print_mask_int(fp, thrcreateflags, flags, rem)); } const char * sysdecode_umtx_op(int op) { return (lookup_value(umtxop, op)); } bool sysdecode_umtx_op_flags(FILE *fp, int op, int *rem) { uintmax_t val; bool printed; printed = false; val = (unsigned)op; print_mask_part(fp, umtxopflags, &val, &printed); if (rem != NULL) *rem = val; return (printed); } const char * sysdecode_vmresult(int result) { return (lookup_value(vmresult, result)); } bool sysdecode_wait4_options(FILE *fp, int options, int *rem) { bool printed; int opt6; /* A flags value of 0 is normal. */ if (options == 0) { fputs("0", fp); if (rem != NULL) *rem = 0; return (true); } /* * These flags are implicit and aren't valid flags for wait4() * directly (though they don't fail with EINVAL). */ opt6 = options & (WEXITED | WTRAPPED); options &= ~opt6; printed = print_mask_int(fp, wait6opt, options, rem); if (rem != NULL) *rem |= opt6; return (printed); } bool sysdecode_wait6_options(FILE *fp, int options, int *rem) { return (print_mask_int(fp, wait6opt, options, rem)); } const char * sysdecode_whence(int whence) { return (lookup_value(seekwhence, whence)); } const char * sysdecode_fcntl_cmd(int cmd) { return (lookup_value(fcntlcmd, cmd)); } static struct name_table fcntl_fd_arg[] = { X(FD_CLOEXEC) X(0) XEND }; bool sysdecode_fcntl_arg_p(int cmd) { switch (cmd) { case F_GETFD: case F_GETFL: case F_GETOWN: return (false); default: return (true); } } void sysdecode_fcntl_arg(FILE *fp, int cmd, uintptr_t arg, int base) { int rem; switch (cmd) { case F_SETFD: if (!print_value(fp, fcntl_fd_arg, arg)) print_integer(fp, arg, base); break; case F_SETFL: if (!sysdecode_fcntl_fileflags(fp, arg, &rem)) fprintf(fp, "%#x", rem); else if (rem != 0) fprintf(fp, "|%#x", rem); break; case F_GETLK: case F_SETLK: case F_SETLKW: fprintf(fp, "%p", (void *)arg); break; default: print_integer(fp, arg, base); break; } } bool sysdecode_mmap_flags(FILE *fp, int flags, int *rem) { uintmax_t val; bool printed; int align; /* * MAP_ALIGNED can't be handled directly by print_mask_int(). * MAP_32BIT is also problematic since it isn't defined for * all platforms. */ printed = false; align = flags & MAP_ALIGNMENT_MASK; val = (unsigned)flags & ~MAP_ALIGNMENT_MASK; print_mask_part(fp, mmapflags, &val, &printed); #ifdef MAP_32BIT if (val & MAP_32BIT) { fprintf(fp, "%sMAP_32BIT", printed ? "|" : ""); printed = true; val &= ~MAP_32BIT; } #endif if (align != 0) { if (printed) fputc('|', fp); if (align == MAP_ALIGNED_SUPER) fputs("MAP_ALIGNED_SUPER", fp); else fprintf(fp, "MAP_ALIGNED(%d)", align >> MAP_ALIGNMENT_SHIFT); printed = true; } if (rem != NULL) *rem = val; return (printed); } const char * sysdecode_pathconf_name(int name) { return (lookup_value(pathconfname, name)); } const char * sysdecode_rtprio_function(int function) { return (lookup_value(rtpriofuncs, function)); } bool sysdecode_msg_flags(FILE *fp, int flags, int *rem) { return (print_mask_0(fp, msgflags, flags, rem)); } const char * sysdecode_sigcode(int sig, int si_code) { const char *str; str = lookup_value(sigcode, si_code); if (str != NULL) return (str); switch (sig) { case SIGILL: return (sysdecode_sigill_code(si_code)); case SIGBUS: return (sysdecode_sigbus_code(si_code)); case SIGSEGV: return (sysdecode_sigsegv_code(si_code)); case SIGFPE: return (sysdecode_sigfpe_code(si_code)); case SIGTRAP: return (sysdecode_sigtrap_code(si_code)); case SIGCHLD: return (sysdecode_sigchld_code(si_code)); default: return (NULL); } } const char * sysdecode_sysarch_number(int number) { return (lookup_value(sysarchnum, number)); } bool sysdecode_umtx_cvwait_flags(FILE *fp, u_long flags, u_long *rem) { return (print_mask_0ul(fp, umtxcvwaitflags, flags, rem)); } bool sysdecode_umtx_rwlock_flags(FILE *fp, u_long flags, u_long *rem) { return (print_mask_0ul(fp, umtxrwlockflags, flags, rem)); } void sysdecode_cap_rights(FILE *fp, cap_rights_t *rightsp) { struct name_table *t; int i; bool comma; for (i = 0; i < CAPARSIZE(rightsp); i++) { if (CAPIDXBIT(rightsp->cr_rights[i]) != 1 << i) { fprintf(fp, "invalid cap_rights_t"); return; } } comma = false; for (t = caprights; t->str != NULL; t++) { if (cap_rights_is_set(rightsp, t->val)) { fprintf(fp, "%s%s", comma ? "," : "", t->str); comma = true; } } } static struct name_table cmsgtypeip[] = { X(IP_RECVDSTADDR) X(IP_RECVTTL) X(IP_RECVOPTS) X(IP_RECVRETOPTS) X(IP_RECVIF) X(IP_RECVTOS) X(IP_FLOWID) X(IP_FLOWTYPE) X(IP_RSSBUCKETID) XEND }; static struct name_table cmsgtypeipv6[] = { #if 0 /* The RFC 2292 defines are kernel space only. */ X(IPV6_2292PKTINFO) X(IPV6_2292HOPLIMIT) X(IPV6_2292HOPOPTS) X(IPV6_2292DSTOPTS) X(IPV6_2292RTHDR) X(IPV6_2292NEXTHOP) #endif X(IPV6_PKTINFO) X(IPV6_HOPLIMIT) X(IPV6_HOPOPTS) X(IPV6_DSTOPTS) X(IPV6_RTHDR) X(IPV6_NEXTHOP) X(IPV6_TCLASS) X(IPV6_FLOWID) X(IPV6_FLOWTYPE) X(IPV6_RSSBUCKETID) X(IPV6_PATHMTU) X(IPV6_RTHDRDSTOPTS) X(IPV6_USE_MIN_MTU) X(IPV6_DONTFRAG) X(IPV6_PREFER_TEMPADDR) XEND }; static struct name_table cmsgtypesctp[] = { X(SCTP_INIT) X(SCTP_SNDRCV) X(SCTP_EXTRCV) X(SCTP_SNDINFO) X(SCTP_RCVINFO) X(SCTP_NXTINFO) X(SCTP_PRINFO) X(SCTP_AUTHINFO) X(SCTP_DSTADDRV4) X(SCTP_DSTADDRV6) XEND }; const char * sysdecode_cmsg_type(int cmsg_level, int cmsg_type) { if (cmsg_level == SOL_SOCKET) return (lookup_value(cmsgtypesocket, cmsg_type)); if (cmsg_level == IPPROTO_IP) return (lookup_value(cmsgtypeip, cmsg_type)); if (cmsg_level == IPPROTO_IPV6) return (lookup_value(cmsgtypeipv6, cmsg_type)); if (cmsg_level == IPPROTO_SCTP) return (lookup_value(cmsgtypesctp, cmsg_type)); return (NULL); } const char * sysdecode_sctp_pr_policy(int policy) { return (lookup_value(sctpprpolicy, policy)); } static struct name_table sctpsndflags[] = { X(SCTP_EOF) X(SCTP_ABORT) X(SCTP_UNORDERED) X(SCTP_ADDR_OVER) X(SCTP_SENDALL) X(SCTP_EOR) X(SCTP_SACK_IMMEDIATELY) XEND }; bool sysdecode_sctp_snd_flags(FILE *fp, int flags, int *rem) { return (print_mask_int(fp, sctpsndflags, flags, rem)); } static struct name_table sctprcvflags[] = { X(SCTP_UNORDERED) XEND }; bool sysdecode_sctp_rcv_flags(FILE *fp, int flags, int *rem) { return (print_mask_int(fp, sctprcvflags, flags, rem)); } static struct name_table sctpnxtflags[] = { X(SCTP_UNORDERED) X(SCTP_COMPLETE) X(SCTP_NOTIFICATION) XEND }; bool sysdecode_sctp_nxt_flags(FILE *fp, int flags, int *rem) { return (print_mask_int(fp, sctpnxtflags, flags, rem)); } static struct name_table sctpsinfoflags[] = { X(SCTP_EOF) X(SCTP_ABORT) X(SCTP_UNORDERED) X(SCTP_ADDR_OVER) X(SCTP_SENDALL) X(SCTP_EOR) X(SCTP_SACK_IMMEDIATELY) XEND }; void sysdecode_sctp_sinfo_flags(FILE *fp, int sinfo_flags) { const char *temp; int rem; bool printed; printed = print_mask_0(fp, sctpsinfoflags, sinfo_flags, &rem); if (rem & ~SCTP_PR_SCTP_ALL) { fprintf(fp, "%s%#x", printed ? "|" : "", rem & ~SCTP_PR_SCTP_ALL); printed = true; rem &= ~SCTP_PR_SCTP_ALL; } if (rem != 0) { temp = sysdecode_sctp_pr_policy(rem); if (temp != NULL) { fprintf(fp, "%s%s", printed ? "|" : "", temp); } else { fprintf(fp, "%s%#x", printed ? "|" : "", rem); } } } bool sysdecode_shmflags(FILE *fp, int flags, int *rem) { return (print_mask_0(fp, shmflags, flags, rem)); } diff --git a/lib/libsysdecode/mktables b/lib/libsysdecode/mktables index afc8d82d1556..66f243eab6e6 100644 --- a/lib/libsysdecode/mktables +++ b/lib/libsysdecode/mktables @@ -1,186 +1,187 @@ #!/bin/sh # # Copyright (c) 2006 "David Kirchner" . All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # # Generates tables.h # # Originally this script was 'mksubr' for kdump which generated a complete # C file along with function definitions. Now this script generates tables # of constants and names extracted from header files. set -e LC_ALL=C; export LC_ALL if [ -z "$1" ] then echo "usage: sh $0 include-dir [output-file]" exit 1 fi include_dir=$1 if [ -n "$2" ]; then output_file="$2" output_tmp=$(mktemp -u) exec > "$output_tmp" fi all_headers= # # Generate a table C #definitions. The including file can define the # TABLE_NAME(n), TABLE_ENTRY(x), and TABLE_END macros to define what # the tables map to. # gen_table() { local name grep file excl filter name=$1 grep=$2 file=$3 excl=$4 if [ -z "$excl" ]; then filter="cat" else filter="egrep -v" fi cat <<_EOF_ TABLE_START(${name}) _EOF_ if [ -e "${include_dir}/${file}" ]; then all_headers="${all_headers:+${all_headers} }${file}" egrep "^#[[:space:]]*define[[:space:]]+"${grep}"[[:space:]]*" \ $include_dir/$file | ${filter} ${excl} | \ awk '{ for (i = 1; i <= NF; i++) \ if ($i ~ /define/) \ break; \ ++i; \ printf "TABLE_ENTRY(%s)\n", $i }' fi cat <<_EOF_ TABLE_END _EOF_ } cat <<_EOF_ /* This file is auto-generated. */ _EOF_ gen_table "accessmode" "[A-Z]_OK[[:space:]]+0?x?[0-9A-Fa-f]+" "sys/unistd.h" gen_table "acltype" "ACL_TYPE_[A-Z4_]+[[:space:]]+0x[0-9]+" "sys/acl.h" gen_table "atflags" "AT_[A-Z_]+[[:space:]]+0x[0-9]+" "sys/fcntl.h" gen_table "capfcntl" "CAP_FCNTL_[A-Z]+[[:space:]]+\(1" "sys/capsicum.h" +gen_table "closerangeflags" "CLOSE_RANGE_[A-Z]+[[:space:]]+\([0-9]+<<[0-9]+\)" "sys/unistd.h" gen_table "extattrns" "EXTATTR_NAMESPACE_[A-Z]+[[:space:]]+0x[0-9]+" "sys/extattr.h" gen_table "fadvisebehav" "POSIX_FADV_[A-Z]+[[:space:]]+[0-9]+" "sys/fcntl.h" gen_table "openflags" "O_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/fcntl.h" "O_RDONLY|O_RDWR|O_WRONLY" gen_table "flockops" "LOCK_[A-Z]+[[:space:]]+0x[0-9]+" "sys/fcntl.h" gen_table "kldsymcmd" "KLDSYM_[A-Z]+[[:space:]]+[0-9]+" "sys/linker.h" gen_table "kldunloadfflags" "LINKER_UNLOAD_[A-Z]+[[:space:]]+[0-9]+" "sys/linker.h" gen_table "lio_listiomodes" "LIO_(NO)?WAIT[[:space:]]+[0-9]+" "aio.h" gen_table "madvisebehav" "_?MADV_[A-Z]+[[:space:]]+[0-9]+" "sys/mman.h" gen_table "minheritflags" "INHERIT_[A-Z]+[[:space:]]+[0-9]+" "sys/mman.h" gen_table "mlockallflags" "MCL_[A-Z]+[[:space:]]+0x[0-9]+" "sys/mman.h" gen_table "mmapprot" "PROT_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/mman.h" gen_table "ngbtsolevel" "SOL_[A-Z0-9]+[[:space:]]+0x[0-9A-Fa-f]+" "netgraph/bluetooth/include/ng_btsocket.h" gen_table "fileflags" "[SU]F_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/stat.h" "UF_COMPRESSED|UF_TRACKED|UF_SETTABLE|SF_SETTABLE" gen_table "filemode" "S_[A-Z]+[[:space:]]+[0-6]{7}" "sys/stat.h" gen_table "keventflags" "EV_[A-Z]+[[:space:]]+0x[0-9]+" "sys/event.h" "EV_SYSFLAGS|EV_DROP|EV_FLAG[12]" gen_table "keventfilters" "EVFILT_[A-Z]+[[:space:]]+\(-[0-9]+\)" "sys/event.h" gen_table "mountflags" "MNT_[A-Z]+[[:space:]]+0x[0-9]+" "sys/mount.h" gen_table "msyncflags" "MS_[A-Z]+[[:space:]]+0x[0-9]+" "sys/mman.h" gen_table "nfssvcflags" "NFSSVC_[A-Z0-9]+[[:space:]]+0x[0-9]+" "nfs/nfssvc.h" gen_table "pathconfname" "_PC_[A-Z4_]+[[:space:]]+[0-9]+" "sys/unistd.h" gen_table "prio" "PRIO_[A-Z]+[[:space:]]+[0-9]" "sys/resource.h" gen_table "procctlcmd" "PROC_[A-Z_]+[[:space:]]+[0-9]" "sys/procctl.h" "PROC_TRACE_CTL_" gen_table "ptraceop" "PT_[[:alnum:]_]+[[:space:]]+[0-9]+" "sys/ptrace.h" gen_table "quotactlcmds" "Q_[A-Z]+[[:space:]]+0x[0-9]+" "ufs/ufs/quota.h" gen_table "rebootopt" "RB_[A-Z]+[[:space:]]+0x[0-9]+" "sys/reboot.h" gen_table "rforkflags" "RF[A-Z]+[[:space:]]+\([0-9]+<<[0-9]+\)" "sys/unistd.h" gen_table "rlimit" "RLIMIT_[A-Z]+[[:space:]]+[0-9]+" "sys/resource.h" gen_table "rusage" "RUSAGE_[A-Z]+[[:space:]]+[-0-9]+" "sys/resource.h" gen_table "schedpolicy" "SCHED_[A-Z]+[[:space:]]+[0-9]+" "sched.h" gen_table "sendfileflags" "SF_[A-Z]+[[:space:]]+[0-9]+" "sys/socket.h" gen_table "shmatflags" "SHM_[A-Z]+[[:space:]]+[0-9]{6}" "sys/shm.h" gen_table "shutdownhow" "SHUT_[A-Z]+[[:space:]]+[0-9]+" "sys/socket.h" gen_table "sigbuscode" "BUS_[A-Z]+[[:space:]]+[0-9]+" "sys/signal.h" gen_table "sigchldcode" "CLD_[A-Z]+[[:space:]]+[0-9]+" "sys/signal.h" gen_table "sigfpecode" "FPE_[A-Z]+[[:space:]]+[0-9]+" "sys/signal.h" gen_table "sigprocmaskhow" "SIG_[A-Z]+[[:space:]]+[0-9]+" "sys/signal.h" gen_table "sigillcode" "ILL_[A-Z]+[[:space:]]+[0-9]+" "sys/signal.h" gen_table "sigsegvcode" "SEGV_[A-Z]+[[:space:]]+[0-9]+" "sys/signal.h" gen_table "sigtrapcode" "TRAP_[A-Z]+[[:space:]]+[0-9]+" "sys/signal.h" gen_table "sockdomain" "PF_[[:alnum:]]+[[:space:]]+" "sys/socket.h" gen_table "sockfamily" "AF_[[:alnum:]]+[[:space:]]+" "sys/socket.h" gen_table "sockipproto" "IPPROTO_[[:alnum:]]+[[:space:]]+" "netinet/in.h" gen_table "sockopt" "SO_[A-Z]+[[:space:]]+0x[0-9]+" "sys/socket.h" gen_table "sockoptip" "(IP_[[:alnum:]_]+|MCAST_[[:alnum:]_]+_GROUP)[[:space:]]+" "netinet/in.h" "IP_DEFAULT|IP_MIN|IP_MAX|IP_PORTRANGE" gen_table "sockoptipv6" "IPV6_[[:alnum:]_]+[[:space:]]+[0-9]+" "netinet6/in6.h" "IPV6_ADDR_|IPV6_TAG_DIRECT|IPV6_OPTIONS|IPV6_RECVOPTS|IPV6_RECVRETOPTS|IPV6_RECVDSTADDR|IPV6_RETOPTS|IPV6_2292|IPV6_RECVRTHDRDSTOPTS|IPV6_REACHCONF|IPV6_PKTOPTIONS" gen_table "sockoptsctp" "SCTP_[[:alnum:]_]+[[:space:]]+[0-9]+" "netinet/sctp.h" gen_table "sockopttcp" "TCP_[[:alnum:]_]+[[:space:]]+[0-9]+" "netinet/tcp.h" "TCP_MIN|TCP_MAX[^S]|TCP_MSS|TCP_[[:alnum:]_]+_MAX|TCP_FASTOPEN_MIN_COOKIE_LEN|TCP_FASTOPEN_PSK_LEN" gen_table "sockoptudp" "UDP_[[:alnum:]]+[[:space:]]+[0-9]+" "netinet/udp.h" "UDP_ENCAP_" gen_table "sockoptudplite" "UDPLITE_[[:alnum:]_]+[[:space:]]+[0-9]+" "netinet/udplite.h" gen_table "socktype" "SOCK_[A-Z]+[[:space:]]+[1-9]+[0-9]*" "sys/socket.h" gen_table "thrcreateflags" "THR_[A-Z]+[[:space:]]+0x[0-9]+" "sys/thr.h" gen_table "umtxop" "UMTX_OP_[[:alnum:]][[:alnum:]_]*[[:space:]]+[0-9]+" "sys/umtx.h" gen_table "umtxopflags" "UMTX_OP__[[:alnum:]_]+[[:space:]]+[0-9]+" "sys/umtx.h" gen_table "vmprot" "VM_PROT_[A-Z_]+[[:space:]]+\(\(vm_prot_t\)[[:space:]]+0x[0-9]+\)" "vm/vm.h" gen_table "vmresult" "KERN_[A-Z_]+[[:space:]]+[0-9]+" "vm/vm_param.h" gen_table "wait6opt" "W[A-Z]+[[:space:]]+[0-9]+" "sys/wait.h" gen_table "seekwhence" "SEEK_[A-Z]+[[:space:]]+[0-9]+" "sys/unistd.h" gen_table "fcntlcmd" "F_[A-Z0-9_]+[[:space:]]+[0-9]+[[:space:]]+" "sys/fcntl.h" "F_CANCEL|F_..LCK" gen_table "mmapflags" "MAP_[A-Z_]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/mman.h" gen_table "rtpriofuncs" "RTP_[A-Z]+[[:space:]]+[0-9]+" "sys/rtprio.h" gen_table "msgflags" "MSG_[A-Z_]+[[:space:]]+0x[0-9]+" "sys/socket.h" "MSG_SOCALLBCK|MSG_MORETOCOME|MSG_TLSAPPDATA" gen_table "sigcode" "SI_[A-Z]+[[:space:]]+0(x[0-9abcdef]+)?" "sys/signal.h" gen_table "umtxcvwaitflags" "CVWAIT_[A-Z_]+[[:space:]]+0x[0-9]+" "sys/umtx.h" gen_table "umtxrwlockflags" "URWLOCK_PREFER_READER[[:space:]]+0x[0-9]+" "sys/umtx.h" gen_table "caprights" "CAP_[A-Z_]+[[:space:]]+CAPRIGHT\([0-9],[[:space:]]+0x[0-9]{16}ULL\)" "sys/capsicum.h" gen_table "sctpprpolicy" "SCTP_PR_SCTP_[A-Z_]+[[:space:]]+0x[0-9]+" "netinet/sctp_uio.h" "SCTP_PR_SCTP_ALL" gen_table "cmsgtypesocket" "SCM_[A-Z_]+[[:space:]]+0x[0-9]+" "sys/socket.h" if [ -e "${include_dir}/x86/sysarch.h" ]; then gen_table "sysarchnum" "(AMD64|I386)_[A-Z86_]+[[:space:]]+[0-9]+" "x86/sysarch.h" else gen_table "sysarchnum" "[A-Z_]+[[:space:]]+[0-9]+" "machine/sysarch.h" fi gen_table "shmflags" "SHM_[A-Z_]+[[:space:]]+0x[0-9]+" "sys/mman.h" "SHM_ANON" # Generate a .depend file for our output file if [ -n "$output_file" ]; then depend_tmp=$(mktemp -u) { echo "$output_file: \\" echo "$all_headers" | tr ' ' '\n' | sort -u | sed -e "s,^, $include_dir/," -e 's,$, \\,' echo } > "$depend_tmp" if cmp -s "$output_tmp" "$output_file"; then rm -f "$output_tmp" "$depend_tmp" else mv -f "$depend_tmp" ".depend.${output_file}" mv -f "$output_tmp" "$output_file" fi fi diff --git a/lib/libsysdecode/sysdecode.h b/lib/libsysdecode/sysdecode.h index 1a35d0c9d685..5ab17340818c 100644 --- a/lib/libsysdecode/sysdecode.h +++ b/lib/libsysdecode/sysdecode.h @@ -1,132 +1,133 @@ /*- * Copyright (c) 2015 John H. Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __SYSDECODE_H__ #define __SYSDECODE_H__ enum sysdecode_abi { SYSDECODE_ABI_UNKNOWN = 0, SYSDECODE_ABI_FREEBSD, SYSDECODE_ABI_FREEBSD32, SYSDECODE_ABI_LINUX, SYSDECODE_ABI_LINUX32, }; int sysdecode_abi_to_freebsd_errno(enum sysdecode_abi _abi, int _error); bool sysdecode_access_mode(FILE *_fp, int _mode, int *_rem); const char *sysdecode_acltype(int _type); const char *sysdecode_atfd(int _fd); bool sysdecode_atflags(FILE *_fp, int _flags, int *_rem); bool sysdecode_cap_fcntlrights(FILE *_fp, uint32_t _rights, uint32_t *_rem); void sysdecode_cap_rights(FILE *_fp, cap_rights_t *_rightsp); +bool sysdecode_close_range_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_cmsg_type(int _cmsg_level, int _cmsg_type); const char *sysdecode_extattrnamespace(int _namespace); const char *sysdecode_fadvice(int _advice); void sysdecode_fcntl_arg(FILE *_fp, int _cmd, uintptr_t _arg, int _base); bool sysdecode_fcntl_arg_p(int _cmd); const char *sysdecode_fcntl_cmd(int _cmd); bool sysdecode_fcntl_fileflags(FILE *_fp, int _flags, int *_rem); bool sysdecode_fileflags(FILE *_fp, fflags_t _flags, fflags_t *_rem); bool sysdecode_filemode(FILE *_fp, int _mode, int *_rem); bool sysdecode_flock_operation(FILE *_fp, int _operation, int *_rem); int sysdecode_freebsd_to_abi_errno(enum sysdecode_abi _abi, int _error); const char *sysdecode_getfsstat_mode(int _mode); const char *sysdecode_getrusage_who(int _who); const char *sysdecode_idtype(int _idtype); const char *sysdecode_ioctlname(unsigned long _val); const char *sysdecode_ipproto(int _protocol); void sysdecode_kevent_fflags(FILE *_fp, short _filter, int _fflags, int _base); const char *sysdecode_kevent_filter(int _filter); bool sysdecode_kevent_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_kldsym_cmd(int _cmd); const char *sysdecode_kldunload_flags(int _flags); const char *sysdecode_lio_listio_mode(int _mode); const char *sysdecode_madvice(int _advice); const char *sysdecode_minherit_inherit(int _inherit); const char *sysdecode_msgctl_cmd(int _cmd); bool sysdecode_mlockall_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_mmap_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_mmap_prot(FILE *_fp, int _prot, int *_rem); bool sysdecode_mount_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_msg_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_msync_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_nfssvc_flags(int _flags); bool sysdecode_open_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_pathconf_name(int _name); bool sysdecode_pipe2_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_prio_which(int _which); const char *sysdecode_procctl_cmd(int _cmd); const char *sysdecode_ptrace_request(int _request); bool sysdecode_quotactl_cmd(FILE *_fp, int _cmd); bool sysdecode_reboot_howto(FILE *_fp, int _howto, int *_rem); bool sysdecode_rfork_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_rlimit(int _resource); const char *sysdecode_rtprio_function(int _function); const char *sysdecode_scheduler_policy(int _policy); bool sysdecode_sctp_nxt_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_sctp_pr_policy(int _policy); bool sysdecode_sctp_rcv_flags(FILE *_fp, int _flags, int *_rem); void sysdecode_sctp_sinfo_flags(FILE *_fp, int _sinfo_flags); bool sysdecode_sctp_snd_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_semctl_cmd(int _cmd); bool sysdecode_semget_flags(FILE *_fp, int _flag, int *_rem); bool sysdecode_sendfile_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_shmat_flags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_shmctl_cmd(int _cmd); const char *sysdecode_shutdown_how(int _how); const char *sysdecode_sigbus_code(int _si_code); const char *sysdecode_sigchld_code(int _si_code); const char *sysdecode_sigcode(int _sig, int _si_code); const char *sysdecode_sigfpe_code(int _si_code); const char *sysdecode_sigill_code(int _si_code); const char *sysdecode_signal(int _sig); const char *sysdecode_sigprocmask_how(int _how); const char *sysdecode_sigsegv_code(int _si_code); const char *sysdecode_sigtrap_code(int _si_code); const char *sysdecode_sockaddr_family(int _sa_family); const char *sysdecode_socketdomain(int _domain); const char *sysdecode_socket_protocol(int _domain, int _protocol); bool sysdecode_socket_type(FILE *_fp, int _type, int *_rem); const char *sysdecode_sockopt_level(int _level); const char *sysdecode_sockopt_name(int _level, int _optname); const char *sysdecode_syscallname(enum sysdecode_abi _abi, unsigned int _code); const char *sysdecode_sysarch_number(int _number); bool sysdecode_thr_create_flags(FILE *_fp, int _flags, int *_rem); bool sysdecode_umtx_cvwait_flags(FILE *_fp, u_long _flags, u_long *_rem); const char *sysdecode_umtx_op(int _op); bool sysdecode_umtx_op_flags(FILE *_fp, int op, int *_rem); bool sysdecode_umtx_rwlock_flags(FILE *_fp, u_long _flags, u_long *_rem); int sysdecode_utrace(FILE *_fp, void *_buf, size_t _len); bool sysdecode_vmprot(FILE *_fp, int _type, int *_rem); const char *sysdecode_vmresult(int _result); bool sysdecode_wait4_options(FILE *_fp, int _options, int *_rem); bool sysdecode_wait6_options(FILE *_fp, int _options, int *_rem); const char *sysdecode_whence(int _whence); bool sysdecode_shmflags(FILE *_fp, int _flags, int *_rem); #endif /* !__SYSDECODE_H__ */ diff --git a/lib/libsysdecode/sysdecode_mask.3 b/lib/libsysdecode/sysdecode_mask.3 index 54e182db31d7..45464e333eb4 100644 --- a/lib/libsysdecode/sysdecode_mask.3 +++ b/lib/libsysdecode/sysdecode_mask.3 @@ -1,242 +1,245 @@ .\" .\" Copyright (c) 2016 John Baldwin .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd January 16, 2018 +.Dd March 3, 2022 .Dt sysdecode_mask 3 .Os .Sh NAME .Nm sysdecode_mask , .Nm sysdecode_accessmode , .Nm sysdecode_atflags , .Nm sysdecode_capfcntlrights , +.Nm sysdecode_close_range_flags , .Nm sysdecode_fcntl_fileflags , .Nm sysdecode_fileflags , .Nm sysdecode_filemode , .Nm sysdecode_flock_operation , .Nm sysdecode_mlockall_flags , .Nm sysdecode_mmap_flags , .Nm sysdecode_mmap_prot , .Nm sysdecode_mount_flags , .Nm sysdecode_msg_flags , .Nm sysdecode_msync_flags , .Nm sysdecode_open_flags , .Nm sysdecode_pipe2_flags , .Nm sysdecode_reboot_howto , .Nm sysdecode_rfork_flags , .Nm sysdecode_semget_flags , .Nm sysdecode_sendfile_flags , .Nm sysdecode_shmat_flags , .Nm sysdecode_sctp_nxt_flags , .Nm sysdecode_sctp_rcv_flags , .Nm sysdecode_sctp_snd_flags , .Nm sysdecode_socket_type , .Nm sysdecode_thr_create_flags , .Nm sysdecode_umtx_cvwait_flags , .Nm sysdecode_umtx_rwlock_flags , .Nm sysdecode_vmprot , .Nm sysdecode_wait4_options , .Nm sysdecode_wait6_options .Nd print name of various bitmask values .Sh LIBRARY .Lb libsysdecode .Sh SYNOPSIS .In sys/types.h .In stdbool.h .In stdio.h .In sysdecode.h .Ft bool .Fn sysdecode_access_mode "FILE *fp" "int mode" "int *rem" .Ft bool .Fn sysdecode_atflags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_cap_fcntlrights "FILE *fp" "uint32_t rights" "uint32_t *rem" .Ft bool +.Fn sysdecode_close_range_flags "FILE *fp" "int flags" "int *rem" +.Ft bool .Fn sysdecode_fcntl_fileflags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_fileflags "FILE *fp" "fflags_t flags" "fflags_t *rem" .Ft bool .Fn sysdecode_filemode "FILE *fp" "int mode" "int *rem" .Ft bool .Fn sysdecode_flock_operation "FILE *fp" "int operation" "int *rem" .Ft bool .Fn sysdecode_mlockall_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_mmap_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_mmap_prot "FILE *fp" "int prot" "int *rem" .Ft bool .Fn sysdecode_mount_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_msg_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_msync_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_open_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_pipe2_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_reboot_howto "FILE *fp" "int howto" "int *rem" .Ft bool .Fn sysdecode_rfork_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_sctp_nxt_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_sctp_rcv_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_sctp_snd_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_semget_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_sendfile_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_shmat_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_socket_type "FILE *fp" "int type" "int *rem" .Ft bool .Fn sysdecode_thr_create_flags "FILE *fp" "int flags" "int *rem" .Ft bool .Fn sysdecode_umtx_cvwait_flags "FILE *fp" "u_long flags" "u_long *rem" .Ft bool .Fn sysdecode_umtx_rwlock_flags "FILE *fp" "u_long flags" "u_long *rem" .Ft bool .Fn sysdecode_vmprot "FILE *fp" "int type" "int *rem" .Ft bool .Fn sysdecode_wait4_options "FILE *fp" "int options" "int *rem" .Ft bool .Fn sysdecode_wait6_options "FILE *fp" "int options" "int *rem" .Sh DESCRIPTION The .Nm functions are used to generate a text description of an integer value built from a mask of bitfields. The text description lists the C macros for field values joined by pipe .Sq | characters matching the format used in C source code. Most of the values decoded by these functions are passed as arguments to system calls, though some of these values are used internally in the kernel. .Pp Each function writes the text description to .Fa fp . The second argument should contain the integer value to be decoded. The .Fa rem argument is set to the value of any bits that were not decoded .Pq bit fields that do not have a corresponding C macro . .Fa rem may be set to .Dv NULL if the caller does not need this value. Each function returns .Dv true if any bit fields in the value were decoded and .Dv false if no bit fields were decoded. .Pp Most of these functions decode an argument passed to a system call: .Bl -column "Fn sysdecode_flock_operation" "Xr cap_fcntls_limit 2" .It Sy Function Ta Sy System Call Ta Sy Argument .It Fn sysdecode_access_mode Ta Xr access 2 Ta Fa mode .It Fn sysdecode_atflags Ta Xr chflagsat 2 , Xr fstatat 2 Ta Fa atflag , Fa flag .It Fn sysdecode_cap_fcntlrights Ta Xr cap_fcntls_limit 2 Ta Fa fcntlrights .It Fn sysdecode_fileflags Ta Xr chflags 2 Ta Fa flags .It Fn sysdecode_filemode Ta Xr chmod 2 , Xr open 2 Ta mode .It Fn sysdecode_flock_operation Ta Xr flock 2 Ta Fa operation .It Fn sysdecode_mlockall_flags Ta Xr mlockall 2 Ta Fa flags .It Fn sysdecode_mmap_flags Ta Xr mmap 2 Ta Fa flags .It Fn sysdecode_mmap_prot Ta Xr mmap 2 Ta Fa prot .It Fn sysdecode_mount_flags Ta Xr mount 2 Ta Fa flags .It Fn sysdecode_msg_flags Ta Xr recv 2 , Xr send 2 Ta Fa flags .It Fn sysdecode_msync_flags Ta Xr msync 2 Ta Fa flags .It Fn sysdecode_open_flags Ta Xr open 2 Ta Fa flags .It Fn sysdecode_pipe2_flags Ta Xr pipe2 Ta Fa flags .It Fn sysdecode_reboot_howto Ta Xr reboot 2 Ta Fa howto .It Fn sysdecode_rfork_flags Ta Xr rfork 2 Ta Fa flags .It Fn sysdecode_semget_flags Ta Xr semget 2 Ta Fa flags .It Fn sysdecode_sendfile_flags Ta Xr sendfile 2 Ta Fa flags .It Fn sysdecode_shmat_flags Ta Xr shmat 2 Ta Fa flags .It Fn sysdecode_socket_type Ta Xr socket 2 Ta Fa type .It Fn sysdecode_thr_create_flags Ta Xr thr_create 2 Ta Fa flags .It Fn sysdecode_wait4_options Ta Xr wait4 2 Ta Fa options .It Fn sysdecode_wait6_options Ta Xr wait6 2 Ta Fa options .El .Pp Other functions decode the values described below: .Bl -tag -width ".Fn sysdecode_umtx_cvwait_flags" .It Fn sysdecode_fcntl_fileflags The file flags used with the .Dv F_GETFL and .Dv F_SETFL .Xr fcntl 2 commands. .It Fn sysdecode_sctp_nxt_flags The .Fa nxt_flags member of a .Vt struct sctp_nxtinfo . .It Fn sysdecode_sctp_rcv_flags The .Fa rcv_flags member of a .Vt struct sctp_rcvinfo . .It Fn sysdecode_sctp_snd_flags The .Fa snd_flags member of a .Vt struct sctp_sndinfo . .It Fn sysdecode_umtx_cvwait_flags The .Fa val argument to .Xr _umtx_op 2 for .Dv UMTX_OP_CV_WAIT operations. .It Fn sysdecode_umtx_rwlock_flags The .Fa val argument to .Xr _umtx_op 2 for .Dv UMTX_OP_RW_RDLOCK operations. .It Fn sysdecode_vmprot The memory protection flags stored in .Vt vm_prot_t variables. .El .Sh RETURN VALUES The .Nm functions return .Dv true if any bit fields in the value were decoded and .Dv false if no bit fields were decoded. .Sh SEE ALSO .Xr sysdecode 3 , .Xr sysdecode_enum 3 diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index ed9d0bc3c253..07984b9a4640 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -1,5143 +1,5178 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_ddb.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); static MALLOC_DEFINE(M_PWD, "pwd", "Descriptor table vnodes"); static MALLOC_DEFINE(M_PWDDESC, "pwddesc", "Pwd descriptors"); static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", "file desc to leader structures"); static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); MALLOC_DEFINE(M_FILECAPS, "filecaps", "descriptor capabilities"); MALLOC_DECLARE(M_FADVISE); static __read_mostly uma_zone_t file_zone; static __read_mostly uma_zone_t filedesc0_zone; __read_mostly uma_zone_t pwd_zone; VFS_SMR_DECLARE; static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, bool holdleaders, bool audit); static void export_file_to_kinfo(struct file *fp, int fd, cap_rights_t *rightsp, struct kinfo_file *kif, struct filedesc *fdp, int flags); static int fd_first_free(struct filedesc *fdp, int low, int size); static void fdgrowtable(struct filedesc *fdp, int nfd); static void fdgrowtable_exp(struct filedesc *fdp, int nfd); static void fdunused(struct filedesc *fdp, int fd); static void fdused(struct filedesc *fdp, int fd); static int fget_unlocked_seq(struct thread *td, int fd, cap_rights_t *needrightsp, struct file **fpp, seqc_t *seqp); static int getmaxfd(struct thread *td); static u_long *filecaps_copy_prep(const struct filecaps *src); static void filecaps_copy_finish(const struct filecaps *src, struct filecaps *dst, u_long *ioctls); static u_long *filecaps_free_prep(struct filecaps *fcaps); static void filecaps_free_finish(u_long *ioctls); static struct pwd *pwd_alloc(void); /* * Each process has: * * - An array of open file descriptors (fd_ofiles) * - An array of file flags (fd_ofileflags) * - A bitmap recording which descriptors are in use (fd_map) * * A process starts out with NDFILE descriptors. The value of NDFILE has * been selected based the historical limit of 20 open files, and an * assumption that the majority of processes, especially short-lived * processes like shells, will never need more. * * If this initial allocation is exhausted, a larger descriptor table and * map are allocated dynamically, and the pointers in the process's struct * filedesc are updated to point to those. This is repeated every time * the process runs out of file descriptors (provided it hasn't hit its * resource limit). * * Since threads may hold references to individual descriptor table * entries, the tables are never freed. Instead, they are placed on a * linked list and freed only when the struct filedesc is released. */ #define NDFILE 20 #define NDSLOTSIZE sizeof(NDSLOTTYPE) #define NDENTRIES (NDSLOTSIZE * __CHAR_BIT) #define NDSLOT(x) ((x) / NDENTRIES) #define NDBIT(x) ((NDSLOTTYPE)1 << ((x) % NDENTRIES)) #define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES) /* * SLIST entry used to keep track of ofiles which must be reclaimed when * the process exits. */ struct freetable { struct fdescenttbl *ft_table; SLIST_ENTRY(freetable) ft_next; }; /* * Initial allocation: a filedesc structure + the head of SLIST used to * keep track of old ofiles + enough space for NDFILE descriptors. */ struct fdescenttbl0 { int fdt_nfiles; struct filedescent fdt_ofiles[NDFILE]; }; struct filedesc0 { struct filedesc fd_fd; SLIST_HEAD(, freetable) fd_free; struct fdescenttbl0 fd_dfiles; NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; }; /* * Descriptor management. */ static int __exclusive_cache_line openfiles; /* actual number of open files */ struct mtx sigio_lock; /* mtx to protect pointers to sigio */ void __read_mostly (*mq_fdclose)(struct thread *td, int fd, struct file *fp); /* * If low >= size, just return low. Otherwise find the first zero bit in the * given bitmap, starting at low and not exceeding size - 1. Return size if * not found. */ static int fd_first_free(struct filedesc *fdp, int low, int size) { NDSLOTTYPE *map = fdp->fd_map; NDSLOTTYPE mask; int off, maxoff; if (low >= size) return (low); off = NDSLOT(low); if (low % NDENTRIES) { mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES))); if ((mask &= ~map[off]) != 0UL) return (off * NDENTRIES + ffsl(mask) - 1); ++off; } for (maxoff = NDSLOTS(size); off < maxoff; ++off) if (map[off] != ~0UL) return (off * NDENTRIES + ffsl(~map[off]) - 1); return (size); } /* * Find the last used fd. * * Call this variant if fdp can't be modified by anyone else (e.g, during exec). * Otherwise use fdlastfile. */ int fdlastfile_single(struct filedesc *fdp) { NDSLOTTYPE *map = fdp->fd_map; int off, minoff; off = NDSLOT(fdp->fd_nfiles - 1); for (minoff = NDSLOT(0); off >= minoff; --off) if (map[off] != 0) return (off * NDENTRIES + flsl(map[off]) - 1); return (-1); } int fdlastfile(struct filedesc *fdp) { FILEDESC_LOCK_ASSERT(fdp); return (fdlastfile_single(fdp)); } static int fdisused(struct filedesc *fdp, int fd) { KASSERT(fd >= 0 && fd < fdp->fd_nfiles, ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles)); return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0); } /* * Mark a file descriptor as used. */ static void fdused_init(struct filedesc *fdp, int fd) { KASSERT(!fdisused(fdp, fd), ("fd=%d is already used", fd)); fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd); } static void fdused(struct filedesc *fdp, int fd) { FILEDESC_XLOCK_ASSERT(fdp); fdused_init(fdp, fd); if (fd == fdp->fd_freefile) fdp->fd_freefile++; } /* * Mark a file descriptor as unused. */ static void fdunused(struct filedesc *fdp, int fd) { FILEDESC_XLOCK_ASSERT(fdp); KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd)); KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, ("fd=%d is still in use", fd)); fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); if (fd < fdp->fd_freefile) fdp->fd_freefile = fd; } /* * Free a file descriptor. * * Avoid some work if fdp is about to be destroyed. */ static inline void fdefree_last(struct filedescent *fde) { filecaps_free(&fde->fde_caps); } static inline void fdfree(struct filedesc *fdp, int fd) { struct filedescent *fde; FILEDESC_XLOCK_ASSERT(fdp); fde = &fdp->fd_ofiles[fd]; #ifdef CAPABILITIES seqc_write_begin(&fde->fde_seqc); #endif fde->fde_file = NULL; #ifdef CAPABILITIES seqc_write_end(&fde->fde_seqc); #endif fdefree_last(fde); fdunused(fdp, fd); } /* * System calls on descriptors. */ #ifndef _SYS_SYSPROTO_H_ struct getdtablesize_args { int dummy; }; #endif /* ARGSUSED */ int sys_getdtablesize(struct thread *td, struct getdtablesize_args *uap) { #ifdef RACCT uint64_t lim; #endif td->td_retval[0] = getmaxfd(td); #ifdef RACCT PROC_LOCK(td->td_proc); lim = racct_get_limit(td->td_proc, RACCT_NOFILE); PROC_UNLOCK(td->td_proc); if (lim < td->td_retval[0]) td->td_retval[0] = lim; #endif return (0); } /* * Duplicate a file descriptor to a particular value. * * Note: keep in mind that a potential race condition exists when closing * descriptors from a shared descriptor table (via rfork). */ #ifndef _SYS_SYSPROTO_H_ struct dup2_args { u_int from; u_int to; }; #endif /* ARGSUSED */ int sys_dup2(struct thread *td, struct dup2_args *uap) { return (kern_dup(td, FDDUP_FIXED, 0, (int)uap->from, (int)uap->to)); } /* * Duplicate a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct dup_args { u_int fd; }; #endif /* ARGSUSED */ int sys_dup(struct thread *td, struct dup_args *uap) { return (kern_dup(td, FDDUP_NORMAL, 0, (int)uap->fd, 0)); } /* * The file control system call. */ #ifndef _SYS_SYSPROTO_H_ struct fcntl_args { int fd; int cmd; long arg; }; #endif /* ARGSUSED */ int sys_fcntl(struct thread *td, struct fcntl_args *uap) { return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, uap->arg)); } int kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg) { struct flock fl; struct __oflock ofl; intptr_t arg1; int error, newcmd; error = 0; newcmd = cmd; switch (cmd) { case F_OGETLK: case F_OSETLK: case F_OSETLKW: /* * Convert old flock structure to new. */ error = copyin((void *)(intptr_t)arg, &ofl, sizeof(ofl)); fl.l_start = ofl.l_start; fl.l_len = ofl.l_len; fl.l_pid = ofl.l_pid; fl.l_type = ofl.l_type; fl.l_whence = ofl.l_whence; fl.l_sysid = 0; switch (cmd) { case F_OGETLK: newcmd = F_GETLK; break; case F_OSETLK: newcmd = F_SETLK; break; case F_OSETLKW: newcmd = F_SETLKW; break; } arg1 = (intptr_t)&fl; break; case F_GETLK: case F_SETLK: case F_SETLKW: case F_SETLK_REMOTE: error = copyin((void *)(intptr_t)arg, &fl, sizeof(fl)); arg1 = (intptr_t)&fl; break; default: arg1 = arg; break; } if (error) return (error); error = kern_fcntl(td, fd, newcmd, arg1); if (error) return (error); if (cmd == F_OGETLK) { ofl.l_start = fl.l_start; ofl.l_len = fl.l_len; ofl.l_pid = fl.l_pid; ofl.l_type = fl.l_type; ofl.l_whence = fl.l_whence; error = copyout(&ofl, (void *)(intptr_t)arg, sizeof(ofl)); } else if (cmd == F_GETLK) { error = copyout(&fl, (void *)(intptr_t)arg, sizeof(fl)); } return (error); } int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) { struct filedesc *fdp; struct flock *flp; struct file *fp, *fp2; struct filedescent *fde; struct proc *p; struct vnode *vp; struct mount *mp; struct kinfo_file *kif; int error, flg, kif_sz, seals, tmp; uint64_t bsize; off_t foffset; error = 0; flg = F_POSIX; p = td->td_proc; fdp = p->p_fd; AUDIT_ARG_FD(cmd); AUDIT_ARG_CMD(cmd); switch (cmd) { case F_DUPFD: tmp = arg; error = kern_dup(td, FDDUP_FCNTL, 0, fd, tmp); break; case F_DUPFD_CLOEXEC: tmp = arg; error = kern_dup(td, FDDUP_FCNTL, FDDUP_FLAG_CLOEXEC, fd, tmp); break; case F_DUP2FD: tmp = arg; error = kern_dup(td, FDDUP_FIXED, 0, fd, tmp); break; case F_DUP2FD_CLOEXEC: tmp = arg; error = kern_dup(td, FDDUP_FIXED, FDDUP_FLAG_CLOEXEC, fd, tmp); break; case F_GETFD: error = EBADF; FILEDESC_SLOCK(fdp); fde = fdeget_noref(fdp, fd); if (fde != NULL) { td->td_retval[0] = (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0; error = 0; } FILEDESC_SUNLOCK(fdp); break; case F_SETFD: error = EBADF; FILEDESC_XLOCK(fdp); fde = fdeget_noref(fdp, fd); if (fde != NULL) { fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) | (arg & FD_CLOEXEC ? UF_EXCLOSE : 0); error = 0; } FILEDESC_XUNLOCK(fdp); break; case F_GETFL: error = fget_fcntl(td, fd, &cap_fcntl_rights, F_GETFL, &fp); if (error != 0) break; td->td_retval[0] = OFLAGS(fp->f_flag); fdrop(fp, td); break; case F_SETFL: error = fget_fcntl(td, fd, &cap_fcntl_rights, F_SETFL, &fp); if (error != 0) break; if (fp->f_ops == &path_fileops) { fdrop(fp, td); error = EBADF; break; } do { tmp = flg = fp->f_flag; tmp &= ~FCNTLFLAGS; tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; } while (atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0); tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); if (error != 0) { fdrop(fp, td); break; } tmp = fp->f_flag & FASYNC; error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td); if (error == 0) { fdrop(fp, td); break; } atomic_clear_int(&fp->f_flag, FNONBLOCK); tmp = 0; (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); fdrop(fp, td); break; case F_GETOWN: error = fget_fcntl(td, fd, &cap_fcntl_rights, F_GETOWN, &fp); if (error != 0) break; error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); if (error == 0) td->td_retval[0] = tmp; fdrop(fp, td); break; case F_SETOWN: error = fget_fcntl(td, fd, &cap_fcntl_rights, F_SETOWN, &fp); if (error != 0) break; tmp = arg; error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); fdrop(fp, td); break; case F_SETLK_REMOTE: error = priv_check(td, PRIV_NFS_LOCKD); if (error != 0) return (error); flg = F_REMOTE; goto do_setlk; case F_SETLKW: flg |= F_WAIT; /* FALLTHROUGH F_SETLK */ case F_SETLK: do_setlk: flp = (struct flock *)arg; if ((flg & F_REMOTE) != 0 && flp->l_sysid == 0) { error = EINVAL; break; } error = fget_unlocked(td, fd, &cap_flock_rights, &fp); if (error != 0) break; if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) { error = EBADF; fdrop(fp, td); break; } if (flp->l_whence == SEEK_CUR) { foffset = foffset_get(fp); if (foffset < 0 || (flp->l_start > 0 && foffset > OFF_MAX - flp->l_start)) { error = EOVERFLOW; fdrop(fp, td); break; } flp->l_start += foffset; } vp = fp->f_vnode; switch (flp->l_type) { case F_RDLCK: if ((fp->f_flag & FREAD) == 0) { error = EBADF; break; } if ((p->p_leader->p_flag & P_ADVLOCK) == 0) { PROC_LOCK(p->p_leader); p->p_leader->p_flag |= P_ADVLOCK; PROC_UNLOCK(p->p_leader); } error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, flp, flg); break; case F_WRLCK: if ((fp->f_flag & FWRITE) == 0) { error = EBADF; break; } if ((p->p_leader->p_flag & P_ADVLOCK) == 0) { PROC_LOCK(p->p_leader); p->p_leader->p_flag |= P_ADVLOCK; PROC_UNLOCK(p->p_leader); } error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, flp, flg); break; case F_UNLCK: error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, flp, flg); break; case F_UNLCKSYS: if (flg != F_REMOTE) { error = EINVAL; break; } error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCKSYS, flp, flg); break; default: error = EINVAL; break; } if (error != 0 || flp->l_type == F_UNLCK || flp->l_type == F_UNLCKSYS) { fdrop(fp, td); break; } /* * Check for a race with close. * * The vnode is now advisory locked (or unlocked, but this case * is not really important) as the caller requested. * We had to drop the filedesc lock, so we need to recheck if * the descriptor is still valid, because if it was closed * in the meantime we need to remove advisory lock from the * vnode - close on any descriptor leading to an advisory * locked vnode, removes that lock. * We will return 0 on purpose in that case, as the result of * successful advisory lock might have been externally visible * already. This is fine - effectively we pretend to the caller * that the closing thread was a bit slower and that the * advisory lock succeeded before the close. */ error = fget_unlocked(td, fd, &cap_no_rights, &fp2); if (error != 0) { fdrop(fp, td); break; } if (fp != fp2) { flp->l_whence = SEEK_SET; flp->l_start = 0; flp->l_len = 0; flp->l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, flp, F_POSIX); } fdrop(fp, td); fdrop(fp2, td); break; case F_GETLK: error = fget_unlocked(td, fd, &cap_flock_rights, &fp); if (error != 0) break; if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) { error = EBADF; fdrop(fp, td); break; } flp = (struct flock *)arg; if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && flp->l_type != F_UNLCK) { error = EINVAL; fdrop(fp, td); break; } if (flp->l_whence == SEEK_CUR) { foffset = foffset_get(fp); if ((flp->l_start > 0 && foffset > OFF_MAX - flp->l_start) || (flp->l_start < 0 && foffset < OFF_MIN - flp->l_start)) { error = EOVERFLOW; fdrop(fp, td); break; } flp->l_start += foffset; } vp = fp->f_vnode; error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, F_POSIX); fdrop(fp, td); break; case F_ADD_SEALS: error = fget_unlocked(td, fd, &cap_no_rights, &fp); if (error != 0) break; error = fo_add_seals(fp, arg); fdrop(fp, td); break; case F_GET_SEALS: error = fget_unlocked(td, fd, &cap_no_rights, &fp); if (error != 0) break; if (fo_get_seals(fp, &seals) == 0) td->td_retval[0] = seals; else error = EINVAL; fdrop(fp, td); break; case F_RDAHEAD: arg = arg ? 128 * 1024: 0; /* FALLTHROUGH */ case F_READAHEAD: error = fget_unlocked(td, fd, &cap_no_rights, &fp); if (error != 0) break; if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) { fdrop(fp, td); error = EBADF; break; } vp = fp->f_vnode; if (vp->v_type != VREG) { fdrop(fp, td); error = ENOTTY; break; } /* * Exclusive lock synchronizes against f_seqcount reads and * writes in sequential_heuristic(). */ error = vn_lock(vp, LK_EXCLUSIVE); if (error != 0) { fdrop(fp, td); break; } if (arg >= 0) { bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize; arg = MIN(arg, INT_MAX - bsize + 1); fp->f_seqcount[UIO_READ] = MIN(IO_SEQMAX, (arg + bsize - 1) / bsize); atomic_set_int(&fp->f_flag, FRDAHEAD); } else { atomic_clear_int(&fp->f_flag, FRDAHEAD); } VOP_UNLOCK(vp); fdrop(fp, td); break; case F_ISUNIONSTACK: /* * Check if the vnode is part of a union stack (either the * "union" flag from mount(2) or unionfs). * * Prior to introduction of this op libc's readdir would call * fstatfs(2), in effect unnecessarily copying kilobytes of * data just to check fs name and a mount flag. * * Fixing the code to handle everything in the kernel instead * is a non-trivial endeavor and has low priority, thus this * horrible kludge facilitates the current behavior in a much * cheaper manner until someone(tm) sorts this out. */ error = fget_unlocked(td, fd, &cap_no_rights, &fp); if (error != 0) break; if (fp->f_type != DTYPE_VNODE) { fdrop(fp, td); error = EBADF; break; } vp = fp->f_vnode; /* * Since we don't prevent dooming the vnode even non-null mp * found can become immediately stale. This is tolerable since * mount points are type-stable (providing safe memory access) * and any vfs op on this vnode going forward will return an * error (meaning return value in this case is meaningless). */ mp = atomic_load_ptr(&vp->v_mount); if (__predict_false(mp == NULL)) { fdrop(fp, td); error = EBADF; break; } td->td_retval[0] = 0; if (mp->mnt_kern_flag & MNTK_UNIONFS || mp->mnt_flag & MNT_UNION) td->td_retval[0] = 1; fdrop(fp, td); break; case F_KINFO: #ifdef CAPABILITY_MODE if (IN_CAPABILITY_MODE(td)) { error = ECAPMODE; break; } #endif error = copyin((void *)arg, &kif_sz, sizeof(kif_sz)); if (error != 0) break; if (kif_sz != sizeof(*kif)) { error = EINVAL; break; } kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK | M_ZERO); FILEDESC_SLOCK(fdp); error = fget_cap_noref(fdp, fd, &cap_fcntl_rights, &fp, NULL); if (error == 0 && fhold(fp)) { export_file_to_kinfo(fp, fd, NULL, kif, fdp, 0); FILEDESC_SUNLOCK(fdp); fdrop(fp, td); if ((kif->kf_status & KF_ATTR_VALID) != 0) { kif->kf_structsize = sizeof(*kif); error = copyout(kif, (void *)arg, sizeof(*kif)); } else { error = EBADF; } } else { FILEDESC_SUNLOCK(fdp); if (error == 0) error = EBADF; } free(kif, M_TEMP); break; default: error = EINVAL; break; } return (error); } static int getmaxfd(struct thread *td) { return (min((int)lim_cur(td, RLIMIT_NOFILE), maxfilesperproc)); } /* * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD). */ int kern_dup(struct thread *td, u_int mode, int flags, int old, int new) { struct filedesc *fdp; struct filedescent *oldfde, *newfde; struct proc *p; struct file *delfp, *oldfp; u_long *oioctls, *nioctls; int error, maxfd; p = td->td_proc; fdp = p->p_fd; oioctls = NULL; MPASS((flags & ~(FDDUP_FLAG_CLOEXEC)) == 0); MPASS(mode < FDDUP_LASTMODE); AUDIT_ARG_FD(old); /* XXXRW: if (flags & FDDUP_FIXED) AUDIT_ARG_FD2(new); */ /* * Verify we have a valid descriptor to dup from and possibly to * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should * return EINVAL when the new descriptor is out of bounds. */ if (old < 0) return (EBADF); if (new < 0) return (mode == FDDUP_FCNTL ? EINVAL : EBADF); maxfd = getmaxfd(td); if (new >= maxfd) return (mode == FDDUP_FCNTL ? EINVAL : EBADF); error = EBADF; FILEDESC_XLOCK(fdp); if (fget_noref(fdp, old) == NULL) goto unlock; if (mode == FDDUP_FIXED && old == new) { td->td_retval[0] = new; if (flags & FDDUP_FLAG_CLOEXEC) fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE; error = 0; goto unlock; } oldfde = &fdp->fd_ofiles[old]; oldfp = oldfde->fde_file; if (!fhold(oldfp)) goto unlock; /* * If the caller specified a file descriptor, make sure the file * table is large enough to hold it, and grab it. Otherwise, just * allocate a new descriptor the usual way. */ switch (mode) { case FDDUP_NORMAL: case FDDUP_FCNTL: if ((error = fdalloc(td, new, &new)) != 0) { fdrop(oldfp, td); goto unlock; } break; case FDDUP_FIXED: if (new >= fdp->fd_nfiles) { /* * The resource limits are here instead of e.g. * fdalloc(), because the file descriptor table may be * shared between processes, so we can't really use * racct_add()/racct_sub(). Instead of counting the * number of actually allocated descriptors, just put * the limit on the size of the file descriptor table. */ #ifdef RACCT if (RACCT_ENABLED()) { error = racct_set_unlocked(p, RACCT_NOFILE, new + 1); if (error != 0) { error = EMFILE; fdrop(oldfp, td); goto unlock; } } #endif fdgrowtable_exp(fdp, new + 1); } if (!fdisused(fdp, new)) fdused(fdp, new); break; default: KASSERT(0, ("%s unsupported mode %d", __func__, mode)); } KASSERT(old != new, ("new fd is same as old")); /* Refetch oldfde because the table may have grown and old one freed. */ oldfde = &fdp->fd_ofiles[old]; KASSERT(oldfp == oldfde->fde_file, ("fdt_ofiles shift from growth observed at fd %d", old)); newfde = &fdp->fd_ofiles[new]; delfp = newfde->fde_file; nioctls = filecaps_copy_prep(&oldfde->fde_caps); /* * Duplicate the source descriptor. */ #ifdef CAPABILITIES seqc_write_begin(&newfde->fde_seqc); #endif oioctls = filecaps_free_prep(&newfde->fde_caps); fde_copy(oldfde, newfde); filecaps_copy_finish(&oldfde->fde_caps, &newfde->fde_caps, nioctls); if ((flags & FDDUP_FLAG_CLOEXEC) != 0) newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE; else newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE; #ifdef CAPABILITIES seqc_write_end(&newfde->fde_seqc); #endif td->td_retval[0] = new; error = 0; if (delfp != NULL) { (void) closefp(fdp, new, delfp, td, true, false); FILEDESC_UNLOCK_ASSERT(fdp); } else { unlock: FILEDESC_XUNLOCK(fdp); } filecaps_free_finish(oioctls); return (error); } static void sigiofree(struct sigio *sigio) { crfree(sigio->sio_ucred); free(sigio, M_SIGIO); } static struct sigio * funsetown_locked(struct sigio *sigio) { struct proc *p; struct pgrp *pg; SIGIO_ASSERT_LOCKED(); if (sigio == NULL) return (NULL); *sigio->sio_myref = NULL; if (sigio->sio_pgid < 0) { pg = sigio->sio_pgrp; PGRP_LOCK(pg); SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, sio_pgsigio); PGRP_UNLOCK(pg); } else { p = sigio->sio_proc; PROC_LOCK(p); SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, sio_pgsigio); PROC_UNLOCK(p); } return (sigio); } /* * If sigio is on the list associated with a process or process group, * disable signalling from the device, remove sigio from the list and * free sigio. */ void funsetown(struct sigio **sigiop) { struct sigio *sigio; /* Racy check, consumers must provide synchronization. */ if (*sigiop == NULL) return; SIGIO_LOCK(); sigio = funsetown_locked(*sigiop); SIGIO_UNLOCK(); if (sigio != NULL) sigiofree(sigio); } /* * Free a list of sigio structures. The caller must ensure that new sigio * structures cannot be added after this point. For process groups this is * guaranteed using the proctree lock; for processes, the P_WEXIT flag serves * as an interlock. */ void funsetownlst(struct sigiolst *sigiolst) { struct proc *p; struct pgrp *pg; struct sigio *sigio, *tmp; /* Racy check. */ sigio = SLIST_FIRST(sigiolst); if (sigio == NULL) return; p = NULL; pg = NULL; SIGIO_LOCK(); sigio = SLIST_FIRST(sigiolst); if (sigio == NULL) { SIGIO_UNLOCK(); return; } /* * Every entry of the list should belong to a single proc or pgrp. */ if (sigio->sio_pgid < 0) { pg = sigio->sio_pgrp; sx_assert(&proctree_lock, SX_XLOCKED); PGRP_LOCK(pg); } else /* if (sigio->sio_pgid > 0) */ { p = sigio->sio_proc; PROC_LOCK(p); KASSERT((p->p_flag & P_WEXIT) != 0, ("%s: process %p is not exiting", __func__, p)); } SLIST_FOREACH(sigio, sigiolst, sio_pgsigio) { *sigio->sio_myref = NULL; if (pg != NULL) { KASSERT(sigio->sio_pgid < 0, ("Proc sigio in pgrp sigio list")); KASSERT(sigio->sio_pgrp == pg, ("Bogus pgrp in sigio list")); } else /* if (p != NULL) */ { KASSERT(sigio->sio_pgid > 0, ("Pgrp sigio in proc sigio list")); KASSERT(sigio->sio_proc == p, ("Bogus proc in sigio list")); } } if (pg != NULL) PGRP_UNLOCK(pg); else PROC_UNLOCK(p); SIGIO_UNLOCK(); SLIST_FOREACH_SAFE(sigio, sigiolst, sio_pgsigio, tmp) sigiofree(sigio); } /* * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). * * After permission checking, add a sigio structure to the sigio list for * the process or process group. */ int fsetown(pid_t pgid, struct sigio **sigiop) { struct proc *proc; struct pgrp *pgrp; struct sigio *osigio, *sigio; int ret; if (pgid == 0) { funsetown(sigiop); return (0); } sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK); sigio->sio_pgid = pgid; sigio->sio_ucred = crhold(curthread->td_ucred); sigio->sio_myref = sigiop; ret = 0; if (pgid > 0) { ret = pget(pgid, PGET_NOTWEXIT | PGET_NOTID | PGET_HOLD, &proc); SIGIO_LOCK(); osigio = funsetown_locked(*sigiop); if (ret == 0) { PROC_LOCK(proc); _PRELE(proc); if ((proc->p_flag & P_WEXIT) != 0) { ret = ESRCH; } else if (proc->p_session != curthread->td_proc->p_session) { /* * Policy - Don't allow a process to FSETOWN a * process in another session. * * Remove this test to allow maximum flexibility * or restrict FSETOWN to the current process or * process group for maximum safety. */ ret = EPERM; } else { sigio->sio_proc = proc; SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); } PROC_UNLOCK(proc); } } else /* if (pgid < 0) */ { sx_slock(&proctree_lock); SIGIO_LOCK(); osigio = funsetown_locked(*sigiop); pgrp = pgfind(-pgid); if (pgrp == NULL) { ret = ESRCH; } else { if (pgrp->pg_session != curthread->td_proc->p_session) { /* * Policy - Don't allow a process to FSETOWN a * process in another session. * * Remove this test to allow maximum flexibility * or restrict FSETOWN to the current process or * process group for maximum safety. */ ret = EPERM; } else { sigio->sio_pgrp = pgrp; SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); } PGRP_UNLOCK(pgrp); } sx_sunlock(&proctree_lock); } if (ret == 0) *sigiop = sigio; SIGIO_UNLOCK(); if (osigio != NULL) sigiofree(osigio); return (ret); } /* * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). */ pid_t fgetown(struct sigio **sigiop) { pid_t pgid; SIGIO_LOCK(); pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0; SIGIO_UNLOCK(); return (pgid); } static int closefp_impl(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, bool audit) { int error; FILEDESC_XLOCK_ASSERT(fdp); /* * We now hold the fp reference that used to be owned by the * descriptor array. We have to unlock the FILEDESC *AFTER* * knote_fdclose to prevent a race of the fd getting opened, a knote * added, and deleteing a knote for the new fd. */ if (__predict_false(!TAILQ_EMPTY(&fdp->fd_kqlist))) knote_fdclose(td, fd); /* * We need to notify mqueue if the object is of type mqueue. */ if (__predict_false(fp->f_type == DTYPE_MQUEUE)) mq_fdclose(td, fd, fp); FILEDESC_XUNLOCK(fdp); #ifdef AUDIT if (AUDITING_TD(td) && audit) audit_sysclose(td, fd, fp); #endif error = closef(fp, td); /* * All paths leading up to closefp() will have already removed or * replaced the fd in the filedesc table, so a restart would not * operate on the same file. */ if (error == ERESTART) error = EINTR; return (error); } static int closefp_hl(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, bool holdleaders, bool audit) { int error; FILEDESC_XLOCK_ASSERT(fdp); if (holdleaders) { if (td->td_proc->p_fdtol != NULL) { /* * Ask fdfree() to sleep to ensure that all relevant * process leaders can be traversed in closef(). */ fdp->fd_holdleaderscount++; } else { holdleaders = false; } } error = closefp_impl(fdp, fd, fp, td, audit); if (holdleaders) { FILEDESC_XLOCK(fdp); fdp->fd_holdleaderscount--; if (fdp->fd_holdleaderscount == 0 && fdp->fd_holdleaderswakeup != 0) { fdp->fd_holdleaderswakeup = 0; wakeup(&fdp->fd_holdleaderscount); } FILEDESC_XUNLOCK(fdp); } return (error); } static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, bool holdleaders, bool audit) { FILEDESC_XLOCK_ASSERT(fdp); if (__predict_false(td->td_proc->p_fdtol != NULL)) { return (closefp_hl(fdp, fd, fp, td, holdleaders, audit)); } else { return (closefp_impl(fdp, fd, fp, td, audit)); } } /* * Close a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct close_args { int fd; }; #endif /* ARGSUSED */ int sys_close(struct thread *td, struct close_args *uap) { return (kern_close(td, uap->fd)); } int kern_close(struct thread *td, int fd) { struct filedesc *fdp; struct file *fp; fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); if ((fp = fget_noref(fdp, fd)) == NULL) { FILEDESC_XUNLOCK(fdp); return (EBADF); } fdfree(fdp, fd); /* closefp() drops the FILEDESC lock for us. */ return (closefp(fdp, fd, fp, td, true, true)); } -int -kern_close_range(struct thread *td, u_int lowfd, u_int highfd) +static int +close_range_cloexec(struct thread *td, u_int lowfd, u_int highfd) { struct filedesc *fdp; - const struct fdescenttbl *fdt; - struct file *fp; + struct fdescenttbl *fdt; + struct filedescent *fde; int fd; - /* - * Check this prior to clamping; closefrom(3) with only fd 0, 1, and 2 - * open should not be a usage error. From a close_range() perspective, - * close_range(3, ~0U, 0) in the same scenario should also likely not - * be a usage error as all fd above 3 are in-fact already closed. - */ - if (highfd < lowfd) { - return (EINVAL); + fdp = td->td_proc->p_fd; + FILEDESC_XLOCK(fdp); + fdt = atomic_load_ptr(&fdp->fd_files); + highfd = MIN(highfd, fdt->fdt_nfiles - 1); + fd = lowfd; + if (__predict_false(fd > highfd)) { + goto out_locked; + } + for (; fd <= highfd; fd++) { + fde = &fdt->fdt_ofiles[fd]; + if (fde->fde_file != NULL) + fde->fde_flags |= UF_EXCLOSE; } +out_locked: + FILEDESC_XUNLOCK(fdp); + return (0); +} + +static int +close_range_impl(struct thread *td, u_int lowfd, u_int highfd) +{ + struct filedesc *fdp; + const struct fdescenttbl *fdt; + struct file *fp; + int fd; fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); fdt = atomic_load_ptr(&fdp->fd_files); highfd = MIN(highfd, fdt->fdt_nfiles - 1); fd = lowfd; if (__predict_false(fd > highfd)) { goto out_locked; } for (;;) { fp = fdt->fdt_ofiles[fd].fde_file; if (fp == NULL) { if (fd == highfd) goto out_locked; } else { fdfree(fdp, fd); (void) closefp(fdp, fd, fp, td, true, true); if (fd == highfd) goto out_unlocked; FILEDESC_XLOCK(fdp); fdt = atomic_load_ptr(&fdp->fd_files); } fd++; } out_locked: FILEDESC_XUNLOCK(fdp); out_unlocked: return (0); } +int +kern_close_range(struct thread *td, int flags, u_int lowfd, u_int highfd) +{ + + /* + * Check this prior to clamping; closefrom(3) with only fd 0, 1, and 2 + * open should not be a usage error. From a close_range() perspective, + * close_range(3, ~0U, 0) in the same scenario should also likely not + * be a usage error as all fd above 3 are in-fact already closed. + */ + if (highfd < lowfd) { + return (EINVAL); + } + + if ((flags & CLOSE_RANGE_CLOEXEC) != 0) + return (close_range_cloexec(td, lowfd, highfd)); + + return (close_range_impl(td, lowfd, highfd)); +} + #ifndef _SYS_SYSPROTO_H_ struct close_range_args { u_int lowfd; u_int highfd; int flags; }; #endif int sys_close_range(struct thread *td, struct close_range_args *uap) { AUDIT_ARG_FD(uap->lowfd); AUDIT_ARG_CMD(uap->highfd); AUDIT_ARG_FFLAGS(uap->flags); - /* No flags currently defined */ - if (uap->flags != 0) + if ((uap->flags & ~(CLOSE_RANGE_CLOEXEC)) != 0) return (EINVAL); - return (kern_close_range(td, uap->lowfd, uap->highfd)); + return (kern_close_range(td, uap->flags, uap->lowfd, uap->highfd)); } #ifdef COMPAT_FREEBSD12 /* * Close open file descriptors. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd12_closefrom_args { int lowfd; }; #endif /* ARGSUSED */ int freebsd12_closefrom(struct thread *td, struct freebsd12_closefrom_args *uap) { u_int lowfd; AUDIT_ARG_FD(uap->lowfd); /* * Treat negative starting file descriptor values identical to * closefrom(0) which closes all files. */ lowfd = MAX(0, uap->lowfd); - return (kern_close_range(td, lowfd, ~0U)); + return (kern_close_range(td, 0, lowfd, ~0U)); } #endif /* COMPAT_FREEBSD12 */ #if defined(COMPAT_43) /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ofstat_args { int fd; struct ostat *sb; }; #endif /* ARGSUSED */ int ofstat(struct thread *td, struct ofstat_args *uap) { struct ostat oub; struct stat ub; int error; error = kern_fstat(td, uap->fd, &ub); if (error == 0) { cvtstat(&ub, &oub); error = copyout(&oub, uap->sb, sizeof(oub)); } return (error); } #endif /* COMPAT_43 */ #if defined(COMPAT_FREEBSD11) int freebsd11_fstat(struct thread *td, struct freebsd11_fstat_args *uap) { struct stat sb; struct freebsd11_stat osb; int error; error = kern_fstat(td, uap->fd, &sb); if (error != 0) return (error); error = freebsd11_cvtstat(&sb, &osb); if (error == 0) error = copyout(&osb, uap->sb, sizeof(osb)); return (error); } #endif /* COMPAT_FREEBSD11 */ /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fstat_args { int fd; struct stat *sb; }; #endif /* ARGSUSED */ int sys_fstat(struct thread *td, struct fstat_args *uap) { struct stat ub; int error; error = kern_fstat(td, uap->fd, &ub); if (error == 0) error = copyout(&ub, uap->sb, sizeof(ub)); return (error); } int kern_fstat(struct thread *td, int fd, struct stat *sbp) { struct file *fp; int error; AUDIT_ARG_FD(fd); error = fget(td, fd, &cap_fstat_rights, &fp); if (__predict_false(error != 0)) return (error); AUDIT_ARG_FILE(td->td_proc, fp); error = fo_stat(fp, sbp, td->td_ucred); fdrop(fp, td); #ifdef __STAT_TIME_T_EXT sbp->st_atim_ext = 0; sbp->st_mtim_ext = 0; sbp->st_ctim_ext = 0; sbp->st_btim_ext = 0; #endif #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrstat_error(sbp, error); #endif return (error); } #if defined(COMPAT_FREEBSD11) /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd11_nfstat_args { int fd; struct nstat *sb; }; #endif /* ARGSUSED */ int freebsd11_nfstat(struct thread *td, struct freebsd11_nfstat_args *uap) { struct nstat nub; struct stat ub; int error; error = kern_fstat(td, uap->fd, &ub); if (error != 0) return (error); error = freebsd11_cvtnstat(&ub, &nub); if (error != 0) error = copyout(&nub, uap->sb, sizeof(nub)); return (error); } #endif /* COMPAT_FREEBSD11 */ /* * Return pathconf information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fpathconf_args { int fd; int name; }; #endif /* ARGSUSED */ int sys_fpathconf(struct thread *td, struct fpathconf_args *uap) { long value; int error; error = kern_fpathconf(td, uap->fd, uap->name, &value); if (error == 0) td->td_retval[0] = value; return (error); } int kern_fpathconf(struct thread *td, int fd, int name, long *valuep) { struct file *fp; struct vnode *vp; int error; error = fget(td, fd, &cap_fpathconf_rights, &fp); if (error != 0) return (error); if (name == _PC_ASYNC_IO) { *valuep = _POSIX_ASYNCHRONOUS_IO; goto out; } vp = fp->f_vnode; if (vp != NULL) { vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_PATHCONF(vp, name, valuep); VOP_UNLOCK(vp); } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { if (name != _PC_PIPE_BUF) { error = EINVAL; } else { *valuep = PIPE_BUF; error = 0; } } else { error = EOPNOTSUPP; } out: fdrop(fp, td); return (error); } /* * Copy filecaps structure allocating memory for ioctls array if needed. * * The last parameter indicates whether the fdtable is locked. If it is not and * ioctls are encountered, copying fails and the caller must lock the table. * * Note that if the table was not locked, the caller has to check the relevant * sequence counter to determine whether the operation was successful. */ bool filecaps_copy(const struct filecaps *src, struct filecaps *dst, bool locked) { size_t size; if (src->fc_ioctls != NULL && !locked) return (false); memcpy(dst, src, sizeof(*src)); if (src->fc_ioctls == NULL) return (true); KASSERT(src->fc_nioctls > 0, ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls)); size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls; dst->fc_ioctls = malloc(size, M_FILECAPS, M_WAITOK); memcpy(dst->fc_ioctls, src->fc_ioctls, size); return (true); } static u_long * filecaps_copy_prep(const struct filecaps *src) { u_long *ioctls; size_t size; if (__predict_true(src->fc_ioctls == NULL)) return (NULL); KASSERT(src->fc_nioctls > 0, ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls)); size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls; ioctls = malloc(size, M_FILECAPS, M_WAITOK); return (ioctls); } static void filecaps_copy_finish(const struct filecaps *src, struct filecaps *dst, u_long *ioctls) { size_t size; *dst = *src; if (__predict_true(src->fc_ioctls == NULL)) { MPASS(ioctls == NULL); return; } size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls; dst->fc_ioctls = ioctls; bcopy(src->fc_ioctls, dst->fc_ioctls, size); } /* * Move filecaps structure to the new place and clear the old place. */ void filecaps_move(struct filecaps *src, struct filecaps *dst) { *dst = *src; bzero(src, sizeof(*src)); } /* * Fill the given filecaps structure with full rights. */ static void filecaps_fill(struct filecaps *fcaps) { CAP_ALL(&fcaps->fc_rights); fcaps->fc_ioctls = NULL; fcaps->fc_nioctls = -1; fcaps->fc_fcntls = CAP_FCNTL_ALL; } /* * Free memory allocated within filecaps structure. */ void filecaps_free(struct filecaps *fcaps) { free(fcaps->fc_ioctls, M_FILECAPS); bzero(fcaps, sizeof(*fcaps)); } static u_long * filecaps_free_prep(struct filecaps *fcaps) { u_long *ioctls; ioctls = fcaps->fc_ioctls; bzero(fcaps, sizeof(*fcaps)); return (ioctls); } static void filecaps_free_finish(u_long *ioctls) { free(ioctls, M_FILECAPS); } /* * Validate the given filecaps structure. */ static void filecaps_validate(const struct filecaps *fcaps, const char *func) { KASSERT(cap_rights_is_valid(&fcaps->fc_rights), ("%s: invalid rights", func)); KASSERT((fcaps->fc_fcntls & ~CAP_FCNTL_ALL) == 0, ("%s: invalid fcntls", func)); KASSERT(fcaps->fc_fcntls == 0 || cap_rights_is_set(&fcaps->fc_rights, CAP_FCNTL), ("%s: fcntls without CAP_FCNTL", func)); KASSERT(fcaps->fc_ioctls != NULL ? fcaps->fc_nioctls > 0 : (fcaps->fc_nioctls == -1 || fcaps->fc_nioctls == 0), ("%s: invalid ioctls", func)); KASSERT(fcaps->fc_nioctls == 0 || cap_rights_is_set(&fcaps->fc_rights, CAP_IOCTL), ("%s: ioctls without CAP_IOCTL", func)); } static void fdgrowtable_exp(struct filedesc *fdp, int nfd) { int nfd1; FILEDESC_XLOCK_ASSERT(fdp); nfd1 = fdp->fd_nfiles * 2; if (nfd1 < nfd) nfd1 = nfd; fdgrowtable(fdp, nfd1); } /* * Grow the file table to accommodate (at least) nfd descriptors. */ static void fdgrowtable(struct filedesc *fdp, int nfd) { struct filedesc0 *fdp0; struct freetable *ft; struct fdescenttbl *ntable; struct fdescenttbl *otable; int nnfiles, onfiles; NDSLOTTYPE *nmap, *omap; KASSERT(fdp->fd_nfiles > 0, ("zero-length file table")); /* save old values */ onfiles = fdp->fd_nfiles; otable = fdp->fd_files; omap = fdp->fd_map; /* compute the size of the new table */ nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */ if (nnfiles <= onfiles) /* the table is already large enough */ return; /* * Allocate a new table. We need enough space for the number of * entries, file entries themselves and the struct freetable we will use * when we decommission the table and place it on the freelist. * We place the struct freetable in the middle so we don't have * to worry about padding. */ ntable = malloc(offsetof(struct fdescenttbl, fdt_ofiles) + nnfiles * sizeof(ntable->fdt_ofiles[0]) + sizeof(struct freetable), M_FILEDESC, M_ZERO | M_WAITOK); /* copy the old data */ ntable->fdt_nfiles = nnfiles; memcpy(ntable->fdt_ofiles, otable->fdt_ofiles, onfiles * sizeof(ntable->fdt_ofiles[0])); /* * Allocate a new map only if the old is not large enough. It will * grow at a slower rate than the table as it can map more * entries than the table can hold. */ if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) { nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC, M_ZERO | M_WAITOK); /* copy over the old data and update the pointer */ memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap)); fdp->fd_map = nmap; } /* * Make sure that ntable is correctly initialized before we replace * fd_files poiner. Otherwise fget_unlocked() may see inconsistent * data. */ atomic_store_rel_ptr((volatile void *)&fdp->fd_files, (uintptr_t)ntable); /* * Free the old file table when not shared by other threads or processes. * The old file table is considered to be shared when either are true: * - The process has more than one thread. * - The file descriptor table has been shared via fdshare(). * * When shared, the old file table will be placed on a freelist * which will be processed when the struct filedesc is released. * * Note that if onfiles == NDFILE, we're dealing with the original * static allocation contained within (struct filedesc0 *)fdp, * which must not be freed. */ if (onfiles > NDFILE) { /* * Note we may be called here from fdinit while allocating a * table for a new process in which case ->p_fd points * elsewhere. */ if (curproc->p_fd != fdp || FILEDESC_IS_ONLY_USER(fdp)) { free(otable, M_FILEDESC); } else { ft = (struct freetable *)&otable->fdt_ofiles[onfiles]; fdp0 = (struct filedesc0 *)fdp; ft->ft_table = otable; SLIST_INSERT_HEAD(&fdp0->fd_free, ft, ft_next); } } /* * The map does not have the same possibility of threads still * holding references to it. So always free it as long as it * does not reference the original static allocation. */ if (NDSLOTS(onfiles) > NDSLOTS(NDFILE)) free(omap, M_FILEDESC); } /* * Allocate a file descriptor for the process. */ int fdalloc(struct thread *td, int minfd, int *result) { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; int fd, maxfd, allocfd; #ifdef RACCT int error; #endif FILEDESC_XLOCK_ASSERT(fdp); if (fdp->fd_freefile > minfd) minfd = fdp->fd_freefile; maxfd = getmaxfd(td); /* * Search the bitmap for a free descriptor starting at minfd. * If none is found, grow the file table. */ fd = fd_first_free(fdp, minfd, fdp->fd_nfiles); if (__predict_false(fd >= maxfd)) return (EMFILE); if (__predict_false(fd >= fdp->fd_nfiles)) { allocfd = min(fd * 2, maxfd); #ifdef RACCT if (RACCT_ENABLED()) { error = racct_set_unlocked(p, RACCT_NOFILE, allocfd); if (error != 0) return (EMFILE); } #endif /* * fd is already equal to first free descriptor >= minfd, so * we only need to grow the table and we are done. */ fdgrowtable_exp(fdp, allocfd); } /* * Perform some sanity checks, then mark the file descriptor as * used and return it to the caller. */ KASSERT(fd >= 0 && fd < min(maxfd, fdp->fd_nfiles), ("invalid descriptor %d", fd)); KASSERT(!fdisused(fdp, fd), ("fd_first_free() returned non-free descriptor")); KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, ("file descriptor isn't free")); fdused(fdp, fd); *result = fd; return (0); } /* * Allocate n file descriptors for the process. */ int fdallocn(struct thread *td, int minfd, int *fds, int n) { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; int i; FILEDESC_XLOCK_ASSERT(fdp); for (i = 0; i < n; i++) if (fdalloc(td, 0, &fds[i]) != 0) break; if (i < n) { for (i--; i >= 0; i--) fdunused(fdp, fds[i]); return (EMFILE); } return (0); } /* * Create a new open file structure and allocate a file descriptor for the * process that refers to it. We add one reference to the file for the * descriptor table and one reference for resultfp. This is to prevent us * being preempted and the entry in the descriptor table closed after we * release the FILEDESC lock. */ int falloc_caps(struct thread *td, struct file **resultfp, int *resultfd, int flags, struct filecaps *fcaps) { struct file *fp; int error, fd; MPASS(resultfp != NULL); MPASS(resultfd != NULL); error = _falloc_noinstall(td, &fp, 2); if (__predict_false(error != 0)) { return (error); } error = finstall_refed(td, fp, &fd, flags, fcaps); if (__predict_false(error != 0)) { falloc_abort(td, fp); return (error); } *resultfp = fp; *resultfd = fd; return (0); } /* * Create a new open file structure without allocating a file descriptor. */ int _falloc_noinstall(struct thread *td, struct file **resultfp, u_int n) { struct file *fp; int maxuserfiles = maxfiles - (maxfiles / 20); int openfiles_new; static struct timeval lastfail; static int curfail; KASSERT(resultfp != NULL, ("%s: resultfp == NULL", __func__)); MPASS(n > 0); openfiles_new = atomic_fetchadd_int(&openfiles, 1) + 1; if ((openfiles_new >= maxuserfiles && priv_check(td, PRIV_MAXFILES) != 0) || openfiles_new >= maxfiles) { atomic_subtract_int(&openfiles, 1); if (ppsratecheck(&lastfail, &curfail, 1)) { printf("kern.maxfiles limit exceeded by uid %i, (%s) " "please see tuning(7).\n", td->td_ucred->cr_ruid, td->td_proc->p_comm); } return (ENFILE); } fp = uma_zalloc(file_zone, M_WAITOK); bzero(fp, sizeof(*fp)); refcount_init(&fp->f_count, n); fp->f_cred = crhold(td->td_ucred); fp->f_ops = &badfileops; *resultfp = fp; return (0); } void falloc_abort(struct thread *td, struct file *fp) { /* * For assertion purposes. */ refcount_init(&fp->f_count, 0); _fdrop(fp, td); } /* * Install a file in a file descriptor table. */ void _finstall(struct filedesc *fdp, struct file *fp, int fd, int flags, struct filecaps *fcaps) { struct filedescent *fde; MPASS(fp != NULL); if (fcaps != NULL) filecaps_validate(fcaps, __func__); FILEDESC_XLOCK_ASSERT(fdp); fde = &fdp->fd_ofiles[fd]; #ifdef CAPABILITIES seqc_write_begin(&fde->fde_seqc); #endif fde->fde_file = fp; fde->fde_flags = (flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0; if (fcaps != NULL) filecaps_move(fcaps, &fde->fde_caps); else filecaps_fill(&fde->fde_caps); #ifdef CAPABILITIES seqc_write_end(&fde->fde_seqc); #endif } int finstall_refed(struct thread *td, struct file *fp, int *fd, int flags, struct filecaps *fcaps) { struct filedesc *fdp = td->td_proc->p_fd; int error; MPASS(fd != NULL); FILEDESC_XLOCK(fdp); error = fdalloc(td, 0, fd); if (__predict_true(error == 0)) { _finstall(fdp, fp, *fd, flags, fcaps); } FILEDESC_XUNLOCK(fdp); return (error); } int finstall(struct thread *td, struct file *fp, int *fd, int flags, struct filecaps *fcaps) { int error; MPASS(fd != NULL); if (!fhold(fp)) return (EBADF); error = finstall_refed(td, fp, fd, flags, fcaps); if (__predict_false(error != 0)) { fdrop(fp, td); } return (error); } /* * Build a new filedesc structure from another. * * If fdp is not NULL, return with it shared locked. */ struct filedesc * fdinit(void) { struct filedesc0 *newfdp0; struct filedesc *newfdp; newfdp0 = uma_zalloc(filedesc0_zone, M_WAITOK | M_ZERO); newfdp = &newfdp0->fd_fd; /* Create the file descriptor table. */ FILEDESC_LOCK_INIT(newfdp); refcount_init(&newfdp->fd_refcnt, 1); refcount_init(&newfdp->fd_holdcnt, 1); newfdp->fd_map = newfdp0->fd_dmap; newfdp->fd_files = (struct fdescenttbl *)&newfdp0->fd_dfiles; newfdp->fd_files->fdt_nfiles = NDFILE; return (newfdp); } /* * Build a pwddesc structure from another. * Copy the current, root, and jail root vnode references. * * If pdp is not NULL, return with it shared locked. */ struct pwddesc * pdinit(struct pwddesc *pdp, bool keeplock) { struct pwddesc *newpdp; struct pwd *newpwd; newpdp = malloc(sizeof(*newpdp), M_PWDDESC, M_WAITOK | M_ZERO); PWDDESC_LOCK_INIT(newpdp); refcount_init(&newpdp->pd_refcount, 1); newpdp->pd_cmask = CMASK; if (pdp == NULL) { newpwd = pwd_alloc(); smr_serialized_store(&newpdp->pd_pwd, newpwd, true); return (newpdp); } PWDDESC_XLOCK(pdp); newpwd = pwd_hold_pwddesc(pdp); smr_serialized_store(&newpdp->pd_pwd, newpwd, true); if (!keeplock) PWDDESC_XUNLOCK(pdp); return (newpdp); } /* * Hold either filedesc or pwddesc of the passed process. * * The process lock is used to synchronize against the target exiting and * freeing the data. * * Clearing can be ilustrated in 3 steps: * 1. set the pointer to NULL. Either routine can race against it, hence * atomic_load_ptr. * 2. observe the process lock as not taken. Until then fdhold/pdhold can * race to either still see the pointer or find NULL. It is still safe to * grab a reference as clearing is stalled. * 3. after the lock is observed as not taken, any fdhold/pdhold calls are * guaranteed to see NULL, making it safe to finish clearing */ static struct filedesc * fdhold(struct proc *p) { struct filedesc *fdp; PROC_LOCK_ASSERT(p, MA_OWNED); fdp = atomic_load_ptr(&p->p_fd); if (fdp != NULL) refcount_acquire(&fdp->fd_holdcnt); return (fdp); } static struct pwddesc * pdhold(struct proc *p) { struct pwddesc *pdp; PROC_LOCK_ASSERT(p, MA_OWNED); pdp = atomic_load_ptr(&p->p_pd); if (pdp != NULL) refcount_acquire(&pdp->pd_refcount); return (pdp); } static void fddrop(struct filedesc *fdp) { if (refcount_load(&fdp->fd_holdcnt) > 1) { if (refcount_release(&fdp->fd_holdcnt) == 0) return; } FILEDESC_LOCK_DESTROY(fdp); uma_zfree(filedesc0_zone, fdp); } static void pddrop(struct pwddesc *pdp) { struct pwd *pwd; if (refcount_release_if_not_last(&pdp->pd_refcount)) return; PWDDESC_XLOCK(pdp); if (refcount_release(&pdp->pd_refcount) == 0) { PWDDESC_XUNLOCK(pdp); return; } pwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); pwd_set(pdp, NULL); PWDDESC_XUNLOCK(pdp); pwd_drop(pwd); PWDDESC_LOCK_DESTROY(pdp); free(pdp, M_PWDDESC); } /* * Share a filedesc structure. */ struct filedesc * fdshare(struct filedesc *fdp) { refcount_acquire(&fdp->fd_refcnt); return (fdp); } /* * Share a pwddesc structure. */ struct pwddesc * pdshare(struct pwddesc *pdp) { refcount_acquire(&pdp->pd_refcount); return (pdp); } /* * Unshare a filedesc structure, if necessary by making a copy */ void fdunshare(struct thread *td) { struct filedesc *tmp; struct proc *p = td->td_proc; if (refcount_load(&p->p_fd->fd_refcnt) == 1) return; tmp = fdcopy(p->p_fd); fdescfree(td); p->p_fd = tmp; } /* * Unshare a pwddesc structure. */ void pdunshare(struct thread *td) { struct pwddesc *pdp; struct proc *p; p = td->td_proc; /* Not shared. */ if (refcount_load(&p->p_pd->pd_refcount) == 1) return; pdp = pdcopy(p->p_pd); pdescfree(td); p->p_pd = pdp; } /* * Copy a filedesc structure. A NULL pointer in returns a NULL reference, * this is to ease callers, not catch errors. */ struct filedesc * fdcopy(struct filedesc *fdp) { struct filedesc *newfdp; struct filedescent *nfde, *ofde; int i, lastfile; MPASS(fdp != NULL); newfdp = fdinit(); FILEDESC_SLOCK(fdp); for (;;) { lastfile = fdlastfile(fdp); if (lastfile < newfdp->fd_nfiles) break; FILEDESC_SUNLOCK(fdp); fdgrowtable(newfdp, lastfile + 1); FILEDESC_SLOCK(fdp); } /* copy all passable descriptors (i.e. not kqueue) */ newfdp->fd_freefile = fdp->fd_freefile; FILEDESC_FOREACH_FDE(fdp, i, ofde) { if ((ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0 || !fhold(ofde->fde_file)) { if (newfdp->fd_freefile == fdp->fd_freefile) newfdp->fd_freefile = i; continue; } nfde = &newfdp->fd_ofiles[i]; *nfde = *ofde; filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true); fdused_init(newfdp, i); } MPASS(newfdp->fd_freefile != -1); FILEDESC_SUNLOCK(fdp); return (newfdp); } /* * Copy a pwddesc structure. */ struct pwddesc * pdcopy(struct pwddesc *pdp) { struct pwddesc *newpdp; MPASS(pdp != NULL); newpdp = pdinit(pdp, true); newpdp->pd_cmask = pdp->pd_cmask; PWDDESC_XUNLOCK(pdp); return (newpdp); } /* * Clear POSIX style locks. This is only used when fdp looses a reference (i.e. * one of processes using it exits) and the table used to be shared. */ static void fdclearlocks(struct thread *td) { struct filedesc *fdp; struct filedesc_to_leader *fdtol; struct flock lf; struct file *fp; struct proc *p; struct vnode *vp; int i; p = td->td_proc; fdp = p->p_fd; fdtol = p->p_fdtol; MPASS(fdtol != NULL); FILEDESC_XLOCK(fdp); KASSERT(fdtol->fdl_refcount > 0, ("filedesc_to_refcount botch: fdl_refcount=%d", fdtol->fdl_refcount)); if (fdtol->fdl_refcount == 1 && (p->p_leader->p_flag & P_ADVLOCK) != 0) { FILEDESC_FOREACH_FP(fdp, i, fp) { if (fp->f_type != DTYPE_VNODE || !fhold(fp)) continue; FILEDESC_XUNLOCK(fdp); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; vp = fp->f_vnode; (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &lf, F_POSIX); FILEDESC_XLOCK(fdp); fdrop(fp, td); } } retry: if (fdtol->fdl_refcount == 1) { if (fdp->fd_holdleaderscount > 0 && (p->p_leader->p_flag & P_ADVLOCK) != 0) { /* * close() or kern_dup() has cleared a reference * in a shared file descriptor table. */ fdp->fd_holdleaderswakeup = 1; sx_sleep(&fdp->fd_holdleaderscount, FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0); goto retry; } if (fdtol->fdl_holdcount > 0) { /* * Ensure that fdtol->fdl_leader remains * valid in closef(). */ fdtol->fdl_wakeup = 1; sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0); goto retry; } } fdtol->fdl_refcount--; if (fdtol->fdl_refcount == 0 && fdtol->fdl_holdcount == 0) { fdtol->fdl_next->fdl_prev = fdtol->fdl_prev; fdtol->fdl_prev->fdl_next = fdtol->fdl_next; } else fdtol = NULL; p->p_fdtol = NULL; FILEDESC_XUNLOCK(fdp); if (fdtol != NULL) free(fdtol, M_FILEDESC_TO_LEADER); } /* * Release a filedesc structure. */ static void fdescfree_fds(struct thread *td, struct filedesc *fdp) { struct filedesc0 *fdp0; struct freetable *ft, *tft; struct filedescent *fde; struct file *fp; int i; KASSERT(refcount_load(&fdp->fd_refcnt) == 0, ("%s: fd table %p carries references", __func__, fdp)); /* * Serialize with threads iterating over the table, if any. */ if (refcount_load(&fdp->fd_holdcnt) > 1) { FILEDESC_XLOCK(fdp); FILEDESC_XUNLOCK(fdp); } FILEDESC_FOREACH_FDE(fdp, i, fde) { fp = fde->fde_file; fdefree_last(fde); (void) closef(fp, td); } if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE)) free(fdp->fd_map, M_FILEDESC); if (fdp->fd_nfiles > NDFILE) free(fdp->fd_files, M_FILEDESC); fdp0 = (struct filedesc0 *)fdp; SLIST_FOREACH_SAFE(ft, &fdp0->fd_free, ft_next, tft) free(ft->ft_table, M_FILEDESC); fddrop(fdp); } void fdescfree(struct thread *td) { struct proc *p; struct filedesc *fdp; p = td->td_proc; fdp = p->p_fd; MPASS(fdp != NULL); #ifdef RACCT if (RACCT_ENABLED()) racct_set_unlocked(p, RACCT_NOFILE, 0); #endif if (p->p_fdtol != NULL) fdclearlocks(td); /* * Check fdhold for an explanation. */ atomic_store_ptr(&p->p_fd, NULL); atomic_thread_fence_seq_cst(); PROC_WAIT_UNLOCKED(p); if (refcount_release(&fdp->fd_refcnt) == 0) return; fdescfree_fds(td, fdp); } void pdescfree(struct thread *td) { struct proc *p; struct pwddesc *pdp; p = td->td_proc; pdp = p->p_pd; MPASS(pdp != NULL); /* * Check pdhold for an explanation. */ atomic_store_ptr(&p->p_pd, NULL); atomic_thread_fence_seq_cst(); PROC_WAIT_UNLOCKED(p); pddrop(pdp); } /* * For setugid programs, we don't want to people to use that setugidness * to generate error messages which write to a file which otherwise would * otherwise be off-limits to the process. We check for filesystems where * the vnode can change out from under us after execve (like [lin]procfs). * * Since fdsetugidsafety calls this only for fd 0, 1 and 2, this check is * sufficient. We also don't check for setugidness since we know we are. */ static bool is_unsafe(struct file *fp) { struct vnode *vp; if (fp->f_type != DTYPE_VNODE) return (false); vp = fp->f_vnode; return ((vp->v_vflag & VV_PROCDEP) != 0); } /* * Make this setguid thing safe, if at all possible. */ void fdsetugidsafety(struct thread *td) { struct filedesc *fdp; struct file *fp; int i; fdp = td->td_proc->p_fd; KASSERT(refcount_load(&fdp->fd_refcnt) == 1, ("the fdtable should not be shared")); MPASS(fdp->fd_nfiles >= 3); for (i = 0; i <= 2; i++) { fp = fdp->fd_ofiles[i].fde_file; if (fp != NULL && is_unsafe(fp)) { FILEDESC_XLOCK(fdp); knote_fdclose(td, i); /* * NULL-out descriptor prior to close to avoid * a race while close blocks. */ fdfree(fdp, i); FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); } } } /* * If a specific file object occupies a specific file descriptor, close the * file descriptor entry and drop a reference on the file object. This is a * convenience function to handle a subsequent error in a function that calls * falloc() that handles the race that another thread might have closed the * file descriptor out from under the thread creating the file object. */ void fdclose(struct thread *td, struct file *fp, int idx) { struct filedesc *fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); if (fdp->fd_ofiles[idx].fde_file == fp) { fdfree(fdp, idx); FILEDESC_XUNLOCK(fdp); fdrop(fp, td); } else FILEDESC_XUNLOCK(fdp); } /* * Close any files on exec? */ void fdcloseexec(struct thread *td) { struct filedesc *fdp; struct filedescent *fde; struct file *fp; int i; fdp = td->td_proc->p_fd; KASSERT(refcount_load(&fdp->fd_refcnt) == 1, ("the fdtable should not be shared")); FILEDESC_FOREACH_FDE(fdp, i, fde) { fp = fde->fde_file; if (fp->f_type == DTYPE_MQUEUE || (fde->fde_flags & UF_EXCLOSE)) { FILEDESC_XLOCK(fdp); fdfree(fdp, i); (void) closefp(fdp, i, fp, td, false, false); FILEDESC_UNLOCK_ASSERT(fdp); } } } /* * It is unsafe for set[ug]id processes to be started with file * descriptors 0..2 closed, as these descriptors are given implicit * significance in the Standard C library. fdcheckstd() will create a * descriptor referencing /dev/null for each of stdin, stdout, and * stderr that is not already open. */ int fdcheckstd(struct thread *td) { struct filedesc *fdp; register_t save; int i, error, devnull; fdp = td->td_proc->p_fd; KASSERT(refcount_load(&fdp->fd_refcnt) == 1, ("the fdtable should not be shared")); MPASS(fdp->fd_nfiles >= 3); devnull = -1; for (i = 0; i <= 2; i++) { if (fdp->fd_ofiles[i].fde_file != NULL) continue; save = td->td_retval[0]; if (devnull != -1) { error = kern_dup(td, FDDUP_FIXED, 0, devnull, i); } else { error = kern_openat(td, AT_FDCWD, "/dev/null", UIO_SYSSPACE, O_RDWR, 0); if (error == 0) { devnull = td->td_retval[0]; KASSERT(devnull == i, ("we didn't get our fd")); } } td->td_retval[0] = save; if (error != 0) return (error); } return (0); } /* * Internal form of close. Decrement reference count on file structure. * Note: td may be NULL when closing a file that was being passed in a * message. */ int closef(struct file *fp, struct thread *td) { struct vnode *vp; struct flock lf; struct filedesc_to_leader *fdtol; struct filedesc *fdp; MPASS(td != NULL); /* * POSIX record locking dictates that any close releases ALL * locks owned by this process. This is handled by setting * a flag in the unlock to free ONLY locks obeying POSIX * semantics, and not to free BSD-style file locks. * If the descriptor was in a message, POSIX-style locks * aren't passed with the descriptor, and the thread pointer * will be NULL. Callers should be careful only to pass a * NULL thread pointer when there really is no owning * context that might have locks, or the locks will be * leaked. */ if (fp->f_type == DTYPE_VNODE) { vp = fp->f_vnode; if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, F_UNLCK, &lf, F_POSIX); } fdtol = td->td_proc->p_fdtol; if (fdtol != NULL) { /* * Handle special case where file descriptor table is * shared between multiple process leaders. */ fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); for (fdtol = fdtol->fdl_next; fdtol != td->td_proc->p_fdtol; fdtol = fdtol->fdl_next) { if ((fdtol->fdl_leader->p_flag & P_ADVLOCK) == 0) continue; fdtol->fdl_holdcount++; FILEDESC_XUNLOCK(fdp); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; vp = fp->f_vnode; (void) VOP_ADVLOCK(vp, (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf, F_POSIX); FILEDESC_XLOCK(fdp); fdtol->fdl_holdcount--; if (fdtol->fdl_holdcount == 0 && fdtol->fdl_wakeup != 0) { fdtol->fdl_wakeup = 0; wakeup(fdtol); } } FILEDESC_XUNLOCK(fdp); } } return (fdrop_close(fp, td)); } /* * Hack for file descriptor passing code. */ void closef_nothread(struct file *fp) { fdrop(fp, NULL); } /* * Initialize the file pointer with the specified properties. * * The ops are set with release semantics to be certain that the flags, type, * and data are visible when ops is. This is to prevent ops methods from being * called with bad data. */ void finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops) { fp->f_data = data; fp->f_flag = flag; fp->f_type = type; atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops); } void finit_vnode(struct file *fp, u_int flag, void *data, struct fileops *ops) { fp->f_seqcount[UIO_READ] = 1; fp->f_seqcount[UIO_WRITE] = 1; finit(fp, (flag & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, data, ops); } int fget_cap_noref(struct filedesc *fdp, int fd, cap_rights_t *needrightsp, struct file **fpp, struct filecaps *havecapsp) { struct filedescent *fde; int error; FILEDESC_LOCK_ASSERT(fdp); *fpp = NULL; fde = fdeget_noref(fdp, fd); if (fde == NULL) { error = EBADF; goto out; } #ifdef CAPABILITIES error = cap_check(cap_rights_fde_inline(fde), needrightsp); if (error != 0) goto out; #endif if (havecapsp != NULL) filecaps_copy(&fde->fde_caps, havecapsp, true); *fpp = fde->fde_file; error = 0; out: return (error); } #ifdef CAPABILITIES int fget_cap(struct thread *td, int fd, cap_rights_t *needrightsp, struct file **fpp, struct filecaps *havecapsp) { struct filedesc *fdp = td->td_proc->p_fd; int error; struct file *fp; seqc_t seq; *fpp = NULL; for (;;) { error = fget_unlocked_seq(td, fd, needrightsp, &fp, &seq); if (error != 0) return (error); if (havecapsp != NULL) { if (!filecaps_copy(&fdp->fd_ofiles[fd].fde_caps, havecapsp, false)) { fdrop(fp, td); goto get_locked; } } if (!fd_modified(fdp, fd, seq)) break; fdrop(fp, td); } *fpp = fp; return (0); get_locked: FILEDESC_SLOCK(fdp); error = fget_cap_noref(fdp, fd, needrightsp, fpp, havecapsp); if (error == 0 && !fhold(*fpp)) error = EBADF; FILEDESC_SUNLOCK(fdp); return (error); } #else int fget_cap(struct thread *td, int fd, cap_rights_t *needrightsp, struct file **fpp, struct filecaps *havecapsp) { int error; error = fget_unlocked(td, fd, needrightsp, fpp); if (havecapsp != NULL && error == 0) filecaps_fill(havecapsp); return (error); } #endif #ifdef CAPABILITIES int fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsearch) { const struct filedescent *fde; const struct fdescenttbl *fdt; struct filedesc *fdp; struct file *fp; struct vnode *vp; const cap_rights_t *haverights; cap_rights_t rights; seqc_t seq; VFS_SMR_ASSERT_ENTERED(); rights = *ndp->ni_rightsneeded; cap_rights_set_one(&rights, CAP_LOOKUP); fdp = curproc->p_fd; fdt = fdp->fd_files; if (__predict_false((u_int)fd >= fdt->fdt_nfiles)) return (EBADF); seq = seqc_read_notmodify(fd_seqc(fdt, fd)); fde = &fdt->fdt_ofiles[fd]; haverights = cap_rights_fde_inline(fde); fp = fde->fde_file; if (__predict_false(fp == NULL)) return (EAGAIN); if (__predict_false(cap_check_inline_transient(haverights, &rights))) return (EAGAIN); *fsearch = ((fp->f_flag & FSEARCH) != 0); vp = fp->f_vnode; if (__predict_false(vp == NULL)) { return (EAGAIN); } if (!filecaps_copy(&fde->fde_caps, &ndp->ni_filecaps, false)) { return (EAGAIN); } /* * Use an acquire barrier to force re-reading of fdt so it is * refreshed for verification. */ atomic_thread_fence_acq(); fdt = fdp->fd_files; if (__predict_false(!seqc_consistent_no_fence(fd_seqc(fdt, fd), seq))) return (EAGAIN); /* * If file descriptor doesn't have all rights, * all lookups relative to it must also be * strictly relative. * * Not yet supported by fast path. */ CAP_ALL(&rights); if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights, &rights) || ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL || ndp->ni_filecaps.fc_nioctls != -1) { #ifdef notyet ndp->ni_lcf |= NI_LCF_STRICTRELATIVE; #else return (EAGAIN); #endif } *vpp = vp; return (0); } #else int fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsearch) { const struct fdescenttbl *fdt; struct filedesc *fdp; struct file *fp; struct vnode *vp; VFS_SMR_ASSERT_ENTERED(); fdp = curproc->p_fd; fdt = fdp->fd_files; if (__predict_false((u_int)fd >= fdt->fdt_nfiles)) return (EBADF); fp = fdt->fdt_ofiles[fd].fde_file; if (__predict_false(fp == NULL)) return (EAGAIN); *fsearch = ((fp->f_flag & FSEARCH) != 0); vp = fp->f_vnode; if (__predict_false(vp == NULL || vp->v_type != VDIR)) { return (EAGAIN); } /* * Use an acquire barrier to force re-reading of fdt so it is * refreshed for verification. */ atomic_thread_fence_acq(); fdt = fdp->fd_files; if (__predict_false(fp != fdt->fdt_ofiles[fd].fde_file)) return (EAGAIN); filecaps_fill(&ndp->ni_filecaps); *vpp = vp; return (0); } #endif /* * Fetch the descriptor locklessly. * * We avoid fdrop() races by never raising a refcount above 0. To accomplish * this we have to use a cmpset loop rather than an atomic_add. The descriptor * must be re-verified once we acquire a reference to be certain that the * identity is still correct and we did not lose a race due to preemption. * * Force a reload of fdt when looping. Another thread could reallocate * the table before this fd was closed, so it is possible that there is * a stale fp pointer in cached version. */ #ifdef CAPABILITIES static int fget_unlocked_seq(struct thread *td, int fd, cap_rights_t *needrightsp, struct file **fpp, seqc_t *seqp) { struct filedesc *fdp; const struct filedescent *fde; const struct fdescenttbl *fdt; struct file *fp; seqc_t seq; cap_rights_t haverights; int error; fdp = td->td_proc->p_fd; fdt = fdp->fd_files; if (__predict_false((u_int)fd >= fdt->fdt_nfiles)) return (EBADF); for (;;) { seq = seqc_read_notmodify(fd_seqc(fdt, fd)); fde = &fdt->fdt_ofiles[fd]; haverights = *cap_rights_fde_inline(fde); fp = fde->fde_file; if (__predict_false(fp == NULL)) { if (seqc_consistent(fd_seqc(fdt, fd), seq)) return (EBADF); fdt = atomic_load_ptr(&fdp->fd_files); continue; } error = cap_check_inline(&haverights, needrightsp); if (__predict_false(error != 0)) { if (seqc_consistent(fd_seqc(fdt, fd), seq)) return (error); fdt = atomic_load_ptr(&fdp->fd_files); continue; } if (__predict_false(!refcount_acquire_if_not_zero(&fp->f_count))) { fdt = atomic_load_ptr(&fdp->fd_files); continue; } /* * Use an acquire barrier to force re-reading of fdt so it is * refreshed for verification. */ atomic_thread_fence_acq(); fdt = fdp->fd_files; if (seqc_consistent_no_fence(fd_seqc(fdt, fd), seq)) break; fdrop(fp, td); } *fpp = fp; if (seqp != NULL) { *seqp = seq; } return (0); } #else static int fget_unlocked_seq(struct thread *td, int fd, cap_rights_t *needrightsp, struct file **fpp, seqc_t *seqp __unused) { struct filedesc *fdp; const struct fdescenttbl *fdt; struct file *fp; fdp = td->td_proc->p_fd; fdt = fdp->fd_files; if (__predict_false((u_int)fd >= fdt->fdt_nfiles)) return (EBADF); for (;;) { fp = fdt->fdt_ofiles[fd].fde_file; if (__predict_false(fp == NULL)) return (EBADF); if (__predict_false(!refcount_acquire_if_not_zero(&fp->f_count))) { fdt = atomic_load_ptr(&fdp->fd_files); continue; } /* * Use an acquire barrier to force re-reading of fdt so it is * refreshed for verification. */ atomic_thread_fence_acq(); fdt = fdp->fd_files; if (__predict_true(fp == fdt->fdt_ofiles[fd].fde_file)) break; fdrop(fp, td); } *fpp = fp; return (0); } #endif /* * See the comments in fget_unlocked_seq for an explanation of how this works. * * This is a simplified variant which bails out to the aforementioned routine * if anything goes wrong. In practice this only happens when userspace is * racing with itself. */ int fget_unlocked(struct thread *td, int fd, cap_rights_t *needrightsp, struct file **fpp) { struct filedesc *fdp; #ifdef CAPABILITIES const struct filedescent *fde; #endif const struct fdescenttbl *fdt; struct file *fp; #ifdef CAPABILITIES seqc_t seq; const cap_rights_t *haverights; #endif fdp = td->td_proc->p_fd; fdt = fdp->fd_files; if (__predict_false((u_int)fd >= fdt->fdt_nfiles)) { *fpp = NULL; return (EBADF); } #ifdef CAPABILITIES seq = seqc_read_notmodify(fd_seqc(fdt, fd)); fde = &fdt->fdt_ofiles[fd]; haverights = cap_rights_fde_inline(fde); fp = fde->fde_file; #else fp = fdt->fdt_ofiles[fd].fde_file; #endif if (__predict_false(fp == NULL)) goto out_fallback; #ifdef CAPABILITIES if (__predict_false(cap_check_inline_transient(haverights, needrightsp))) goto out_fallback; #endif if (__predict_false(!refcount_acquire_if_not_zero(&fp->f_count))) goto out_fallback; /* * Use an acquire barrier to force re-reading of fdt so it is * refreshed for verification. */ atomic_thread_fence_acq(); fdt = fdp->fd_files; #ifdef CAPABILITIES if (__predict_false(!seqc_consistent_no_fence(fd_seqc(fdt, fd), seq))) #else if (__predict_false(fp != fdt->fdt_ofiles[fd].fde_file)) #endif goto out_fdrop; *fpp = fp; return (0); out_fdrop: fdrop(fp, td); out_fallback: *fpp = NULL; return (fget_unlocked_seq(td, fd, needrightsp, fpp, NULL)); } /* * Translate fd -> file when the caller guarantees the file descriptor table * can't be changed by others. * * Note this does not mean the file object itself is only visible to the caller, * merely that it wont disappear without having to be referenced. * * Must be paired with fput_only_user. */ #ifdef CAPABILITIES int fget_only_user(struct filedesc *fdp, int fd, cap_rights_t *needrightsp, struct file **fpp) { const struct filedescent *fde; const struct fdescenttbl *fdt; const cap_rights_t *haverights; struct file *fp; int error; MPASS(FILEDESC_IS_ONLY_USER(fdp)); *fpp = NULL; if (__predict_false(fd >= fdp->fd_nfiles)) return (EBADF); fdt = fdp->fd_files; fde = &fdt->fdt_ofiles[fd]; fp = fde->fde_file; if (__predict_false(fp == NULL)) return (EBADF); MPASS(refcount_load(&fp->f_count) > 0); haverights = cap_rights_fde_inline(fde); error = cap_check_inline(haverights, needrightsp); if (__predict_false(error != 0)) return (error); *fpp = fp; return (0); } #else int fget_only_user(struct filedesc *fdp, int fd, cap_rights_t *needrightsp, struct file **fpp) { struct file *fp; MPASS(FILEDESC_IS_ONLY_USER(fdp)); *fpp = NULL; if (__predict_false(fd >= fdp->fd_nfiles)) return (EBADF); fp = fdp->fd_ofiles[fd].fde_file; if (__predict_false(fp == NULL)) return (EBADF); MPASS(refcount_load(&fp->f_count) > 0); *fpp = fp; return (0); } #endif /* * Extract the file pointer associated with the specified descriptor for the * current user process. * * If the descriptor doesn't exist or doesn't match 'flags', EBADF is * returned. * * File's rights will be checked against the capability rights mask. * * If an error occurred the non-zero error is returned and *fpp is set to * NULL. Otherwise *fpp is held and set and zero is returned. Caller is * responsible for fdrop(). */ static __inline int _fget(struct thread *td, int fd, struct file **fpp, int flags, cap_rights_t *needrightsp) { struct file *fp; int error; *fpp = NULL; error = fget_unlocked(td, fd, needrightsp, &fp); if (__predict_false(error != 0)) return (error); if (__predict_false(fp->f_ops == &badfileops)) { fdrop(fp, td); return (EBADF); } /* * FREAD and FWRITE failure return EBADF as per POSIX. */ error = 0; switch (flags) { case FREAD: case FWRITE: if ((fp->f_flag & flags) == 0) error = EBADF; break; case FEXEC: if (fp->f_ops != &path_fileops && ((fp->f_flag & (FREAD | FEXEC)) == 0 || (fp->f_flag & FWRITE) != 0)) error = EBADF; break; case 0: break; default: KASSERT(0, ("wrong flags")); } if (error != 0) { fdrop(fp, td); return (error); } *fpp = fp; return (0); } int fget(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) { return (_fget(td, fd, fpp, 0, rightsp)); } int fget_mmap(struct thread *td, int fd, cap_rights_t *rightsp, vm_prot_t *maxprotp, struct file **fpp) { int error; #ifndef CAPABILITIES error = _fget(td, fd, fpp, 0, rightsp); if (maxprotp != NULL) *maxprotp = VM_PROT_ALL; return (error); #else cap_rights_t fdrights; struct filedesc *fdp; struct file *fp; seqc_t seq; *fpp = NULL; fdp = td->td_proc->p_fd; MPASS(cap_rights_is_set(rightsp, CAP_MMAP)); for (;;) { error = fget_unlocked_seq(td, fd, rightsp, &fp, &seq); if (__predict_false(error != 0)) return (error); if (__predict_false(fp->f_ops == &badfileops)) { fdrop(fp, td); return (EBADF); } if (maxprotp != NULL) fdrights = *cap_rights(fdp, fd); if (!fd_modified(fdp, fd, seq)) break; fdrop(fp, td); } /* * If requested, convert capability rights to access flags. */ if (maxprotp != NULL) *maxprotp = cap_rights_to_vmprot(&fdrights); *fpp = fp; return (0); #endif } int fget_read(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) { return (_fget(td, fd, fpp, FREAD, rightsp)); } int fget_write(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) { return (_fget(td, fd, fpp, FWRITE, rightsp)); } int fget_fcntl(struct thread *td, int fd, cap_rights_t *rightsp, int needfcntl, struct file **fpp) { #ifndef CAPABILITIES return (fget_unlocked(td, fd, rightsp, fpp)); #else struct filedesc *fdp = td->td_proc->p_fd; struct file *fp; int error; seqc_t seq; *fpp = NULL; MPASS(cap_rights_is_set(rightsp, CAP_FCNTL)); for (;;) { error = fget_unlocked_seq(td, fd, rightsp, &fp, &seq); if (error != 0) return (error); error = cap_fcntl_check(fdp, fd, needfcntl); if (!fd_modified(fdp, fd, seq)) break; fdrop(fp, td); } if (error != 0) { fdrop(fp, td); return (error); } *fpp = fp; return (0); #endif } /* * Like fget() but loads the underlying vnode, or returns an error if the * descriptor does not represent a vnode. Note that pipes use vnodes but * never have VM objects. The returned vnode will be vref()'d. * * XXX: what about the unused flags ? */ static __inline int _fgetvp(struct thread *td, int fd, int flags, cap_rights_t *needrightsp, struct vnode **vpp) { struct file *fp; int error; *vpp = NULL; error = _fget(td, fd, &fp, flags, needrightsp); if (error != 0) return (error); if (fp->f_vnode == NULL) { error = EINVAL; } else { *vpp = fp->f_vnode; vref(*vpp); } fdrop(fp, td); return (error); } int fgetvp(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) { return (_fgetvp(td, fd, 0, rightsp, vpp)); } int fgetvp_rights(struct thread *td, int fd, cap_rights_t *needrightsp, struct filecaps *havecaps, struct vnode **vpp) { struct filecaps caps; struct file *fp; int error; error = fget_cap(td, fd, needrightsp, &fp, &caps); if (error != 0) return (error); if (fp->f_ops == &badfileops) { error = EBADF; goto out; } if (fp->f_vnode == NULL) { error = EINVAL; goto out; } *havecaps = caps; *vpp = fp->f_vnode; vref(*vpp); fdrop(fp, td); return (0); out: filecaps_free(&caps); fdrop(fp, td); return (error); } int fgetvp_read(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) { return (_fgetvp(td, fd, FREAD, rightsp, vpp)); } int fgetvp_exec(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) { return (_fgetvp(td, fd, FEXEC, rightsp, vpp)); } #ifdef notyet int fgetvp_write(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) { return (_fgetvp(td, fd, FWRITE, rightsp, vpp)); } #endif /* * Handle the last reference to a file being closed. * * Without the noinline attribute clang keeps inlining the func thorough this * file when fdrop is used. */ int __noinline _fdrop(struct file *fp, struct thread *td) { int error; #ifdef INVARIANTS int count; count = refcount_load(&fp->f_count); if (count != 0) panic("fdrop: fp %p count %d", fp, count); #endif error = fo_close(fp, td); atomic_subtract_int(&openfiles, 1); crfree(fp->f_cred); free(fp->f_advice, M_FADVISE); uma_zfree(file_zone, fp); return (error); } /* * Apply an advisory lock on a file descriptor. * * Just attempt to get a record lock of the requested type on the entire file * (l_whence = SEEK_SET, l_start = 0, l_len = 0). */ #ifndef _SYS_SYSPROTO_H_ struct flock_args { int fd; int how; }; #endif /* ARGSUSED */ int sys_flock(struct thread *td, struct flock_args *uap) { struct file *fp; struct vnode *vp; struct flock lf; int error; error = fget(td, uap->fd, &cap_flock_rights, &fp); if (error != 0) return (error); error = EOPNOTSUPP; if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) { goto done; } if (fp->f_ops == &path_fileops) { goto done; } error = 0; vp = fp->f_vnode; lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (uap->how & LOCK_UN) { lf.l_type = F_UNLCK; atomic_clear_int(&fp->f_flag, FHASLOCK); error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); goto done; } if (uap->how & LOCK_EX) lf.l_type = F_WRLCK; else if (uap->how & LOCK_SH) lf.l_type = F_RDLCK; else { error = EBADF; goto done; } atomic_set_int(&fp->f_flag, FHASLOCK); error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); done: fdrop(fp, td); return (error); } /* * Duplicate the specified descriptor to a free descriptor. */ int dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp) { struct filedescent *newfde, *oldfde; struct file *fp; u_long *ioctls; int error, indx; KASSERT(openerror == ENODEV || openerror == ENXIO, ("unexpected error %d in %s", openerror, __func__)); /* * If the to-be-dup'd fd number is greater than the allowed number * of file descriptors, or the fd to be dup'd has already been * closed, then reject. */ FILEDESC_XLOCK(fdp); if ((fp = fget_noref(fdp, dfd)) == NULL) { FILEDESC_XUNLOCK(fdp); return (EBADF); } error = fdalloc(td, 0, &indx); if (error != 0) { FILEDESC_XUNLOCK(fdp); return (error); } /* * There are two cases of interest here. * * For ENODEV simply dup (dfd) to file descriptor (indx) and return. * * For ENXIO steal away the file structure from (dfd) and store it in * (indx). (dfd) is effectively closed by this operation. */ switch (openerror) { case ENODEV: /* * Check that the mode the file is being opened for is a * subset of the mode of the existing descriptor. */ if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { fdunused(fdp, indx); FILEDESC_XUNLOCK(fdp); return (EACCES); } if (!fhold(fp)) { fdunused(fdp, indx); FILEDESC_XUNLOCK(fdp); return (EBADF); } newfde = &fdp->fd_ofiles[indx]; oldfde = &fdp->fd_ofiles[dfd]; ioctls = filecaps_copy_prep(&oldfde->fde_caps); #ifdef CAPABILITIES seqc_write_begin(&newfde->fde_seqc); #endif fde_copy(oldfde, newfde); filecaps_copy_finish(&oldfde->fde_caps, &newfde->fde_caps, ioctls); #ifdef CAPABILITIES seqc_write_end(&newfde->fde_seqc); #endif break; case ENXIO: /* * Steal away the file pointer from dfd and stuff it into indx. */ newfde = &fdp->fd_ofiles[indx]; oldfde = &fdp->fd_ofiles[dfd]; #ifdef CAPABILITIES seqc_write_begin(&oldfde->fde_seqc); seqc_write_begin(&newfde->fde_seqc); #endif fde_copy(oldfde, newfde); oldfde->fde_file = NULL; fdunused(fdp, dfd); #ifdef CAPABILITIES seqc_write_end(&newfde->fde_seqc); seqc_write_end(&oldfde->fde_seqc); #endif break; } FILEDESC_XUNLOCK(fdp); *indxp = indx; return (0); } /* * This sysctl determines if we will allow a process to chroot(2) if it * has a directory open: * 0: disallowed for all processes. * 1: allowed for processes that were not already chroot(2)'ed. * 2: allowed for all processes. */ static int chroot_allow_open_directories = 1; SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, &chroot_allow_open_directories, 0, "Allow a process to chroot(2) if it has a directory open"); /* * Helper function for raised chroot(2) security function: Refuse if * any filedescriptors are open directories. */ static int chroot_refuse_vdir_fds(struct filedesc *fdp) { struct vnode *vp; struct file *fp; int i; FILEDESC_LOCK_ASSERT(fdp); FILEDESC_FOREACH_FP(fdp, i, fp) { if (fp->f_type == DTYPE_VNODE) { vp = fp->f_vnode; if (vp->v_type == VDIR) return (EPERM); } } return (0); } static void pwd_fill(struct pwd *oldpwd, struct pwd *newpwd) { if (newpwd->pwd_cdir == NULL && oldpwd->pwd_cdir != NULL) { vrefact(oldpwd->pwd_cdir); newpwd->pwd_cdir = oldpwd->pwd_cdir; } if (newpwd->pwd_rdir == NULL && oldpwd->pwd_rdir != NULL) { vrefact(oldpwd->pwd_rdir); newpwd->pwd_rdir = oldpwd->pwd_rdir; } if (newpwd->pwd_jdir == NULL && oldpwd->pwd_jdir != NULL) { vrefact(oldpwd->pwd_jdir); newpwd->pwd_jdir = oldpwd->pwd_jdir; } } struct pwd * pwd_hold_pwddesc(struct pwddesc *pdp) { struct pwd *pwd; PWDDESC_ASSERT_XLOCKED(pdp); pwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); if (pwd != NULL) refcount_acquire(&pwd->pwd_refcount); return (pwd); } bool pwd_hold_smr(struct pwd *pwd) { MPASS(pwd != NULL); if (__predict_true(refcount_acquire_if_not_zero(&pwd->pwd_refcount))) { return (true); } return (false); } struct pwd * pwd_hold(struct thread *td) { struct pwddesc *pdp; struct pwd *pwd; pdp = td->td_proc->p_pd; vfs_smr_enter(); pwd = vfs_smr_entered_load(&pdp->pd_pwd); if (pwd_hold_smr(pwd)) { vfs_smr_exit(); return (pwd); } vfs_smr_exit(); PWDDESC_XLOCK(pdp); pwd = pwd_hold_pwddesc(pdp); MPASS(pwd != NULL); PWDDESC_XUNLOCK(pdp); return (pwd); } struct pwd * pwd_hold_proc(struct proc *p) { struct pwddesc *pdp; struct pwd *pwd; PROC_ASSERT_HELD(p); PROC_LOCK(p); pdp = pdhold(p); MPASS(pdp != NULL); PROC_UNLOCK(p); PWDDESC_XLOCK(pdp); pwd = pwd_hold_pwddesc(pdp); MPASS(pwd != NULL); PWDDESC_XUNLOCK(pdp); pddrop(pdp); return (pwd); } static struct pwd * pwd_alloc(void) { struct pwd *pwd; pwd = uma_zalloc_smr(pwd_zone, M_WAITOK); bzero(pwd, sizeof(*pwd)); refcount_init(&pwd->pwd_refcount, 1); return (pwd); } void pwd_drop(struct pwd *pwd) { if (!refcount_release(&pwd->pwd_refcount)) return; if (pwd->pwd_cdir != NULL) vrele(pwd->pwd_cdir); if (pwd->pwd_rdir != NULL) vrele(pwd->pwd_rdir); if (pwd->pwd_jdir != NULL) vrele(pwd->pwd_jdir); uma_zfree_smr(pwd_zone, pwd); } /* * The caller is responsible for invoking priv_check() and * mac_vnode_check_chroot() to authorize this operation. */ int pwd_chroot(struct thread *td, struct vnode *vp) { struct pwddesc *pdp; struct filedesc *fdp; struct pwd *newpwd, *oldpwd; int error; fdp = td->td_proc->p_fd; pdp = td->td_proc->p_pd; newpwd = pwd_alloc(); FILEDESC_SLOCK(fdp); PWDDESC_XLOCK(pdp); oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); if (chroot_allow_open_directories == 0 || (chroot_allow_open_directories == 1 && oldpwd->pwd_rdir != rootvnode)) { error = chroot_refuse_vdir_fds(fdp); FILEDESC_SUNLOCK(fdp); if (error != 0) { PWDDESC_XUNLOCK(pdp); pwd_drop(newpwd); return (error); } } else { FILEDESC_SUNLOCK(fdp); } vrefact(vp); newpwd->pwd_rdir = vp; if (oldpwd->pwd_jdir == NULL) { vrefact(vp); newpwd->pwd_jdir = vp; } pwd_fill(oldpwd, newpwd); pwd_set(pdp, newpwd); PWDDESC_XUNLOCK(pdp); pwd_drop(oldpwd); return (0); } void pwd_chdir(struct thread *td, struct vnode *vp) { struct pwddesc *pdp; struct pwd *newpwd, *oldpwd; VNPASS(vp->v_usecount > 0, vp); newpwd = pwd_alloc(); pdp = td->td_proc->p_pd; PWDDESC_XLOCK(pdp); oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); newpwd->pwd_cdir = vp; pwd_fill(oldpwd, newpwd); pwd_set(pdp, newpwd); PWDDESC_XUNLOCK(pdp); pwd_drop(oldpwd); } /* * jail_attach(2) changes both root and working directories. */ int pwd_chroot_chdir(struct thread *td, struct vnode *vp) { struct pwddesc *pdp; struct filedesc *fdp; struct pwd *newpwd, *oldpwd; int error; fdp = td->td_proc->p_fd; pdp = td->td_proc->p_pd; newpwd = pwd_alloc(); FILEDESC_SLOCK(fdp); PWDDESC_XLOCK(pdp); oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); error = chroot_refuse_vdir_fds(fdp); FILEDESC_SUNLOCK(fdp); if (error != 0) { PWDDESC_XUNLOCK(pdp); pwd_drop(newpwd); return (error); } vrefact(vp); newpwd->pwd_rdir = vp; vrefact(vp); newpwd->pwd_cdir = vp; if (oldpwd->pwd_jdir == NULL) { vrefact(vp); newpwd->pwd_jdir = vp; } pwd_fill(oldpwd, newpwd); pwd_set(pdp, newpwd); PWDDESC_XUNLOCK(pdp); pwd_drop(oldpwd); return (0); } void pwd_ensure_dirs(void) { struct pwddesc *pdp; struct pwd *oldpwd, *newpwd; pdp = curproc->p_pd; PWDDESC_XLOCK(pdp); oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); if (oldpwd->pwd_cdir != NULL && oldpwd->pwd_rdir != NULL) { PWDDESC_XUNLOCK(pdp); return; } PWDDESC_XUNLOCK(pdp); newpwd = pwd_alloc(); PWDDESC_XLOCK(pdp); oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); pwd_fill(oldpwd, newpwd); if (newpwd->pwd_cdir == NULL) { vrefact(rootvnode); newpwd->pwd_cdir = rootvnode; } if (newpwd->pwd_rdir == NULL) { vrefact(rootvnode); newpwd->pwd_rdir = rootvnode; } pwd_set(pdp, newpwd); PWDDESC_XUNLOCK(pdp); pwd_drop(oldpwd); } void pwd_set_rootvnode(void) { struct pwddesc *pdp; struct pwd *oldpwd, *newpwd; pdp = curproc->p_pd; newpwd = pwd_alloc(); PWDDESC_XLOCK(pdp); oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); vrefact(rootvnode); newpwd->pwd_cdir = rootvnode; vrefact(rootvnode); newpwd->pwd_rdir = rootvnode; pwd_fill(oldpwd, newpwd); pwd_set(pdp, newpwd); PWDDESC_XUNLOCK(pdp); pwd_drop(oldpwd); } /* * Scan all active processes and prisons to see if any of them have a current * or root directory of `olddp'. If so, replace them with the new mount point. */ void mountcheckdirs(struct vnode *olddp, struct vnode *newdp) { struct pwddesc *pdp; struct pwd *newpwd, *oldpwd; struct prison *pr; struct proc *p; int nrele; if (vrefcnt(olddp) == 1) return; nrele = 0; newpwd = pwd_alloc(); sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); pdp = pdhold(p); PROC_UNLOCK(p); if (pdp == NULL) continue; PWDDESC_XLOCK(pdp); oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); if (oldpwd == NULL || (oldpwd->pwd_cdir != olddp && oldpwd->pwd_rdir != olddp && oldpwd->pwd_jdir != olddp)) { PWDDESC_XUNLOCK(pdp); pddrop(pdp); continue; } if (oldpwd->pwd_cdir == olddp) { vrefact(newdp); newpwd->pwd_cdir = newdp; } if (oldpwd->pwd_rdir == olddp) { vrefact(newdp); newpwd->pwd_rdir = newdp; } if (oldpwd->pwd_jdir == olddp) { vrefact(newdp); newpwd->pwd_jdir = newdp; } pwd_fill(oldpwd, newpwd); pwd_set(pdp, newpwd); PWDDESC_XUNLOCK(pdp); pwd_drop(oldpwd); pddrop(pdp); newpwd = pwd_alloc(); } sx_sunlock(&allproc_lock); pwd_drop(newpwd); if (rootvnode == olddp) { vrefact(newdp); rootvnode = newdp; nrele++; } mtx_lock(&prison0.pr_mtx); if (prison0.pr_root == olddp) { vrefact(newdp); prison0.pr_root = newdp; nrele++; } mtx_unlock(&prison0.pr_mtx); sx_slock(&allprison_lock); TAILQ_FOREACH(pr, &allprison, pr_list) { mtx_lock(&pr->pr_mtx); if (pr->pr_root == olddp) { vrefact(newdp); pr->pr_root = newdp; nrele++; } mtx_unlock(&pr->pr_mtx); } sx_sunlock(&allprison_lock); while (nrele--) vrele(olddp); } struct filedesc_to_leader * filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader) { struct filedesc_to_leader *fdtol; fdtol = malloc(sizeof(struct filedesc_to_leader), M_FILEDESC_TO_LEADER, M_WAITOK); fdtol->fdl_refcount = 1; fdtol->fdl_holdcount = 0; fdtol->fdl_wakeup = 0; fdtol->fdl_leader = leader; if (old != NULL) { FILEDESC_XLOCK(fdp); fdtol->fdl_next = old->fdl_next; fdtol->fdl_prev = old; old->fdl_next = fdtol; fdtol->fdl_next->fdl_prev = fdtol; FILEDESC_XUNLOCK(fdp); } else { fdtol->fdl_next = fdtol; fdtol->fdl_prev = fdtol; } return (fdtol); } static int sysctl_kern_proc_nfds(SYSCTL_HANDLER_ARGS) { NDSLOTTYPE *map; struct filedesc *fdp; u_int namelen; int count, off, minoff; namelen = arg2; if (namelen != 1) return (EINVAL); if (*(int *)arg1 != 0) return (EINVAL); fdp = curproc->p_fd; count = 0; FILEDESC_SLOCK(fdp); map = fdp->fd_map; off = NDSLOT(fdp->fd_nfiles - 1); for (minoff = NDSLOT(0); off >= minoff; --off) count += bitcountl(map[off]); FILEDESC_SUNLOCK(fdp); return (SYSCTL_OUT(req, &count, sizeof(count))); } static SYSCTL_NODE(_kern_proc, KERN_PROC_NFDS, nfds, CTLFLAG_RD|CTLFLAG_CAPRD|CTLFLAG_MPSAFE, sysctl_kern_proc_nfds, "Number of open file descriptors"); /* * Get file structures globally. */ static int sysctl_kern_file(SYSCTL_HANDLER_ARGS) { struct xfile xf; struct filedesc *fdp; struct file *fp; struct proc *p; int error, n; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); if (req->oldptr == NULL) { n = 0; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } fdp = fdhold(p); PROC_UNLOCK(p); if (fdp == NULL) continue; /* overestimates sparse tables. */ n += fdp->fd_nfiles; fddrop(fdp); } sx_sunlock(&allproc_lock); return (SYSCTL_OUT(req, 0, n * sizeof(xf))); } error = 0; bzero(&xf, sizeof(xf)); xf.xf_size = sizeof(xf); sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } if (p_cansee(req->td, p) != 0) { PROC_UNLOCK(p); continue; } xf.xf_pid = p->p_pid; xf.xf_uid = p->p_ucred->cr_uid; fdp = fdhold(p); PROC_UNLOCK(p); if (fdp == NULL) continue; FILEDESC_SLOCK(fdp); FILEDESC_FOREACH_FP(fdp, n, fp) { if (refcount_load(&fdp->fd_refcnt) == 0) break; xf.xf_fd = n; xf.xf_file = (uintptr_t)fp; xf.xf_data = (uintptr_t)fp->f_data; xf.xf_vnode = (uintptr_t)fp->f_vnode; xf.xf_type = (uintptr_t)fp->f_type; xf.xf_count = refcount_load(&fp->f_count); xf.xf_msgcount = 0; xf.xf_offset = foffset_get(fp); xf.xf_flag = fp->f_flag; error = SYSCTL_OUT(req, &xf, sizeof(xf)); if (error) break; } FILEDESC_SUNLOCK(fdp); fddrop(fdp); if (error) break; } sx_sunlock(&allproc_lock); return (error); } SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE, 0, 0, sysctl_kern_file, "S,xfile", "Entire file table"); #ifdef KINFO_FILE_SIZE CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE); #endif static int xlate_fflags(int fflags) { static const struct { int fflag; int kf_fflag; } fflags_table[] = { { FAPPEND, KF_FLAG_APPEND }, { FASYNC, KF_FLAG_ASYNC }, { FFSYNC, KF_FLAG_FSYNC }, { FHASLOCK, KF_FLAG_HASLOCK }, { FNONBLOCK, KF_FLAG_NONBLOCK }, { FREAD, KF_FLAG_READ }, { FWRITE, KF_FLAG_WRITE }, { O_CREAT, KF_FLAG_CREAT }, { O_DIRECT, KF_FLAG_DIRECT }, { O_EXCL, KF_FLAG_EXCL }, { O_EXEC, KF_FLAG_EXEC }, { O_EXLOCK, KF_FLAG_EXLOCK }, { O_NOFOLLOW, KF_FLAG_NOFOLLOW }, { O_SHLOCK, KF_FLAG_SHLOCK }, { O_TRUNC, KF_FLAG_TRUNC } }; unsigned int i; int kflags; kflags = 0; for (i = 0; i < nitems(fflags_table); i++) if (fflags & fflags_table[i].fflag) kflags |= fflags_table[i].kf_fflag; return (kflags); } /* Trim unused data from kf_path by truncating the structure size. */ void pack_kinfo(struct kinfo_file *kif) { kif->kf_structsize = offsetof(struct kinfo_file, kf_path) + strlen(kif->kf_path) + 1; kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t)); } static void export_file_to_kinfo(struct file *fp, int fd, cap_rights_t *rightsp, struct kinfo_file *kif, struct filedesc *fdp, int flags) { int error; bzero(kif, sizeof(*kif)); /* Set a default type to allow for empty fill_kinfo() methods. */ kif->kf_type = KF_TYPE_UNKNOWN; kif->kf_flags = xlate_fflags(fp->f_flag); if (rightsp != NULL) kif->kf_cap_rights = *rightsp; else cap_rights_init_zero(&kif->kf_cap_rights); kif->kf_fd = fd; kif->kf_ref_count = refcount_load(&fp->f_count); kif->kf_offset = foffset_get(fp); /* * This may drop the filedesc lock, so the 'fp' cannot be * accessed after this call. */ error = fo_fill_kinfo(fp, kif, fdp); if (error == 0) kif->kf_status |= KF_ATTR_VALID; if ((flags & KERN_FILEDESC_PACK_KINFO) != 0) pack_kinfo(kif); else kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t)); } static void export_vnode_to_kinfo(struct vnode *vp, int fd, int fflags, struct kinfo_file *kif, int flags) { int error; bzero(kif, sizeof(*kif)); kif->kf_type = KF_TYPE_VNODE; error = vn_fill_kinfo_vnode(vp, kif); if (error == 0) kif->kf_status |= KF_ATTR_VALID; kif->kf_flags = xlate_fflags(fflags); cap_rights_init_zero(&kif->kf_cap_rights); kif->kf_fd = fd; kif->kf_ref_count = -1; kif->kf_offset = -1; if ((flags & KERN_FILEDESC_PACK_KINFO) != 0) pack_kinfo(kif); else kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t)); vrele(vp); } struct export_fd_buf { struct filedesc *fdp; struct pwddesc *pdp; struct sbuf *sb; ssize_t remainder; struct kinfo_file kif; int flags; }; static int export_kinfo_to_sb(struct export_fd_buf *efbuf) { struct kinfo_file *kif; kif = &efbuf->kif; if (efbuf->remainder != -1) { if (efbuf->remainder < kif->kf_structsize) return (ENOMEM); efbuf->remainder -= kif->kf_structsize; } if (sbuf_bcat(efbuf->sb, kif, kif->kf_structsize) != 0) return (sbuf_error(efbuf->sb)); return (0); } static int export_file_to_sb(struct file *fp, int fd, cap_rights_t *rightsp, struct export_fd_buf *efbuf) { int error; if (efbuf->remainder == 0) return (ENOMEM); export_file_to_kinfo(fp, fd, rightsp, &efbuf->kif, efbuf->fdp, efbuf->flags); FILEDESC_SUNLOCK(efbuf->fdp); error = export_kinfo_to_sb(efbuf); FILEDESC_SLOCK(efbuf->fdp); return (error); } static int export_vnode_to_sb(struct vnode *vp, int fd, int fflags, struct export_fd_buf *efbuf) { int error; if (efbuf->remainder == 0) return (ENOMEM); if (efbuf->pdp != NULL) PWDDESC_XUNLOCK(efbuf->pdp); export_vnode_to_kinfo(vp, fd, fflags, &efbuf->kif, efbuf->flags); error = export_kinfo_to_sb(efbuf); if (efbuf->pdp != NULL) PWDDESC_XLOCK(efbuf->pdp); return (error); } /* * Store a process file descriptor information to sbuf. * * Takes a locked proc as argument, and returns with the proc unlocked. */ int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags) { struct file *fp; struct filedesc *fdp; struct pwddesc *pdp; struct export_fd_buf *efbuf; struct vnode *cttyvp, *textvp, *tracevp; struct pwd *pwd; int error, i; cap_rights_t rights; PROC_LOCK_ASSERT(p, MA_OWNED); /* ktrace vnode */ tracevp = ktr_get_tracevp(p, true); /* text vnode */ textvp = p->p_textvp; if (textvp != NULL) vrefact(textvp); /* Controlling tty. */ cttyvp = NULL; if (p->p_pgrp != NULL && p->p_pgrp->pg_session != NULL) { cttyvp = p->p_pgrp->pg_session->s_ttyvp; if (cttyvp != NULL) vrefact(cttyvp); } fdp = fdhold(p); pdp = pdhold(p); PROC_UNLOCK(p); efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK); efbuf->fdp = NULL; efbuf->pdp = NULL; efbuf->sb = sb; efbuf->remainder = maxlen; efbuf->flags = flags; error = 0; if (tracevp != NULL) error = export_vnode_to_sb(tracevp, KF_FD_TYPE_TRACE, FREAD | FWRITE, efbuf); if (error == 0 && textvp != NULL) error = export_vnode_to_sb(textvp, KF_FD_TYPE_TEXT, FREAD, efbuf); if (error == 0 && cttyvp != NULL) error = export_vnode_to_sb(cttyvp, KF_FD_TYPE_CTTY, FREAD | FWRITE, efbuf); if (error != 0 || pdp == NULL || fdp == NULL) goto fail; efbuf->fdp = fdp; efbuf->pdp = pdp; PWDDESC_XLOCK(pdp); pwd = pwd_hold_pwddesc(pdp); if (pwd != NULL) { /* working directory */ if (pwd->pwd_cdir != NULL) { vrefact(pwd->pwd_cdir); error = export_vnode_to_sb(pwd->pwd_cdir, KF_FD_TYPE_CWD, FREAD, efbuf); } /* root directory */ if (error == 0 && pwd->pwd_rdir != NULL) { vrefact(pwd->pwd_rdir); error = export_vnode_to_sb(pwd->pwd_rdir, KF_FD_TYPE_ROOT, FREAD, efbuf); } /* jail directory */ if (error == 0 && pwd->pwd_jdir != NULL) { vrefact(pwd->pwd_jdir); error = export_vnode_to_sb(pwd->pwd_jdir, KF_FD_TYPE_JAIL, FREAD, efbuf); } } PWDDESC_XUNLOCK(pdp); if (error != 0) goto fail; if (pwd != NULL) pwd_drop(pwd); FILEDESC_SLOCK(fdp); FILEDESC_FOREACH_FP(fdp, i, fp) { if (refcount_load(&fdp->fd_refcnt) == 0) break; #ifdef CAPABILITIES rights = *cap_rights(fdp, i); #else /* !CAPABILITIES */ rights = cap_no_rights; #endif /* * Create sysctl entry. It is OK to drop the filedesc * lock inside of export_file_to_sb() as we will * re-validate and re-evaluate its properties when the * loop continues. */ error = export_file_to_sb(fp, i, &rights, efbuf); if (error != 0) break; } FILEDESC_SUNLOCK(fdp); fail: if (fdp != NULL) fddrop(fdp); if (pdp != NULL) pddrop(pdp); free(efbuf, M_TEMP); return (error); } #define FILEDESC_SBUF_SIZE (sizeof(struct kinfo_file) * 5) /* * Get per-process file descriptors for use by procstat(1), et al. */ static int sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) { struct sbuf sb; struct proc *p; ssize_t maxlen; u_int namelen; int error, error2, *name; namelen = arg2; if (namelen != 1) return (EINVAL); name = (int *)arg1; sbuf_new_for_sysctl(&sb, NULL, FILEDESC_SBUF_SIZE, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); if (error != 0) { sbuf_delete(&sb); return (error); } maxlen = req->oldptr != NULL ? req->oldlen : -1; error = kern_proc_filedesc_out(p, &sb, maxlen, KERN_FILEDESC_PACK_KINFO); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? error : error2); } #ifdef COMPAT_FREEBSD7 #ifdef KINFO_OFILE_SIZE CTASSERT(sizeof(struct kinfo_ofile) == KINFO_OFILE_SIZE); #endif static void kinfo_to_okinfo(struct kinfo_file *kif, struct kinfo_ofile *okif) { okif->kf_structsize = sizeof(*okif); okif->kf_type = kif->kf_type; okif->kf_fd = kif->kf_fd; okif->kf_ref_count = kif->kf_ref_count; okif->kf_flags = kif->kf_flags & (KF_FLAG_READ | KF_FLAG_WRITE | KF_FLAG_APPEND | KF_FLAG_ASYNC | KF_FLAG_FSYNC | KF_FLAG_NONBLOCK | KF_FLAG_DIRECT | KF_FLAG_HASLOCK); okif->kf_offset = kif->kf_offset; if (kif->kf_type == KF_TYPE_VNODE) okif->kf_vnode_type = kif->kf_un.kf_file.kf_file_type; else okif->kf_vnode_type = KF_VTYPE_VNON; strlcpy(okif->kf_path, kif->kf_path, sizeof(okif->kf_path)); if (kif->kf_type == KF_TYPE_SOCKET) { okif->kf_sock_domain = kif->kf_un.kf_sock.kf_sock_domain0; okif->kf_sock_type = kif->kf_un.kf_sock.kf_sock_type0; okif->kf_sock_protocol = kif->kf_un.kf_sock.kf_sock_protocol0; okif->kf_sa_local = kif->kf_un.kf_sock.kf_sa_local; okif->kf_sa_peer = kif->kf_un.kf_sock.kf_sa_peer; } else { okif->kf_sa_local.ss_family = AF_UNSPEC; okif->kf_sa_peer.ss_family = AF_UNSPEC; } } static int export_vnode_for_osysctl(struct vnode *vp, int type, struct kinfo_file *kif, struct kinfo_ofile *okif, struct pwddesc *pdp, struct sysctl_req *req) { int error; vrefact(vp); PWDDESC_XUNLOCK(pdp); export_vnode_to_kinfo(vp, type, 0, kif, KERN_FILEDESC_PACK_KINFO); kinfo_to_okinfo(kif, okif); error = SYSCTL_OUT(req, okif, sizeof(*okif)); PWDDESC_XLOCK(pdp); return (error); } /* * Get per-process file descriptors for use by procstat(1), et al. */ static int sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) { struct kinfo_ofile *okif; struct kinfo_file *kif; struct filedesc *fdp; struct pwddesc *pdp; struct pwd *pwd; u_int namelen; int error, i, *name; struct file *fp; struct proc *p; namelen = arg2; if (namelen != 1) return (EINVAL); name = (int *)arg1; error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); if (error != 0) return (error); fdp = fdhold(p); if (fdp != NULL) pdp = pdhold(p); PROC_UNLOCK(p); if (fdp == NULL || pdp == NULL) { if (fdp != NULL) fddrop(fdp); return (ENOENT); } kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); okif = malloc(sizeof(*okif), M_TEMP, M_WAITOK); PWDDESC_XLOCK(pdp); pwd = pwd_hold_pwddesc(pdp); if (pwd != NULL) { if (pwd->pwd_cdir != NULL) export_vnode_for_osysctl(pwd->pwd_cdir, KF_FD_TYPE_CWD, kif, okif, pdp, req); if (pwd->pwd_rdir != NULL) export_vnode_for_osysctl(pwd->pwd_rdir, KF_FD_TYPE_ROOT, kif, okif, pdp, req); if (pwd->pwd_jdir != NULL) export_vnode_for_osysctl(pwd->pwd_jdir, KF_FD_TYPE_JAIL, kif, okif, pdp, req); } PWDDESC_XUNLOCK(pdp); if (pwd != NULL) pwd_drop(pwd); FILEDESC_SLOCK(fdp); FILEDESC_FOREACH_FP(fdp, i, fp) { if (refcount_load(&fdp->fd_refcnt) == 0) break; export_file_to_kinfo(fp, i, NULL, kif, fdp, KERN_FILEDESC_PACK_KINFO); FILEDESC_SUNLOCK(fdp); kinfo_to_okinfo(kif, okif); error = SYSCTL_OUT(req, okif, sizeof(*okif)); FILEDESC_SLOCK(fdp); if (error) break; } FILEDESC_SUNLOCK(fdp); fddrop(fdp); pddrop(pdp); free(kif, M_TEMP); free(okif, M_TEMP); return (0); } static SYSCTL_NODE(_kern_proc, KERN_PROC_OFILEDESC, ofiledesc, CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_ofiledesc, "Process ofiledesc entries"); #endif /* COMPAT_FREEBSD7 */ int vntype_to_kinfo(int vtype) { struct { int vtype; int kf_vtype; } vtypes_table[] = { { VBAD, KF_VTYPE_VBAD }, { VBLK, KF_VTYPE_VBLK }, { VCHR, KF_VTYPE_VCHR }, { VDIR, KF_VTYPE_VDIR }, { VFIFO, KF_VTYPE_VFIFO }, { VLNK, KF_VTYPE_VLNK }, { VNON, KF_VTYPE_VNON }, { VREG, KF_VTYPE_VREG }, { VSOCK, KF_VTYPE_VSOCK } }; unsigned int i; /* * Perform vtype translation. */ for (i = 0; i < nitems(vtypes_table); i++) if (vtypes_table[i].vtype == vtype) return (vtypes_table[i].kf_vtype); return (KF_VTYPE_UNKNOWN); } static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_filedesc, "Process filedesc entries"); /* * Store a process current working directory information to sbuf. * * Takes a locked proc as argument, and returns with the proc unlocked. */ int kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen) { struct pwddesc *pdp; struct pwd *pwd; struct export_fd_buf *efbuf; struct vnode *cdir; int error; PROC_LOCK_ASSERT(p, MA_OWNED); pdp = pdhold(p); PROC_UNLOCK(p); if (pdp == NULL) return (EINVAL); efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK); efbuf->fdp = NULL; efbuf->pdp = pdp; efbuf->sb = sb; efbuf->remainder = maxlen; efbuf->flags = 0; PWDDESC_XLOCK(pdp); pwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); cdir = pwd->pwd_cdir; if (cdir == NULL) { error = EINVAL; } else { vrefact(cdir); error = export_vnode_to_sb(cdir, KF_FD_TYPE_CWD, FREAD, efbuf); } PWDDESC_XUNLOCK(pdp); pddrop(pdp); free(efbuf, M_TEMP); return (error); } /* * Get per-process current working directory. */ static int sysctl_kern_proc_cwd(SYSCTL_HANDLER_ARGS) { struct sbuf sb; struct proc *p; ssize_t maxlen; u_int namelen; int error, error2, *name; namelen = arg2; if (namelen != 1) return (EINVAL); name = (int *)arg1; sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_file), req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); if (error != 0) { sbuf_delete(&sb); return (error); } maxlen = req->oldptr != NULL ? req->oldlen : -1; error = kern_proc_cwd_out(p, &sb, maxlen); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? error : error2); } static SYSCTL_NODE(_kern_proc, KERN_PROC_CWD, cwd, CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_cwd, "Process current working directory"); #ifdef DDB /* * For the purposes of debugging, generate a human-readable string for the * file type. */ static const char * file_type_to_name(short type) { switch (type) { case 0: return ("zero"); case DTYPE_VNODE: return ("vnode"); case DTYPE_SOCKET: return ("socket"); case DTYPE_PIPE: return ("pipe"); case DTYPE_FIFO: return ("fifo"); case DTYPE_KQUEUE: return ("kqueue"); case DTYPE_CRYPTO: return ("crypto"); case DTYPE_MQUEUE: return ("mqueue"); case DTYPE_SHM: return ("shm"); case DTYPE_SEM: return ("ksem"); case DTYPE_PTS: return ("pts"); case DTYPE_DEV: return ("dev"); case DTYPE_PROCDESC: return ("proc"); case DTYPE_EVENTFD: return ("eventfd"); case DTYPE_LINUXTFD: return ("ltimer"); default: return ("unkn"); } } /* * For the purposes of debugging, identify a process (if any, perhaps one of * many) that references the passed file in its file descriptor array. Return * NULL if none. */ static struct proc * file_to_first_proc(struct file *fp) { struct filedesc *fdp; struct proc *p; int n; FOREACH_PROC_IN_SYSTEM(p) { if (p->p_state == PRS_NEW) continue; fdp = p->p_fd; if (fdp == NULL) continue; for (n = 0; n < fdp->fd_nfiles; n++) { if (fp == fdp->fd_ofiles[n].fde_file) return (p); } } return (NULL); } static void db_print_file(struct file *fp, int header) { #define XPTRWIDTH ((int)howmany(sizeof(void *) * NBBY, 4)) struct proc *p; if (header) db_printf("%*s %6s %*s %8s %4s %5s %6s %*s %5s %s\n", XPTRWIDTH, "File", "Type", XPTRWIDTH, "Data", "Flag", "GCFl", "Count", "MCount", XPTRWIDTH, "Vnode", "FPID", "FCmd"); p = file_to_first_proc(fp); db_printf("%*p %6s %*p %08x %04x %5d %6d %*p %5d %s\n", XPTRWIDTH, fp, file_type_to_name(fp->f_type), XPTRWIDTH, fp->f_data, fp->f_flag, 0, refcount_load(&fp->f_count), 0, XPTRWIDTH, fp->f_vnode, p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-"); #undef XPTRWIDTH } DB_SHOW_COMMAND(file, db_show_file) { struct file *fp; if (!have_addr) { db_printf("usage: show file \n"); return; } fp = (struct file *)addr; db_print_file(fp, 1); } DB_SHOW_COMMAND(files, db_show_files) { struct filedesc *fdp; struct file *fp; struct proc *p; int header; int n; header = 1; FOREACH_PROC_IN_SYSTEM(p) { if (p->p_state == PRS_NEW) continue; if ((fdp = p->p_fd) == NULL) continue; for (n = 0; n < fdp->fd_nfiles; ++n) { if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; db_print_file(fp, header); header = 0; } } } #endif SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, &maxfilesperproc, 0, "Maximum files allowed open per process"); SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, "Maximum number of files"); SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, &openfiles, 0, "System-wide number of open files"); /* ARGSUSED*/ static void filelistinit(void *dummy) { file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); filedesc0_zone = uma_zcreate("filedesc0", sizeof(struct filedesc0), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR); /* * XXXMJG this is a temporary hack due to boot ordering issues against * the vnode zone. */ vfs_smr = uma_zone_get_smr(pwd_zone); mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); } SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL); /*-------------------------------------------------------------------*/ static int badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { return (EBADF); } static int badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) { return (EINVAL); } static int badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) { return (EBADF); } static int badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { return (0); } static int badfo_kqfilter(struct file *fp, struct knote *kn) { return (EBADF); } static int badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred) { return (EBADF); } static int badfo_close(struct file *fp, struct thread *td) { return (0); } static int badfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) { return (EBADF); } static int badfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td) { return (EBADF); } static int badfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, struct thread *td) { return (EBADF); } static int badfo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { return (0); } struct fileops badfileops = { .fo_read = badfo_readwrite, .fo_write = badfo_readwrite, .fo_truncate = badfo_truncate, .fo_ioctl = badfo_ioctl, .fo_poll = badfo_poll, .fo_kqfilter = badfo_kqfilter, .fo_stat = badfo_stat, .fo_close = badfo_close, .fo_chmod = badfo_chmod, .fo_chown = badfo_chown, .fo_sendfile = badfo_sendfile, .fo_fill_kinfo = badfo_fill_kinfo, }; static int path_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { return (POLLNVAL); } static int path_close(struct file *fp, struct thread *td) { MPASS(fp->f_type == DTYPE_VNODE); fp->f_ops = &badfileops; vdrop(fp->f_vnode); return (0); } struct fileops path_fileops = { .fo_read = badfo_readwrite, .fo_write = badfo_readwrite, .fo_truncate = badfo_truncate, .fo_ioctl = badfo_ioctl, .fo_poll = path_poll, .fo_kqfilter = vn_kqfilter_opath, .fo_stat = vn_statfile, .fo_close = path_close, .fo_chmod = badfo_chmod, .fo_chown = badfo_chown, .fo_sendfile = badfo_sendfile, .fo_fill_kinfo = vn_fill_kinfo, .fo_flags = DFLAG_PASSABLE, }; int invfo_rdwr(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { return (EOPNOTSUPP); } int invfo_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) { return (EINVAL); } int invfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) { return (ENOTTY); } int invfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { return (poll_no_poll(events)); } int invfo_kqfilter(struct file *fp, struct knote *kn) { return (EINVAL); } int invfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) { return (EINVAL); } int invfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td) { return (EINVAL); } int invfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, struct thread *td) { return (EINVAL); } /*-------------------------------------------------------------------*/ /* * File Descriptor pseudo-device driver (/dev/fd/). * * Opening minor device N dup()s the file (if any) connected to file * descriptor N belonging to the calling process. Note that this driver * consists of only the ``open()'' routine, because all subsequent * references to this file will be direct to the other driver. * * XXX: we could give this one a cloning event handler if necessary. */ /* ARGSUSED */ static int fdopen(struct cdev *dev, int mode, int type, struct thread *td) { /* * XXX Kludge: set curthread->td_dupfd to contain the value of the * the file descriptor being sought for duplication. The error * return ensures that the vnode for this device will be released * by vn_open. Open will detect this special error and take the * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN * will simply report the error. */ td->td_dupfd = dev2unit(dev); return (ENODEV); } static struct cdevsw fildesc_cdevsw = { .d_version = D_VERSION, .d_open = fdopen, .d_name = "FD", }; static void fildesc_drvinit(void *unused) { struct cdev *dev; dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0666, "fd/0"); make_dev_alias(dev, "stdin"); dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 1, NULL, UID_ROOT, GID_WHEEL, 0666, "fd/1"); make_dev_alias(dev, "stdout"); dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 2, NULL, UID_ROOT, GID_WHEEL, 0666, "fd/2"); make_dev_alias(dev, "stderr"); } SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL); diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h index 62dde37c0c1f..c66a34536444 100644 --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -1,363 +1,363 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002 Ian Dowse. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_SYSCALLSUBR_H_ #define _SYS_SYSCALLSUBR_H_ #include #include #include #include #include #include #include struct __wrusage; struct file; struct filecaps; enum idtype; struct itimerval; struct image_args; struct jail; struct kevent; struct kevent_copyops; struct kld_file_stat; struct ksiginfo; struct mbuf; struct msghdr; struct msqid_ds; struct pollfd; struct ogetdirentries_args; struct rlimit; struct rusage; struct sched_param; union semun; struct sockaddr; struct spacectl_range; struct stat; struct thr_param; struct timex; struct uio; struct vm_map; struct vmspace; typedef int (*mmap_check_fp_fn)(struct file *, int, int, int); struct mmap_req { vm_offset_t mr_hint; vm_size_t mr_len; int mr_prot; int mr_flags; int mr_fd; off_t mr_pos; mmap_check_fp_fn mr_check_fp_fn; }; int kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg, size_t buflen, size_t path_max); int kern_abort2(struct thread *td, const char *why, int nargs, void **uargs); int kern_accept(struct thread *td, int s, struct sockaddr **name, socklen_t *namelen, struct file **fp); int kern_accept4(struct thread *td, int s, struct sockaddr **name, socklen_t *namelen, int flags, struct file **fp); int kern_accessat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int flags, int mode); int kern_adjtime(struct thread *td, struct timeval *delta, struct timeval *olddelta); int kern_alternate_path(const char *prefix, const char *path, enum uio_seg pathseg, char **pathbuf, int create, int dirfd); int kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa); int kern_break(struct thread *td, uintptr_t *addr); int kern_cap_ioctls_limit(struct thread *td, int fd, u_long *cmds, size_t ncmds); int kern_cap_rights_limit(struct thread *td, int fd, cap_rights_t *rights); int kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg); int kern_clock_getcpuclockid2(struct thread *td, id_t id, int which, clockid_t *clk_id); int kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts); int kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats); int kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec *rqtp, struct timespec *rmtp); int kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats); void kern_thread_cputime(struct thread *targettd, struct timespec *ats); void kern_process_cputime(struct proc *targetp, struct timespec *ats); -int kern_close_range(struct thread *td, u_int lowfd, u_int highfd); +int kern_close_range(struct thread *td, int flags, u_int lowfd, u_int highfd); int kern_close(struct thread *td, int fd); int kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa); int kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, off_t *outoffp, size_t len, unsigned int flags); int kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp); int kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t cpusetsize, const cpuset_t *maskp); int kern_cpuset_getdomain(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t domainsetsize, domainset_t *maskp, int *policyp); int kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t domainsetsize, const domainset_t *maskp, int policy); int kern_cpuset_getid(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, cpusetid_t *setid); int kern_cpuset_setid(struct thread *td, cpuwhich_t which, id_t id, cpusetid_t setid); int kern_dup(struct thread *td, u_int mode, int flags, int old, int new); int kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace); int kern_fchmodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, mode_t mode, int flag); int kern_fchownat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int uid, int gid, int flag); int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg); int kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg); int kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags); int kern_fhstat(struct thread *td, fhandle_t fh, struct stat *buf); int kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf); int kern_fpathconf(struct thread *td, int fd, int name, long *valuep); int kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs *ubuf, long bufsize, int mode); int kern_fstat(struct thread *td, int fd, struct stat *sbp); int kern_fstatfs(struct thread *td, int fd, struct statfs *buf); int kern_fsync(struct thread *td, int fd, bool fullsync); int kern_ftruncate(struct thread *td, int fd, off_t length); int kern_futimes(struct thread *td, int fd, const struct timeval *tptr, enum uio_seg tptrseg); int kern_futimens(struct thread *td, int fd, const struct timespec *tptr, enum uio_seg tptrseg); int kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, off_t *basep, ssize_t *residp, enum uio_seg bufseg); int kern_getfhat(struct thread *td, int flags, int fd, const char *path, enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg); int kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, size_t *countp, enum uio_seg bufseg, int mode); int kern_getitimer(struct thread *, u_int, struct itimerval *); int kern_getppid(struct thread *); int kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen); int kern_getpriority(struct thread *td, int which, int who); int kern_getrusage(struct thread *td, int who, struct rusage *rup); int kern_getsid(struct thread *td, pid_t pid); int kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen); int kern_getsockopt(struct thread *td, int s, int level, int name, void *optval, enum uio_seg valseg, socklen_t *valsize); int kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data); int kern_jail(struct thread *td, struct jail *j); int kern_jail_get(struct thread *td, struct uio *options, int flags); int kern_jail_set(struct thread *td, struct uio *options, int flags); int kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout); int kern_kevent_anonymous(struct thread *td, int nevents, struct kevent_copyops *k_ops); int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout); int kern_kill(struct thread *td, pid_t pid, int signum); int kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps); int kern_kldload(struct thread *td, const char *file, int *fileid); int kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat); int kern_kldunload(struct thread *td, int fileid, int flags); int kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, const char *path2, enum uio_seg segflg, int flag); int kern_listen(struct thread *td, int s, int backlog); int kern_lseek(struct thread *td, int fd, off_t offset, int whence); int kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg); int kern_madvise(struct thread *td, uintptr_t addr, size_t len, int behav); int kern_mincore(struct thread *td, uintptr_t addr, size_t len, char *vec); int kern_minherit(struct thread *td, uintptr_t addr, size_t len, int inherit); int kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, int mode); int kern_mkfifoat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int mode); int kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int mode, dev_t dev); int kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr, size_t len); int kern_mmap(struct thread *td, const struct mmap_req *mrp); int kern_mmap_racct_check(struct thread *td, struct vm_map *map, vm_size_t size); int kern_mmap_maxprot(struct proc *p, int prot); int kern_mprotect(struct thread *td, uintptr_t addr, size_t size, int prot); int kern_msgctl(struct thread *, int, int, struct msqid_ds *); int kern_msgrcv(struct thread *, int, void *, size_t, long, int, long *); int kern_msgsnd(struct thread *, int, const void *, size_t, int, long); int kern_msync(struct thread *td, uintptr_t addr, size_t size, int flags); int kern_munlock(struct thread *td, uintptr_t addr, size_t size); int kern_munmap(struct thread *td, uintptr_t addr, size_t size); int kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt); int kern_ntp_adjtime(struct thread *td, struct timex *ntv, int *retvalp); int kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, long *ploff); int kern_ommap(struct thread *td, uintptr_t hint, int len, int oprot, int oflags, int fd, long pos); int kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int flags, int mode); int kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, int name, u_long flags, long *valuep); int kern_pipe(struct thread *td, int fildes[2], int flags, struct filecaps *fcaps1, struct filecaps *fcaps2); int kern_poll(struct thread *td, struct pollfd *fds, u_int nfds, struct timespec *tsp, sigset_t *uset); int kern_poll_kfds(struct thread *td, struct pollfd *fds, u_int nfds, struct timespec *tsp, sigset_t *uset); bool kern_poll_maxfds(u_int nfds); int kern_posix_error(struct thread *td, int error); int kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, int advice); int kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len); int kern_fspacectl(struct thread *td, int fd, int cmd, const struct spacectl_range *, int flags, struct spacectl_range *); int kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com, void *data); int kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, off_t offset); int kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset); int kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou, fd_set *ex, struct timeval *tvp, sigset_t *uset, int abi_nfdbits); int kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data); int kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte, off_t offset); int kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset); int kern_readlinkat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count); int kern_readv(struct thread *td, int fd, struct uio *auio); int kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg, struct mbuf **controlp); int kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, const char *new, enum uio_seg pathseg); int kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, enum uio_seg pathseg, int flag); int kern_sched_getparam(struct thread *td, struct thread *targettd, struct sched_param *param); int kern_sched_getscheduler(struct thread *td, struct thread *targettd, int *policy); int kern_sched_setparam(struct thread *td, struct thread *targettd, struct sched_param *param); int kern_sched_setscheduler(struct thread *td, struct thread *targettd, int policy, struct sched_param *param); int kern_sched_rr_get_interval(struct thread *td, pid_t pid, struct timespec *ts); int kern_sched_rr_get_interval_td(struct thread *td, struct thread *targettd, struct timespec *ts); int kern_semctl(struct thread *td, int semid, int semnum, int cmd, union semun *arg, register_t *rval); int kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits); int kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags, struct mbuf *control, enum uio_seg segflg); int kern_setgroups(struct thread *td, u_int ngrp, gid_t *groups); int kern_setitimer(struct thread *, u_int, struct itimerval *, struct itimerval *); int kern_setpriority(struct thread *td, int which, int who, int prio); int kern_setrlimit(struct thread *, u_int, struct rlimit *); int kern_setsockopt(struct thread *td, int s, int level, int name, const void *optval, enum uio_seg valseg, socklen_t valsize); int kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp); int kern_shm_open(struct thread *td, const char *userpath, int flags, mode_t mode, struct filecaps *fcaps); int kern_shm_open2(struct thread *td, const char *path, int flags, mode_t mode, int shmflags, struct filecaps *fcaps, const char *name); int kern_shmat(struct thread *td, int shmid, const void *shmaddr, int shmflg); int kern_shmctl(struct thread *td, int shmid, int cmd, void *buf, size_t *bufsz); int kern_shutdown(struct thread *td, int s, int how); int kern_sigaction(struct thread *td, int sig, const struct sigaction *act, struct sigaction *oact, int flags); int kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss); int kern_sigprocmask(struct thread *td, int how, sigset_t *set, sigset_t *oset, int flags); int kern_sigsuspend(struct thread *td, sigset_t mask); int kern_sigtimedwait(struct thread *td, sigset_t waitset, struct ksiginfo *ksi, struct timespec *timeout); int kern_sigqueue(struct thread *td, pid_t pid, int signum, union sigval *value); int kern_socket(struct thread *td, int domain, int type, int protocol); int kern_statat(struct thread *td, int flag, int fd, const char *path, enum uio_seg pathseg, struct stat *sbp, void (*hook)(struct vnode *vp, struct stat *sbp)); int kern_specialfd(struct thread *td, int type, void *arg); int kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, struct statfs *buf); int kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, enum uio_seg segflg); int kern_sync(struct thread *td); int kern_ktimer_create(struct thread *td, clockid_t clock_id, struct sigevent *evp, int *timerid, int preset_id); int kern_ktimer_delete(struct thread *, int); int kern_ktimer_settime(struct thread *td, int timer_id, int flags, struct itimerspec *val, struct itimerspec *oval); int kern_ktimer_gettime(struct thread *td, int timer_id, struct itimerspec *val); int kern_ktimer_getoverrun(struct thread *td, int timer_id); int kern_thr_alloc(struct proc *, int pages, struct thread **); int kern_thr_exit(struct thread *td); int kern_thr_new(struct thread *td, struct thr_param *param); int kern_thr_suspend(struct thread *td, struct timespec *tsp); int kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, off_t length); int kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, enum uio_seg pathseg, int flag, ino_t oldinum); int kern_utimesat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg); int kern_utimensat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, int flag); int kern_wait(struct thread *td, pid_t pid, int *status, int options, struct rusage *rup); int kern_wait6(struct thread *td, enum idtype idtype, id_t id, int *status, int options, struct __wrusage *wrup, siginfo_t *sip); int kern_writev(struct thread *td, int fd, struct uio *auio); int kern_socketpair(struct thread *td, int domain, int type, int protocol, int *rsv); int kern_unmount(struct thread *td, const char *path, int flags); /* flags for kern_sigaction */ #define KSA_OSIGSET 0x0001 /* uses osigact_t */ #define KSA_FREEBSD4 0x0002 /* uses ucontext4 */ struct freebsd11_dirent; int freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, long *basep, void (*func)(struct freebsd11_dirent *)); #endif /* !_SYS_SYSCALLSUBR_H_ */ diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h index be71edbd6e23..449d35678275 100644 --- a/sys/sys/unistd.h +++ b/sys/sys/unistd.h @@ -1,205 +1,210 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)unistd.h 8.2 (Berkeley) 1/7/94 * $FreeBSD$ */ #ifndef _SYS_UNISTD_H_ #define _SYS_UNISTD_H_ #include /* * POSIX options and option groups we unconditionally do or don't * implement. Those options which are implemented (or not) entirely * in user mode are defined in . Please keep this list in * alphabetical order. * * Anything which is defined as zero below **must** have an * implementation for the corresponding sysconf() which is able to * determine conclusively whether or not the feature is supported. * Anything which is defined as other than -1 below **must** have * complete headers, types, and function declarations as specified by * the POSIX standard; however, if the relevant sysconf() function * returns -1, the functions may be stubbed out. */ #define _POSIX_ADVISORY_INFO 200112L #define _POSIX_ASYNCHRONOUS_IO 200112L #define _POSIX_CHOWN_RESTRICTED 1 #define _POSIX_CLOCK_SELECTION (-1) #define _POSIX_CPUTIME 200112L #define _POSIX_FSYNC 200112L #define _POSIX_IPV6 0 #define _POSIX_JOB_CONTROL 1 #define _POSIX_MAPPED_FILES 200112L #define _POSIX_MEMLOCK (-1) #define _POSIX_MEMLOCK_RANGE 200112L #define _POSIX_MEMORY_PROTECTION 200112L #define _POSIX_MESSAGE_PASSING 200112L #define _POSIX_MONOTONIC_CLOCK 200112L #define _POSIX_NO_TRUNC 1 #define _POSIX_PRIORITIZED_IO (-1) #define _POSIX_PRIORITY_SCHEDULING 0 #define _POSIX_RAW_SOCKETS 200112L #define _POSIX_REALTIME_SIGNALS 200112L #define _POSIX_SEMAPHORES 200112L #define _POSIX_SHARED_MEMORY_OBJECTS 200112L #define _POSIX_SPORADIC_SERVER (-1) #define _POSIX_SYNCHRONIZED_IO (-1) #define _POSIX_TIMEOUTS 200112L #define _POSIX_TIMERS 200112L #define _POSIX_TYPED_MEMORY_OBJECTS (-1) #define _POSIX_VDISABLE 0xff #if __XSI_VISIBLE #define _XOPEN_SHM 1 #define _XOPEN_STREAMS (-1) #endif /* * Although we have saved user/group IDs, we do not use them in setuid * as described in POSIX 1003.1, because the feature does not work for * root. We use the saved IDs in seteuid/setegid, which are not currently * part of the POSIX 1003.1 specification. XXX revisit for 1003.1-2001 * as this is now mandatory. */ #ifdef _NOT_AVAILABLE #define _POSIX_SAVED_IDS 1 /* saved set-user-ID and set-group-ID */ #endif /* Define the POSIX.1 version we target for compliance. */ #define _POSIX_VERSION 200112L /* access function */ #define F_OK 0 /* test for existence of file */ #define X_OK 0x01 /* test for execute or search permission */ #define W_OK 0x02 /* test for write permission */ #define R_OK 0x04 /* test for read permission */ /* whence values for lseek(2) */ #ifndef SEEK_SET #define SEEK_SET 0 /* set file offset to offset */ #define SEEK_CUR 1 /* set file offset to current plus offset */ #define SEEK_END 2 /* set file offset to EOF plus offset */ #endif #if __BSD_VISIBLE #define SEEK_DATA 3 /* set file offset to next data past offset */ #define SEEK_HOLE 4 /* set file offset to next hole past offset */ #endif #ifndef _POSIX_SOURCE /* whence values for lseek(2); renamed by POSIX 1003.1 */ #define L_SET SEEK_SET #define L_INCR SEEK_CUR #define L_XTND SEEK_END #endif /* configurable pathname variables */ #define _PC_LINK_MAX 1 #define _PC_MAX_CANON 2 #define _PC_MAX_INPUT 3 #define _PC_NAME_MAX 4 #define _PC_PATH_MAX 5 #define _PC_PIPE_BUF 6 #define _PC_CHOWN_RESTRICTED 7 #define _PC_NO_TRUNC 8 #define _PC_VDISABLE 9 #if __POSIX_VISIBLE >= 199309 #define _PC_ASYNC_IO 53 #define _PC_PRIO_IO 54 #define _PC_SYNC_IO 55 #endif #if __POSIX_VISIBLE >= 200112 #define _PC_ALLOC_SIZE_MIN 10 #define _PC_FILESIZEBITS 12 #define _PC_REC_INCR_XFER_SIZE 14 #define _PC_REC_MAX_XFER_SIZE 15 #define _PC_REC_MIN_XFER_SIZE 16 #define _PC_REC_XFER_ALIGN 17 #define _PC_SYMLINK_MAX 18 #endif #if __BSD_VISIBLE #define _PC_ACL_EXTENDED 59 #define _PC_ACL_PATH_MAX 60 #define _PC_CAP_PRESENT 61 #define _PC_INF_PRESENT 62 #define _PC_MAC_PRESENT 63 #define _PC_ACL_NFS4 64 #define _PC_DEALLOC_PRESENT 65 #endif /* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */ #define _PC_MIN_HOLE_SIZE 21 #if __BSD_VISIBLE /* * rfork() options. * * XXX currently, some operations without RFPROC set are not supported. */ #define RFNAMEG (1<<0) /* UNIMPL new plan9 `name space' */ #define RFENVG (1<<1) /* UNIMPL copy plan9 `env space' */ #define RFFDG (1<<2) /* copy fd table */ #define RFNOTEG (1<<3) /* UNIMPL create new plan9 `note group' */ #define RFPROC (1<<4) /* change child (else changes curproc) */ #define RFMEM (1<<5) /* share `address space' */ #define RFNOWAIT (1<<6) /* give child to init */ #define RFCNAMEG (1<<10) /* UNIMPL zero plan9 `name space' */ #define RFCENVG (1<<11) /* UNIMPL zero plan9 `env space' */ #define RFCFDG (1<<12) /* close all fds, zero fd table */ #define RFTHREAD (1<<13) /* enable kernel thread support */ #define RFSIGSHARE (1<<14) /* share signal handlers */ #define RFLINUXTHPN (1<<16) /* do linux clone exit parent notification */ #define RFSTOPPED (1<<17) /* leave child in a stopped state */ #define RFHIGHPID (1<<18) /* use a pid higher than 10 (idleproc) */ #define RFTSIGZMB (1<<19) /* select signal for exit parent notification */ #define RFTSIGSHIFT 20 /* selected signal number is in bits 20-27 */ #define RFTSIGMASK 0xFF #define RFTSIGNUM(flags) (((flags) >> RFTSIGSHIFT) & RFTSIGMASK) #define RFTSIGFLAGS(signum) ((signum) << RFTSIGSHIFT) #define RFPROCDESC (1<<28) /* return a process descriptor */ /* kernel: parent sleeps until child exits (vfork) */ #define RFPPWAIT (1<<31) /* user: vfork(2) semantics, clear signals */ #define RFSPAWN (1U<<31) #define RFFLAGS (RFFDG | RFPROC | RFMEM | RFNOWAIT | RFCFDG | \ RFTHREAD | RFSIGSHARE | RFLINUXTHPN | RFSTOPPED | RFHIGHPID | RFTSIGZMB | \ RFPROCDESC | RFSPAWN | RFPPWAIT) #define RFKERNELONLY (RFSTOPPED | RFHIGHPID | RFPROCDESC) #define SWAPOFF_FORCE 0x00000001 +/* + * close_range() options. + */ +#define CLOSE_RANGE_CLOEXEC (1<<2) + #endif /* __BSD_VISIBLE */ #endif /* !_SYS_UNISTD_H_ */ diff --git a/tests/sys/file/closefrom_test.c b/tests/sys/file/closefrom_test.c index 8516f7f2598c..d11faa22b55d 100644 --- a/tests/sys/file/closefrom_test.c +++ b/tests/sys/file/closefrom_test.c @@ -1,329 +1,362 @@ /*- * Copyright (c) 2009 Hudson River Trading LLC * Written by: John H. Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Regression tests for the closefrom(2) system call. */ #include #include #include #include #include #include #include #include #include #include #include #include #include struct shared_info { int failed; char tag[64]; char message[0]; }; static int test = 1; static void ok(const char *descr) { printf("ok %d - %s\n", test, descr); test++; } static void fail(const char *descr, const char *fmt, ...) { va_list ap; printf("not ok %d - %s", test, descr); test++; if (fmt) { va_start(ap, fmt); printf(" # "); vprintf(fmt, ap); va_end(ap); } printf("\n"); exit(1); } #define fail_err(descr) fail((descr), "%s", strerror(errno)) static void cok(struct shared_info *info, const char *descr) { info->failed = 0; strlcpy(info->tag, descr, sizeof(info->tag)); exit(0); } static void cfail(struct shared_info *info, const char *descr, const char *fmt, ...) { va_list ap; info->failed = 1; strlcpy(info->tag, descr, sizeof(info->tag)); if (fmt) { va_start(ap, fmt); vsprintf(info->message, fmt, ap); va_end(ap); } exit(0); } #define cfail_err(info, descr) cfail((info), (descr), "%s", strerror(errno)) /* * Use kinfo_getfile() to fetch the list of file descriptors and figure out * the highest open file descriptor. */ static int highest_fd(void) { struct kinfo_file *kif; int cnt, i, highest; kif = kinfo_getfile(getpid(), &cnt); if (kif == NULL) fail_err("kinfo_getfile"); highest = INT_MIN; for (i = 0; i < cnt; i++) if (kif[i].kf_fd > highest) highest = kif[i].kf_fd; free(kif); return (highest); } static int devnull(void) { int fd; fd = open(_PATH_DEVNULL, O_RDONLY); if (fd < 0) fail_err("open(\" "_PATH_DEVNULL" \")"); return (fd); } int main(void) { struct shared_info *info; pid_t pid; - int fd, i, start; + int fd, flags, i, start; printf("1..20\n"); /* We better start up with fd's 0, 1, and 2 open. */ start = devnull(); if (start == -1) fail("open", "bad descriptor %d", start); ok("open"); /* Make sure highest_fd() works. */ fd = highest_fd(); if (start != fd) fail("highest_fd", "bad descriptor %d != %d", start, fd); ok("highest_fd"); /* Try to use closefrom() for just closing fd 3. */ closefrom(start + 1); fd = highest_fd(); if (fd != start) fail("closefrom", "highest fd %d", fd); ok("closefrom"); /* Eat up 16 descriptors. */ for (i = 0; i < 16; i++) (void)devnull(); fd = highest_fd(); if (fd != start + 16) fail("open 16", "highest fd %d", fd); ok("open 16"); /* Close half of them. */ closefrom(11); fd = highest_fd(); if (fd != 10) fail("closefrom", "highest fd %d", fd); ok("closefrom"); /* Explicitly close descriptors 6 and 8 to create holes. */ if (close(6) < 0 || close(8) < 0) fail_err("close2 "); ok("close 2"); /* Verify that close on 6 and 8 fails with EBADF. */ if (close(6) == 0) fail("close(6)", "did not fail"); if (errno != EBADF) fail_err("close(6)"); ok("close(6)"); if (close(8) == 0) fail("close(8)", "did not fail"); if (errno != EBADF) fail_err("close(8)"); ok("close(8)"); /* Close from 4 on. */ closefrom(4); fd = highest_fd(); if (fd != 3) fail("closefrom", "highest fd %d", fd); ok("closefrom"); /* Allocate a small SHM region for IPC with our child. */ info = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0); if (info == MAP_FAILED) fail_err("mmap"); ok("mmap"); /* Fork a child process to test closefrom(0). */ pid = fork(); if (pid < 0) fail_err("fork"); if (pid == 0) { /* Child. */ closefrom(0); fd = highest_fd(); if (fd >= 0) cfail(info, "closefrom(0)", "highest fd %d", fd); cok(info, "closefrom(0)"); } if (wait(NULL) < 0) fail_err("wait"); if (info->failed) fail(info->tag, "%s", info->message); ok(info->tag); /* Fork a child process to test closefrom(-1). */ pid = fork(); if (pid < 0) fail_err("fork"); if (pid == 0) { /* Child. */ closefrom(-1); fd = highest_fd(); if (fd >= 0) cfail(info, "closefrom(-1)", "highest fd %d", fd); cok(info, "closefrom(-1)"); } if (wait(NULL) < 0) fail_err("wait"); if (info->failed) fail(info->tag, "%s", info->message); ok(info->tag); /* Dup stdout to 6. */ if (dup2(1, 6) < 0) fail_err("dup2"); fd = highest_fd(); if (fd != 6) fail("dup2", "highest fd %d", fd); ok("dup2"); /* Do a closefrom() starting in a hole. */ closefrom(4); fd = highest_fd(); if (fd != 3) fail("closefrom", "highest fd %d", fd); ok("closefrom"); /* Do a closefrom() beyond our highest open fd. */ closefrom(32); fd = highest_fd(); if (fd != 3) fail("closefrom", "highest fd %d", fd); ok("closefrom"); /* Chew up another 8 fd */ for (i = 0; i < 8; i++) (void)devnull(); fd = highest_fd(); start = fd - 7; /* close_range() a hole in the middle */ close_range(start + 3, start + 5, 0); for (i = start + 3; i < start + 6; ++i) { if (close(i) == 0 || errno != EBADF) { --i; break; } } if (i != start + 6) fail("close_range", "failed to close at %d in %d - %d", i + 1, start + 3, start + 6); ok("close_range"); /* close_range from the middle of the hole */ close_range(start + 4, start + 6, 0); if ((i = highest_fd()) != fd) fail("close_range", "highest fd %d", i); ok("close_range"); /* close_range to the end; effectively closefrom(2) */ close_range(start + 3, ~0L, 0); if ((i = highest_fd()) != start + 2) fail("close_range", "highest fd %d", i); ok("close_range"); /* Now close the rest */ close_range(start, start + 4, 0); fd = highest_fd(); if (fd != 3) fail("close_range", "highest fd %d", fd); ok("close_range"); /* Fork a child process to test closefrom(0) twice. */ pid = fork(); if (pid < 0) fail_err("fork"); if (pid == 0) { /* Child. */ closefrom(0); closefrom(0); cok(info, "closefrom(0)"); } if (wait(NULL) < 0) fail_err("wait"); if (info->failed) fail(info->tag, "%s", info->message); ok(info->tag); + /* test CLOSE_RANGE_CLOEXEC */ + for (i = 0; i < 8; i++) + (void)devnull(); + fd = highest_fd(); + start = fd - 8; + if (close_range(start + 1, start + 4, CLOSE_RANGE_CLOEXEC) < 0) + fail_err("close_range(..., CLOSE_RANGE_CLOEXEC)"); + flags = fcntl(start, F_GETFD); + if (flags < 0) + fail_err("fcntl(.., F_GETFD)"); + if ((flags & FD_CLOEXEC) != 0) + fail("close_range", "CLOSE_RANGE_CLOEXEC set close-on-exec " + "when it should not have on fd %d", start); + for (i = start + 1; i <= start + 4; i++) { + flags = fcntl(i, F_GETFD); + if (flags < 0) + fail_err("fcntl(.., F_GETFD)"); + if ((flags & FD_CLOEXEC) == 0) + fail("close_range", "CLOSE_RANGE_CLOEXEC did not set " + "close-on-exec on fd %d", i); + } + for (; i < start + 8; i++) { + flags = fcntl(i, F_GETFD); + if (flags < 0) + fail_err("fcntl(.., F_GETFD)"); + if ((flags & FD_CLOEXEC) != 0) + fail("close_range", "CLOSE_RANGE_CLOEXEC set close-on-exec " + "when it should not have on fd %d", i); + } + if (close_range(start, start + 8, 0) < 0) + fail_err("close_range"); + ok("close_range(..., CLOSE_RANGE_CLOEXEC)"); + return (0); } diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c index 611c9021cd78..845e2000a4c2 100644 --- a/usr.bin/kdump/kdump.c +++ b/usr.bin/kdump/kdump.c @@ -1,2214 +1,2222 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1988, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)kdump.c 8.1 (Berkeley) 6/6/93"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #define _WANT_KERNEL_ERRNO #ifdef __LP64__ #define _WANT_KEVENT32 #endif #define _WANT_FREEBSD11_KEVENT #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef WITH_CASPER #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ktrace.h" #ifdef WITH_CASPER #include #include #include #endif int fetchprocinfo(struct ktr_header *, u_int *); u_int findabi(struct ktr_header *); int fread_tail(void *, int, int); void dumpheader(struct ktr_header *, u_int); void ktrsyscall(struct ktr_syscall *, u_int); void ktrsysret(struct ktr_sysret *, u_int); void ktrnamei(char *, int); void hexdump(char *, int, int); void visdump(char *, int, int); void ktrgenio(struct ktr_genio *, int); void ktrpsig(struct ktr_psig *); void ktrcsw(struct ktr_csw *); void ktrcsw_old(struct ktr_csw_old *); void ktruser(int, void *); void ktrcaprights(cap_rights_t *); void ktritimerval(struct itimerval *it); void ktrsockaddr(struct sockaddr *); void ktrstat(struct stat *); void ktrstruct(char *, size_t); void ktrcapfail(struct ktr_cap_fail *); void ktrfault(struct ktr_fault *); void ktrfaultend(struct ktr_faultend *); void ktrkevent(struct kevent *); void ktrstructarray(struct ktr_struct_array *, size_t); void usage(void); #define TIMESTAMP_NONE 0x0 #define TIMESTAMP_ABSOLUTE 0x1 #define TIMESTAMP_ELAPSED 0x2 #define TIMESTAMP_RELATIVE 0x4 static bool abiflag, decimal, fancy = true, resolv, suppressdata, syscallno, tail, threads; static int timestamp, maxdata; static const char *tracefile = DEF_TRACEFILE; static struct ktr_header ktr_header; #define TIME_FORMAT "%b %e %T %Y" #define eqs(s1, s2) (strcmp((s1), (s2)) == 0) #define print_number64(first,i,n,c) do { \ uint64_t __v; \ \ if (quad_align && (((ptrdiff_t)((i) - (first))) & 1) == 1) { \ (i)++; \ (n)--; \ } \ if (quad_slots == 2) \ __v = (uint64_t)(uint32_t)(i)[0] | \ ((uint64_t)(uint32_t)(i)[1]) << 32; \ else \ __v = (uint64_t)*(i); \ if (decimal) \ printf("%c%jd", (c), (intmax_t)__v); \ else \ printf("%c%#jx", (c), (uintmax_t)__v); \ (i) += quad_slots; \ (n) -= quad_slots; \ (c) = ','; \ } while (0) #define print_number(i,n,c) do { \ if (decimal) \ printf("%c%jd", c, (intmax_t)*i); \ else \ printf("%c%#jx", c, (uintmax_t)(u_register_t)*i); \ i++; \ n--; \ c = ','; \ } while (0) struct proc_info { TAILQ_ENTRY(proc_info) info; u_int sv_flags; pid_t pid; }; static TAILQ_HEAD(trace_procs, proc_info) trace_procs; #ifdef WITH_CASPER static cap_channel_t *cappwd, *capgrp; static int cappwdgrp_setup(cap_channel_t **cappwdp, cap_channel_t **capgrpp) { cap_channel_t *capcas, *cappwdloc, *capgrploc; const char *cmds[1], *fields[1]; capcas = cap_init(); if (capcas == NULL) { err(1, "unable to create casper process"); exit(1); } cappwdloc = cap_service_open(capcas, "system.pwd"); capgrploc = cap_service_open(capcas, "system.grp"); /* Casper capability no longer needed. */ cap_close(capcas); if (cappwdloc == NULL || capgrploc == NULL) { if (cappwdloc == NULL) warn("unable to open system.pwd service"); if (capgrploc == NULL) warn("unable to open system.grp service"); exit(1); } /* Limit system.pwd to only getpwuid() function and pw_name field. */ cmds[0] = "getpwuid"; if (cap_pwd_limit_cmds(cappwdloc, cmds, 1) < 0) err(1, "unable to limit system.pwd service"); fields[0] = "pw_name"; if (cap_pwd_limit_fields(cappwdloc, fields, 1) < 0) err(1, "unable to limit system.pwd service"); /* Limit system.grp to only getgrgid() function and gr_name field. */ cmds[0] = "getgrgid"; if (cap_grp_limit_cmds(capgrploc, cmds, 1) < 0) err(1, "unable to limit system.grp service"); fields[0] = "gr_name"; if (cap_grp_limit_fields(capgrploc, fields, 1) < 0) err(1, "unable to limit system.grp service"); *cappwdp = cappwdloc; *capgrpp = capgrploc; return (0); } #endif /* WITH_CASPER */ static void print_integer_arg(const char *(*decoder)(int), int value) { const char *str; str = decoder(value); if (str != NULL) printf("%s", str); else { if (decimal) printf("", value); else printf("", value); } } /* Like print_integer_arg but unknown values are treated as valid. */ static void print_integer_arg_valid(const char *(*decoder)(int), int value) { const char *str; str = decoder(value); if (str != NULL) printf("%s", str); else { if (decimal) printf("%d", value); else printf("%#x", value); } } static bool print_mask_arg_part(bool (*decoder)(FILE *, int, int *), int value, int *rem) { printf("%#x<", value); return (decoder(stdout, value, rem)); } static void print_mask_arg(bool (*decoder)(FILE *, int, int *), int value) { bool invalid; int rem; invalid = !print_mask_arg_part(decoder, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void print_mask_arg0(bool (*decoder)(FILE *, int, int *), int value) { bool invalid; int rem; if (value == 0) { printf("0"); return; } printf("%#x<", value); invalid = !decoder(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void decode_fileflags(fflags_t value) { bool invalid; fflags_t rem; if (value == 0) { printf("0"); return; } printf("%#x<", value); invalid = !sysdecode_fileflags(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void decode_filemode(int value) { bool invalid; int rem; if (value == 0) { printf("0"); return; } printf("%#o<", value); invalid = !sysdecode_filemode(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void print_mask_arg32(bool (*decoder)(FILE *, uint32_t, uint32_t *), uint32_t value) { bool invalid; uint32_t rem; printf("%#x<", value); invalid = !decoder(stdout, value, &rem); printf(">"); if (invalid) printf("%u", rem); } static void print_mask_argul(bool (*decoder)(FILE *, u_long, u_long *), u_long value) { bool invalid; u_long rem; if (value == 0) { printf("0"); return; } printf("%#lx<", value); invalid = !decoder(stdout, value, &rem); printf(">"); if (invalid) printf("%lu", rem); } int main(int argc, char *argv[]) { int ch, ktrlen, size; void *m; int trpoints = ALL_POINTS; int drop_logged; pid_t pid = 0; u_int sv_flags; setlocale(LC_CTYPE, ""); timestamp = TIMESTAMP_NONE; while ((ch = getopt(argc,argv,"f:dElm:np:AHRrSsTt:")) != -1) switch (ch) { case 'A': abiflag = true; break; case 'f': tracefile = optarg; break; case 'd': decimal = true; break; case 'l': tail = true; break; case 'm': maxdata = atoi(optarg); break; case 'n': fancy = false; break; case 'p': pid = atoi(optarg); break; case 'r': resolv = true; break; case 'S': syscallno = true; break; case 's': suppressdata = true; break; case 'E': timestamp |= TIMESTAMP_ELAPSED; break; case 'H': threads = true; break; case 'R': timestamp |= TIMESTAMP_RELATIVE; break; case 'T': timestamp |= TIMESTAMP_ABSOLUTE; break; case 't': trpoints = getpoints(optarg); if (trpoints < 0) errx(1, "unknown trace point in %s", optarg); break; default: usage(); } if (argc > optind) usage(); m = malloc(size = 1025); if (m == NULL) errx(1, "%s", strerror(ENOMEM)); if (strcmp(tracefile, "-") != 0) if (!freopen(tracefile, "r", stdin)) err(1, "%s", tracefile); caph_cache_catpages(); caph_cache_tzdata(); #ifdef WITH_CASPER if (resolv) { if (cappwdgrp_setup(&cappwd, &capgrp) < 0) { cappwd = NULL; capgrp = NULL; } } if (!resolv || (cappwd != NULL && capgrp != NULL)) { if (caph_enter() < 0) err(1, "unable to enter capability mode"); } #else if (!resolv) { if (caph_enter() < 0) err(1, "unable to enter capability mode"); } #endif if (caph_limit_stdio() == -1) err(1, "unable to limit stdio"); TAILQ_INIT(&trace_procs); drop_logged = 0; while (fread_tail(&ktr_header, sizeof(struct ktr_header), 1)) { if (ktr_header.ktr_type & KTR_DROP) { ktr_header.ktr_type &= ~KTR_DROP; if (!drop_logged && threads) { printf( "%6jd %6jd %-8.*s Events dropped.\n", (intmax_t)ktr_header.ktr_pid, ktr_header.ktr_tid > 0 ? (intmax_t)ktr_header.ktr_tid : 0, MAXCOMLEN, ktr_header.ktr_comm); drop_logged = 1; } else if (!drop_logged) { printf("%6jd %-8.*s Events dropped.\n", (intmax_t)ktr_header.ktr_pid, MAXCOMLEN, ktr_header.ktr_comm); drop_logged = 1; } } if ((ktrlen = ktr_header.ktr_len) < 0) errx(1, "bogus length 0x%x", ktrlen); if (ktrlen > size) { m = realloc(m, ktrlen+1); if (m == NULL) errx(1, "%s", strerror(ENOMEM)); size = ktrlen; } if (ktrlen && fread_tail(m, ktrlen, 1) == 0) errx(1, "data too short"); if (fetchprocinfo(&ktr_header, (u_int *)m) != 0) continue; if (pid && ktr_header.ktr_pid != pid && ktr_header.ktr_tid != pid) continue; if ((trpoints & (1<ktr_type) { case KTR_PROCCTOR: TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { TAILQ_REMOVE(&trace_procs, pi, info); break; } } pi = malloc(sizeof(struct proc_info)); if (pi == NULL) errx(1, "%s", strerror(ENOMEM)); pi->sv_flags = *flags; pi->pid = kth->ktr_pid; TAILQ_INSERT_TAIL(&trace_procs, pi, info); return (1); case KTR_PROCDTOR: TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { TAILQ_REMOVE(&trace_procs, pi, info); free(pi); break; } } return (1); } return (0); } u_int findabi(struct ktr_header *kth) { struct proc_info *pi; TAILQ_FOREACH(pi, &trace_procs, info) { if (pi->pid == kth->ktr_pid) { return (pi->sv_flags); } } return (0); } void dumpheader(struct ktr_header *kth, u_int sv_flags) { static char unknown[64]; static struct timeval prevtime, prevtime_e; struct timeval temp; const char *abi; const char *arch; const char *type; const char *sign; switch (kth->ktr_type) { case KTR_SYSCALL: type = "CALL"; break; case KTR_SYSRET: type = "RET "; break; case KTR_NAMEI: type = "NAMI"; break; case KTR_GENIO: type = "GIO "; break; case KTR_PSIG: type = "PSIG"; break; case KTR_CSW: type = "CSW "; break; case KTR_USER: type = "USER"; break; case KTR_STRUCT: case KTR_STRUCT_ARRAY: type = "STRU"; break; case KTR_SYSCTL: type = "SCTL"; break; case KTR_CAPFAIL: type = "CAP "; break; case KTR_FAULT: type = "PFLT"; break; case KTR_FAULTEND: type = "PRET"; break; default: sprintf(unknown, "UNKNOWN(%d)", kth->ktr_type); type = unknown; } /* * The ktr_tid field was previously the ktr_buffer field, which held * the kernel pointer value for the buffer associated with data * following the record header. It now holds a threadid, but only * for trace files after the change. Older trace files still contain * kernel pointers. Detect this and suppress the results by printing * negative tid's as 0. */ if (threads) printf("%6jd %6jd %-8.*s ", (intmax_t)kth->ktr_pid, kth->ktr_tid > 0 ? (intmax_t)kth->ktr_tid : 0, MAXCOMLEN, kth->ktr_comm); else printf("%6jd %-8.*s ", (intmax_t)kth->ktr_pid, MAXCOMLEN, kth->ktr_comm); if (timestamp) { if (timestamp & TIMESTAMP_ABSOLUTE) { printf("%jd.%06ld ", (intmax_t)kth->ktr_time.tv_sec, kth->ktr_time.tv_usec); } if (timestamp & TIMESTAMP_ELAPSED) { if (prevtime_e.tv_sec == 0) prevtime_e = kth->ktr_time; timersub(&kth->ktr_time, &prevtime_e, &temp); printf("%jd.%06ld ", (intmax_t)temp.tv_sec, temp.tv_usec); } if (timestamp & TIMESTAMP_RELATIVE) { if (prevtime.tv_sec == 0) prevtime = kth->ktr_time; if (timercmp(&kth->ktr_time, &prevtime, <)) { timersub(&prevtime, &kth->ktr_time, &temp); sign = "-"; } else { timersub(&kth->ktr_time, &prevtime, &temp); sign = ""; } prevtime = kth->ktr_time; printf("%s%jd.%06ld ", sign, (intmax_t)temp.tv_sec, temp.tv_usec); } } printf("%s ", type); if (abiflag != 0) { switch (sv_flags & SV_ABI_MASK) { case SV_ABI_LINUX: abi = "L"; break; case SV_ABI_FREEBSD: abi = "F"; break; default: abi = "U"; break; } if ((sv_flags & SV_LP64) != 0) arch = "64"; else if ((sv_flags & SV_ILP32) != 0) arch = "32"; else arch = "00"; printf("%s%s ", abi, arch); } } #include static void ioctlname(unsigned long val) { const char *str; str = sysdecode_ioctlname(val); if (str != NULL) printf("%s", str); else if (decimal) printf("%lu", val); else printf("%#lx", val); } static enum sysdecode_abi syscallabi(u_int sv_flags) { if (sv_flags == 0) return (SYSDECODE_ABI_FREEBSD); switch (sv_flags & SV_ABI_MASK) { case SV_ABI_FREEBSD: return (SYSDECODE_ABI_FREEBSD); case SV_ABI_LINUX: #ifdef __LP64__ if (sv_flags & SV_ILP32) return (SYSDECODE_ABI_LINUX32); #endif return (SYSDECODE_ABI_LINUX); default: return (SYSDECODE_ABI_UNKNOWN); } } static void syscallname(u_int code, u_int sv_flags) { const char *name; name = sysdecode_syscallname(syscallabi(sv_flags), code); if (name == NULL) printf("[%d]", code); else { printf("%s", name); if (syscallno) printf("[%d]", code); } } static void print_signal(int signo) { const char *signame; signame = sysdecode_signal(signo); if (signame != NULL) printf("%s", signame); else printf("SIG %d", signo); } void ktrsyscall(struct ktr_syscall *ktr, u_int sv_flags) { int narg = ktr->ktr_narg; register_t *ip, *first; intmax_t arg; int quad_align, quad_slots; syscallname(ktr->ktr_code, sv_flags); ip = first = &ktr->ktr_args[0]; if (narg) { char c = '('; if (fancy && (sv_flags == 0 || (sv_flags & SV_ABI_MASK) == SV_ABI_FREEBSD)) { quad_align = 0; if (sv_flags & SV_ILP32) { #ifdef __powerpc__ quad_align = 1; #endif quad_slots = 2; } else quad_slots = 1; switch (ktr->ktr_code) { case SYS_bindat: case SYS_chflagsat: case SYS_connectat: case SYS_faccessat: case SYS_fchmodat: case SYS_fchownat: case SYS_fstatat: case SYS_futimesat: case SYS_linkat: case SYS_mkdirat: case SYS_mkfifoat: case SYS_mknodat: case SYS_openat: case SYS_readlinkat: case SYS_renameat: case SYS_unlinkat: case SYS_utimensat: putchar('('); print_integer_arg_valid(sysdecode_atfd, *ip); c = ','; ip++; narg--; break; } switch (ktr->ktr_code) { case SYS_ioctl: { print_number(ip, narg, c); putchar(c); ioctlname(*ip); c = ','; ip++; narg--; break; } case SYS_ptrace: putchar('('); print_integer_arg(sysdecode_ptrace_request, *ip); c = ','; ip++; narg--; break; case SYS_access: case SYS_eaccess: case SYS_faccessat: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_access_mode, *ip); ip++; narg--; break; + case SYS_close_range: + print_number(ip, narg, c); + print_number(ip, narg, c); + putchar(','); + print_mask_arg(sysdecode_close_range_flags, *ip); + ip += 3; + narg -= 3; + break; case SYS_open: case SYS_openat: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_open_flags, ip[0]); if ((ip[0] & O_CREAT) == O_CREAT) { putchar(','); decode_filemode(ip[1]); } ip += 2; narg -= 2; break; case SYS_wait4: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_wait4_options, *ip); ip++; narg--; break; case SYS_wait6: putchar('('); print_integer_arg(sysdecode_idtype, *ip); c = ','; ip++; narg--; print_number64(first, ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_wait6_options, *ip); ip++; narg--; break; case SYS_chmod: case SYS_fchmod: case SYS_lchmod: case SYS_fchmodat: print_number(ip, narg, c); putchar(','); decode_filemode(*ip); ip++; narg--; break; case SYS_mknodat: print_number(ip, narg, c); putchar(','); decode_filemode(*ip); ip++; narg--; break; case SYS_getfsstat: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_getfsstat_mode, *ip); ip++; narg--; break; case SYS_mount: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mount_flags, *ip); ip++; narg--; break; case SYS_unmount: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mount_flags, *ip); ip++; narg--; break; case SYS_recvmsg: case SYS_sendmsg: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_msg_flags, *ip); ip++; narg--; break; case SYS_recvfrom: case SYS_sendto: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg0(sysdecode_msg_flags, *ip); ip++; narg--; break; case SYS_chflags: case SYS_chflagsat: case SYS_fchflags: case SYS_lchflags: print_number(ip, narg, c); putchar(','); decode_fileflags(*ip); ip++; narg--; break; case SYS_kill: print_number(ip, narg, c); putchar(','); print_signal(*ip); ip++; narg--; break; case SYS_reboot: putchar('('); print_mask_arg(sysdecode_reboot_howto, *ip); ip++; narg--; break; case SYS_umask: putchar('('); decode_filemode(*ip); ip++; narg--; break; case SYS_msync: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_msync_flags, *ip); ip++; narg--; break; #ifdef SYS_freebsd6_mmap case SYS_freebsd6_mmap: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mmap_prot, *ip); putchar(','); ip++; narg--; print_mask_arg(sysdecode_mmap_flags, *ip); ip++; narg--; break; #endif case SYS_mmap: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mmap_prot, *ip); putchar(','); ip++; narg--; print_mask_arg(sysdecode_mmap_flags, *ip); ip++; narg--; break; case SYS_mprotect: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mmap_prot, *ip); ip++; narg--; break; case SYS_madvise: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_madvice, *ip); ip++; narg--; break; case SYS_pathconf: case SYS_lpathconf: case SYS_fpathconf: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_pathconf_name, *ip); ip++; narg--; break; case SYS_getpriority: case SYS_setpriority: putchar('('); print_integer_arg(sysdecode_prio_which, *ip); c = ','; ip++; narg--; break; case SYS_fcntl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_fcntl_cmd, ip[0]); if (sysdecode_fcntl_arg_p(ip[0])) { putchar(','); if (ip[0] == F_SETFL) print_mask_arg( sysdecode_fcntl_fileflags, ip[1]); else sysdecode_fcntl_arg(stdout, ip[0], ip[1], decimal ? 10 : 16); } ip += 2; narg -= 2; break; case SYS_socket: { int sockdomain; putchar('('); sockdomain = *ip; print_integer_arg(sysdecode_socketdomain, sockdomain); ip++; narg--; putchar(','); print_mask_arg(sysdecode_socket_type, *ip); ip++; narg--; if (sockdomain == PF_INET || sockdomain == PF_INET6) { putchar(','); print_integer_arg(sysdecode_ipproto, *ip); ip++; narg--; } c = ','; break; } case SYS_setsockopt: case SYS_getsockopt: { const char *str; print_number(ip, narg, c); putchar(','); print_integer_arg_valid(sysdecode_sockopt_level, *ip); str = sysdecode_sockopt_name(ip[0], ip[1]); if (str != NULL) { printf(",%s", str); ip++; narg--; } ip++; narg--; break; } #ifdef SYS_freebsd6_lseek case SYS_freebsd6_lseek: print_number(ip, narg, c); /* Hidden 'pad' argument, not in lseek(2) */ print_number(ip, narg, c); print_number64(first, ip, narg, c); putchar(','); print_integer_arg(sysdecode_whence, *ip); ip++; narg--; break; #endif case SYS_lseek: print_number(ip, narg, c); print_number64(first, ip, narg, c); putchar(','); print_integer_arg(sysdecode_whence, *ip); ip++; narg--; break; case SYS_flock: print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_flock_operation, *ip); ip++; narg--; break; case SYS_mkfifo: case SYS_mkfifoat: case SYS_mkdir: case SYS_mkdirat: print_number(ip, narg, c); putchar(','); decode_filemode(*ip); ip++; narg--; break; case SYS_shutdown: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_shutdown_how, *ip); ip++; narg--; break; case SYS_socketpair: putchar('('); print_integer_arg(sysdecode_socketdomain, *ip); ip++; narg--; putchar(','); print_mask_arg(sysdecode_socket_type, *ip); ip++; narg--; c = ','; break; case SYS_getrlimit: case SYS_setrlimit: putchar('('); print_integer_arg(sysdecode_rlimit, *ip); ip++; narg--; c = ','; break; case SYS_getrusage: putchar('('); print_integer_arg(sysdecode_getrusage_who, *ip); ip++; narg--; c = ','; break; case SYS_quotactl: print_number(ip, narg, c); putchar(','); if (!sysdecode_quotactl_cmd(stdout, *ip)) { if (decimal) printf("", (int)*ip); else printf("", (int)*ip); } ip++; narg--; c = ','; break; case SYS_nfssvc: putchar('('); print_integer_arg(sysdecode_nfssvc_flags, *ip); ip++; narg--; c = ','; break; case SYS_rtprio: case SYS_rtprio_thread: putchar('('); print_integer_arg(sysdecode_rtprio_function, *ip); ip++; narg--; c = ','; break; case SYS___semctl: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_semctl_cmd, *ip); ip++; narg--; break; case SYS_semget: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_semget_flags, *ip); ip++; narg--; break; case SYS_msgctl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_msgctl_cmd, *ip); ip++; narg--; break; case SYS_shmat: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_shmat_flags, *ip); ip++; narg--; break; case SYS_shmctl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_shmctl_cmd, *ip); ip++; narg--; break; #ifdef SYS_freebsd12_shm_open case SYS_freebsd12_shm_open: if (ip[0] == (uintptr_t)SHM_ANON) { printf("(SHM_ANON"); ip++; } else { print_number(ip, narg, c); } putchar(','); print_mask_arg(sysdecode_open_flags, ip[0]); putchar(','); decode_filemode(ip[1]); ip += 2; narg -= 2; break; #endif case SYS_shm_open2: if (ip[0] == (uintptr_t)SHM_ANON) { printf("(SHM_ANON"); ip++; } else { print_number(ip, narg, c); } putchar(','); print_mask_arg(sysdecode_open_flags, ip[0]); putchar(','); decode_filemode(ip[1]); putchar(','); print_mask_arg(sysdecode_shmflags, ip[2]); ip += 3; narg -= 3; break; case SYS_minherit: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_minherit_inherit, *ip); ip++; narg--; break; case SYS_rfork: putchar('('); print_mask_arg(sysdecode_rfork_flags, *ip); ip++; narg--; c = ','; break; case SYS_lio_listio: putchar('('); print_integer_arg(sysdecode_lio_listio_mode, *ip); ip++; narg--; c = ','; break; case SYS_mlockall: putchar('('); print_mask_arg(sysdecode_mlockall_flags, *ip); ip++; narg--; break; case SYS_sched_setscheduler: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_scheduler_policy, *ip); ip++; narg--; break; case SYS_sched_get_priority_max: case SYS_sched_get_priority_min: putchar('('); print_integer_arg(sysdecode_scheduler_policy, *ip); ip++; narg--; break; case SYS_sendfile: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_sendfile_flags, *ip); ip++; narg--; break; case SYS_kldsym: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_kldsym_cmd, *ip); ip++; narg--; break; case SYS_sigprocmask: putchar('('); print_integer_arg(sysdecode_sigprocmask_how, *ip); ip++; narg--; c = ','; break; case SYS___acl_get_file: case SYS___acl_set_file: case SYS___acl_get_fd: case SYS___acl_set_fd: case SYS___acl_delete_file: case SYS___acl_delete_fd: case SYS___acl_aclcheck_file: case SYS___acl_aclcheck_fd: case SYS___acl_get_link: case SYS___acl_set_link: case SYS___acl_delete_link: case SYS___acl_aclcheck_link: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_acltype, *ip); ip++; narg--; break; case SYS_sigaction: putchar('('); print_signal(*ip); ip++; narg--; c = ','; break; case SYS_extattrctl: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_extattrnamespace, *ip); ip++; narg--; break; case SYS_nmount: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_mount_flags, *ip); ip++; narg--; break; case SYS_thr_create: print_number(ip, narg, c); print_number(ip, narg, c); putchar(','); print_mask_arg(sysdecode_thr_create_flags, *ip); ip++; narg--; break; case SYS_thr_kill: print_number(ip, narg, c); putchar(','); print_signal(*ip); ip++; narg--; break; case SYS_kldunloadf: print_number(ip, narg, c); putchar(','); print_integer_arg(sysdecode_kldunload_flags, *ip); ip++; narg--; break; case SYS_linkat: case SYS_renameat: case SYS_symlinkat: print_number(ip, narg, c); putchar(','); print_integer_arg_valid(sysdecode_atfd, *ip); ip++; narg--; print_number(ip, narg, c); break; case SYS_cap_fcntls_limit: print_number(ip, narg, c); putchar(','); arg = *ip; ip++; narg--; print_mask_arg32(sysdecode_cap_fcntlrights, arg); break; case SYS_posix_fadvise: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); (void)putchar(','); print_integer_arg(sysdecode_fadvice, *ip); ip++; narg--; break; case SYS_procctl: putchar('('); print_integer_arg(sysdecode_idtype, *ip); c = ','; ip++; narg--; print_number64(first, ip, narg, c); putchar(','); print_integer_arg(sysdecode_procctl_cmd, *ip); ip++; narg--; break; case SYS__umtx_op: { int op; print_number(ip, narg, c); putchar(','); if (print_mask_arg_part(sysdecode_umtx_op_flags, *ip, &op)) putchar('|'); print_integer_arg(sysdecode_umtx_op, op); putchar('>'); switch (*ip) { case UMTX_OP_CV_WAIT: ip++; narg--; putchar(','); print_mask_argul( sysdecode_umtx_cvwait_flags, *ip); break; case UMTX_OP_RW_RDLOCK: ip++; narg--; putchar(','); print_mask_argul( sysdecode_umtx_rwlock_flags, *ip); break; } ip++; narg--; break; } case SYS_ftruncate: case SYS_truncate: print_number(ip, narg, c); print_number64(first, ip, narg, c); break; case SYS_fchownat: print_number(ip, narg, c); print_number(ip, narg, c); print_number(ip, narg, c); break; case SYS_fstatat: case SYS_utimensat: print_number(ip, narg, c); print_number(ip, narg, c); break; case SYS_unlinkat: print_number(ip, narg, c); break; case SYS_sysarch: putchar('('); print_integer_arg(sysdecode_sysarch_number, *ip); ip++; narg--; c = ','; break; } switch (ktr->ktr_code) { case SYS_chflagsat: case SYS_fchownat: case SYS_faccessat: case SYS_fchmodat: case SYS_fstatat: case SYS_linkat: case SYS_unlinkat: case SYS_utimensat: putchar(','); print_mask_arg0(sysdecode_atflags, *ip); ip++; narg--; break; } } while (narg > 0) { print_number(ip, narg, c); } putchar(')'); } putchar('\n'); } void ktrsysret(struct ktr_sysret *ktr, u_int sv_flags) { register_t ret = ktr->ktr_retval; int error = ktr->ktr_error; syscallname(ktr->ktr_code, sv_flags); printf(" "); if (error == 0) { if (fancy) { printf("%ld", (long)ret); if (ret < 0 || ret > 9) printf("/%#lx", (unsigned long)ret); } else { if (decimal) printf("%ld", (long)ret); else printf("%#lx", (unsigned long)ret); } } else if (error == ERESTART) printf("RESTART"); else if (error == EJUSTRETURN) printf("JUSTRETURN"); else { printf("-1 errno %d", sysdecode_freebsd_to_abi_errno( syscallabi(sv_flags), error)); if (fancy) printf(" %s", strerror(ktr->ktr_error)); } putchar('\n'); } void ktrnamei(char *cp, int len) { printf("\"%.*s\"\n", len, cp); } void hexdump(char *p, int len, int screenwidth) { int n, i; int width; width = 0; do { width += 2; i = 13; /* base offset */ i += (width / 2) + 1; /* spaces every second byte */ i += (width * 2); /* width of bytes */ i += 3; /* " |" */ i += width; /* each byte */ i += 1; /* "|" */ } while (i < screenwidth); width -= 2; for (n = 0; n < len; n += width) { for (i = n; i < n + width; i++) { if ((i % width) == 0) { /* beginning of line */ printf(" 0x%04x", i); } if ((i % 2) == 0) { printf(" "); } if (i < len) printf("%02x", p[i] & 0xff); else printf(" "); } printf(" |"); for (i = n; i < n + width; i++) { if (i >= len) break; if (p[i] >= ' ' && p[i] <= '~') printf("%c", p[i]); else printf("."); } printf("|\n"); } if ((i % width) != 0) printf("\n"); } void visdump(char *dp, int datalen, int screenwidth) { int col = 0; char *cp; int width; char visbuf[5]; printf(" \""); col = 8; for (;datalen > 0; datalen--, dp++) { vis(visbuf, *dp, VIS_CSTYLE | VIS_NOLOCALE, *(dp+1)); cp = visbuf; /* * Keep track of printables and * space chars (like fold(1)). */ if (col == 0) { putchar('\t'); col = 8; } switch(*cp) { case '\n': col = 0; putchar('\n'); continue; case '\t': width = 8 - (col&07); break; default: width = strlen(cp); } if (col + width > (screenwidth-2)) { printf("\\\n\t"); col = 8; } col += width; do { putchar(*cp++); } while (*cp); } if (col == 0) printf(" "); printf("\"\n"); } void ktrgenio(struct ktr_genio *ktr, int len) { int datalen = len - sizeof (struct ktr_genio); char *dp = (char *)ktr + sizeof (struct ktr_genio); static int screenwidth = 0; int i, binary; printf("fd %d %s %d byte%s\n", ktr->ktr_fd, ktr->ktr_rw == UIO_READ ? "read" : "wrote", datalen, datalen == 1 ? "" : "s"); if (suppressdata) return; if (screenwidth == 0) { struct winsize ws; if (fancy && ioctl(fileno(stderr), TIOCGWINSZ, &ws) != -1 && ws.ws_col > 8) screenwidth = ws.ws_col; else screenwidth = 80; } if (maxdata && datalen > maxdata) datalen = maxdata; for (i = 0, binary = 0; i < datalen && binary == 0; i++) { if (dp[i] >= 32 && dp[i] < 127) continue; if (dp[i] == 10 || dp[i] == 13 || dp[i] == 0 || dp[i] == 9) continue; binary = 1; } if (binary) hexdump(dp, datalen, screenwidth); else visdump(dp, datalen, screenwidth); } void ktrpsig(struct ktr_psig *psig) { const char *str; print_signal(psig->signo); if (psig->action == SIG_DFL) { printf(" SIG_DFL"); } else { printf(" caught handler=0x%lx mask=0x%x", (u_long)psig->action, psig->mask.__bits[0]); } printf(" code="); str = sysdecode_sigcode(psig->signo, psig->code); if (str != NULL) printf("%s", str); else printf("", psig->code); putchar('\n'); } void ktrcsw_old(struct ktr_csw_old *cs) { printf("%s %s\n", cs->out ? "stop" : "resume", cs->user ? "user" : "kernel"); } void ktrcsw(struct ktr_csw *cs) { printf("%s %s \"%s\"\n", cs->out ? "stop" : "resume", cs->user ? "user" : "kernel", cs->wmesg); } void ktruser(int len, void *p) { unsigned char *cp; if (sysdecode_utrace(stdout, p, len)) { printf("\n"); return; } printf("%d ", len); cp = p; while (len--) if (decimal) printf(" %d", *cp++); else printf(" %02x", *cp++); printf("\n"); } void ktrcaprights(cap_rights_t *rightsp) { printf("cap_rights_t "); sysdecode_cap_rights(stdout, rightsp); printf("\n"); } static void ktrtimeval(struct timeval *tv) { printf("{%ld, %ld}", (long)tv->tv_sec, tv->tv_usec); } void ktritimerval(struct itimerval *it) { printf("itimerval { .interval = "); ktrtimeval(&it->it_interval); printf(", .value = "); ktrtimeval(&it->it_value); printf(" }\n"); } void ktrsockaddr(struct sockaddr *sa) { /* TODO: Support additional address families #include struct sockaddr_nb *nb; */ const char *str; char addr[64]; /* * note: ktrstruct() has already verified that sa points to a * buffer at least sizeof(struct sockaddr) bytes long and exactly * sa->sa_len bytes long. */ printf("struct sockaddr { "); str = sysdecode_sockaddr_family(sa->sa_family); if (str != NULL) printf("%s", str); else printf("", sa->sa_family); printf(", "); #define check_sockaddr_len(n) \ if (sa_##n.s##n##_len < sizeof(struct sockaddr_##n)) { \ printf("invalid"); \ break; \ } switch(sa->sa_family) { case AF_INET: { struct sockaddr_in sa_in; memset(&sa_in, 0, sizeof(sa_in)); memcpy(&sa_in, sa, sa->sa_len); check_sockaddr_len(in); inet_ntop(AF_INET, &sa_in.sin_addr, addr, sizeof addr); printf("%s:%u", addr, ntohs(sa_in.sin_port)); break; } case AF_INET6: { struct sockaddr_in6 sa_in6; memset(&sa_in6, 0, sizeof(sa_in6)); memcpy(&sa_in6, sa, sa->sa_len); check_sockaddr_len(in6); getnameinfo((struct sockaddr *)&sa_in6, sizeof(sa_in6), addr, sizeof(addr), NULL, 0, NI_NUMERICHOST); printf("[%s]:%u", addr, htons(sa_in6.sin6_port)); break; } case AF_UNIX: { struct sockaddr_un sa_un; memset(&sa_un, 0, sizeof(sa_un)); memcpy(&sa_un, sa, sa->sa_len); printf("%.*s", (int)sizeof(sa_un.sun_path), sa_un.sun_path); break; } default: printf("unknown address family"); } printf(" }\n"); } void ktrstat(struct stat *statp) { char mode[12], timestr[PATH_MAX + 4]; struct passwd *pwd; struct group *grp; struct tm *tm; /* * note: ktrstruct() has already verified that statp points to a * buffer exactly sizeof(struct stat) bytes long. */ printf("struct stat {"); printf("dev=%ju, ino=%ju, ", (uintmax_t)statp->st_dev, (uintmax_t)statp->st_ino); if (!resolv) printf("mode=0%jo, ", (uintmax_t)statp->st_mode); else { strmode(statp->st_mode, mode); printf("mode=%s, ", mode); } printf("nlink=%ju, ", (uintmax_t)statp->st_nlink); if (!resolv) { pwd = NULL; } else { #ifdef WITH_CASPER if (cappwd != NULL) pwd = cap_getpwuid(cappwd, statp->st_uid); else #endif pwd = getpwuid(statp->st_uid); } if (pwd == NULL) printf("uid=%ju, ", (uintmax_t)statp->st_uid); else printf("uid=\"%s\", ", pwd->pw_name); if (!resolv) { grp = NULL; } else { #ifdef WITH_CASPER if (capgrp != NULL) grp = cap_getgrgid(capgrp, statp->st_gid); else #endif grp = getgrgid(statp->st_gid); } if (grp == NULL) printf("gid=%ju, ", (uintmax_t)statp->st_gid); else printf("gid=\"%s\", ", grp->gr_name); printf("rdev=%ju, ", (uintmax_t)statp->st_rdev); printf("atime="); if (!resolv) printf("%jd", (intmax_t)statp->st_atim.tv_sec); else { tm = localtime(&statp->st_atim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_atim.tv_nsec != 0) printf(".%09ld, ", statp->st_atim.tv_nsec); else printf(", "); printf("mtime="); if (!resolv) printf("%jd", (intmax_t)statp->st_mtim.tv_sec); else { tm = localtime(&statp->st_mtim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_mtim.tv_nsec != 0) printf(".%09ld, ", statp->st_mtim.tv_nsec); else printf(", "); printf("ctime="); if (!resolv) printf("%jd", (intmax_t)statp->st_ctim.tv_sec); else { tm = localtime(&statp->st_ctim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_ctim.tv_nsec != 0) printf(".%09ld, ", statp->st_ctim.tv_nsec); else printf(", "); printf("birthtime="); if (!resolv) printf("%jd", (intmax_t)statp->st_birthtim.tv_sec); else { tm = localtime(&statp->st_birthtim.tv_sec); strftime(timestr, sizeof(timestr), TIME_FORMAT, tm); printf("\"%s\"", timestr); } if (statp->st_birthtim.tv_nsec != 0) printf(".%09ld, ", statp->st_birthtim.tv_nsec); else printf(", "); printf("size=%jd, blksize=%ju, blocks=%jd, flags=0x%x", (uintmax_t)statp->st_size, (uintmax_t)statp->st_blksize, (intmax_t)statp->st_blocks, statp->st_flags); printf(" }\n"); } void ktrstruct(char *buf, size_t buflen) { char *name, *data; size_t namelen, datalen; int i; cap_rights_t rights; struct itimerval it; struct stat sb; struct sockaddr_storage ss; for (name = buf, namelen = 0; namelen < buflen && name[namelen] != '\0'; ++namelen) /* nothing */; if (namelen == buflen) goto invalid; if (name[namelen] != '\0') goto invalid; data = buf + namelen + 1; datalen = buflen - namelen - 1; if (datalen == 0) goto invalid; /* sanity check */ for (i = 0; i < (int)namelen; ++i) if (!isalpha(name[i])) goto invalid; if (strcmp(name, "caprights") == 0) { if (datalen != sizeof(cap_rights_t)) goto invalid; memcpy(&rights, data, datalen); ktrcaprights(&rights); } else if (strcmp(name, "itimerval") == 0) { if (datalen != sizeof(struct itimerval)) goto invalid; memcpy(&it, data, datalen); ktritimerval(&it); } else if (strcmp(name, "stat") == 0) { if (datalen != sizeof(struct stat)) goto invalid; memcpy(&sb, data, datalen); ktrstat(&sb); } else if (strcmp(name, "sockaddr") == 0) { if (datalen > sizeof(ss)) goto invalid; memcpy(&ss, data, datalen); if (datalen != ss.ss_len) goto invalid; ktrsockaddr((struct sockaddr *)&ss); } else { printf("unknown structure\n"); } return; invalid: printf("invalid record\n"); } void ktrcapfail(struct ktr_cap_fail *ktr) { switch (ktr->cap_type) { case CAPFAIL_NOTCAPABLE: /* operation on fd with insufficient capabilities */ printf("operation requires "); sysdecode_cap_rights(stdout, &ktr->cap_needed); printf(", descriptor holds "); sysdecode_cap_rights(stdout, &ktr->cap_held); break; case CAPFAIL_INCREASE: /* requested more capabilities than fd already has */ printf("attempt to increase capabilities from "); sysdecode_cap_rights(stdout, &ktr->cap_held); printf(" to "); sysdecode_cap_rights(stdout, &ktr->cap_needed); break; case CAPFAIL_SYSCALL: /* called restricted syscall */ printf("disallowed system call"); break; case CAPFAIL_LOOKUP: /* absolute or AT_FDCWD path, ".." path, etc. */ printf("restricted VFS lookup"); break; default: printf("unknown capability failure: "); sysdecode_cap_rights(stdout, &ktr->cap_needed); printf(" "); sysdecode_cap_rights(stdout, &ktr->cap_held); break; } printf("\n"); } void ktrfault(struct ktr_fault *ktr) { printf("0x%jx ", (uintmax_t)ktr->vaddr); print_mask_arg(sysdecode_vmprot, ktr->type); printf("\n"); } void ktrfaultend(struct ktr_faultend *ktr) { const char *str; str = sysdecode_vmresult(ktr->result); if (str != NULL) printf("%s", str); else printf("", ktr->result); printf("\n"); } void ktrkevent(struct kevent *kev) { printf("{ ident="); switch (kev->filter) { case EVFILT_READ: case EVFILT_WRITE: case EVFILT_VNODE: case EVFILT_PROC: case EVFILT_TIMER: case EVFILT_PROCDESC: case EVFILT_EMPTY: printf("%ju", (uintmax_t)kev->ident); break; case EVFILT_SIGNAL: print_signal(kev->ident); break; default: printf("%p", (void *)kev->ident); } printf(", filter="); print_integer_arg(sysdecode_kevent_filter, kev->filter); printf(", flags="); print_mask_arg0(sysdecode_kevent_flags, kev->flags); printf(", fflags="); sysdecode_kevent_fflags(stdout, kev->filter, kev->fflags, decimal ? 10 : 16); printf(", data=%#jx, udata=%p }", (uintmax_t)kev->data, kev->udata); } void ktrstructarray(struct ktr_struct_array *ksa, size_t buflen) { struct kevent kev; char *name, *data; size_t namelen, datalen; int i; bool first; buflen -= sizeof(*ksa); for (name = (char *)(ksa + 1), namelen = 0; namelen < buflen && name[namelen] != '\0'; ++namelen) /* nothing */; if (namelen == buflen) goto invalid; if (name[namelen] != '\0') goto invalid; /* sanity check */ for (i = 0; i < (int)namelen; ++i) if (!isalnum(name[i]) && name[i] != '_') goto invalid; data = name + namelen + 1; datalen = buflen - namelen - 1; printf("struct %s[] = { ", name); first = true; for (; datalen >= ksa->struct_size; data += ksa->struct_size, datalen -= ksa->struct_size) { if (!first) printf("\n "); else first = false; if (strcmp(name, "kevent") == 0) { if (ksa->struct_size != sizeof(kev)) goto bad_size; memcpy(&kev, data, sizeof(kev)); ktrkevent(&kev); } else if (strcmp(name, "freebsd11_kevent") == 0) { struct freebsd11_kevent kev11; if (ksa->struct_size != sizeof(kev11)) goto bad_size; memcpy(&kev11, data, sizeof(kev11)); memset(&kev, 0, sizeof(kev)); kev.ident = kev11.ident; kev.filter = kev11.filter; kev.flags = kev11.flags; kev.fflags = kev11.fflags; kev.data = kev11.data; kev.udata = kev11.udata; ktrkevent(&kev); #ifdef _WANT_KEVENT32 } else if (strcmp(name, "kevent32") == 0) { struct kevent32 kev32; if (ksa->struct_size != sizeof(kev32)) goto bad_size; memcpy(&kev32, data, sizeof(kev32)); memset(&kev, 0, sizeof(kev)); kev.ident = kev32.ident; kev.filter = kev32.filter; kev.flags = kev32.flags; kev.fflags = kev32.fflags; #if BYTE_ORDER == BIG_ENDIAN kev.data = kev32.data2 | ((int64_t)kev32.data1 << 32); #else kev.data = kev32.data1 | ((int64_t)kev32.data2 << 32); #endif kev.udata = (void *)(uintptr_t)kev32.udata; ktrkevent(&kev); } else if (strcmp(name, "freebsd11_kevent32") == 0) { struct freebsd11_kevent32 kev32; if (ksa->struct_size != sizeof(kev32)) goto bad_size; memcpy(&kev32, data, sizeof(kev32)); memset(&kev, 0, sizeof(kev)); kev.ident = kev32.ident; kev.filter = kev32.filter; kev.flags = kev32.flags; kev.fflags = kev32.fflags; kev.data = kev32.data; kev.udata = (void *)(uintptr_t)kev32.udata; ktrkevent(&kev); #endif } else { printf(" }\n"); return; } } printf(" }\n"); return; invalid: printf("invalid record\n"); return; bad_size: printf(" }\n"); return; } void usage(void) { fprintf(stderr, "usage: kdump [-dEnlHRrSsTA] [-f trfile] " "[-m maxdata] [-p pid] [-t trstr]\n"); exit(1); } diff --git a/usr.bin/truss/syscall.h b/usr.bin/truss/syscall.h index 7a403d5b680d..53a1fd6ee8d7 100644 --- a/usr.bin/truss/syscall.h +++ b/usr.bin/truss/syscall.h @@ -1,275 +1,276 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright 1997 Sean Eric Fagan * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan * 4. Neither the name of the author may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * System call arguments come in several flavours: * Hex -- values that should be printed in hex (addresses) * Octal -- Same as above, but octal * Int -- normal integer values (file descriptors, for example) * LongHex -- long value that should be printed in hex * Name -- pointer to a NULL-terminated string. * BinString -- pointer to an array of chars, printed via strvisx(). * Ptr -- pointer to some unspecified structure. Just print as hex for now. * Stat -- a pointer to a stat buffer. Prints a couple fields. * Stat11 -- a pointer to a freebsd 11 stat buffer. Prints a couple fields. * StatFs -- a pointer to a statfs buffer. Prints a few fields. * Ioctl -- an ioctl command. Woefully limited. * Quad -- a double-word value. e.g., lseek(int, offset_t, int) * Signal -- a signal number. Prints the signal name (SIGxxx) * Sockaddr -- a pointer to a struct sockaddr. Prints symbolic AF, and IP:Port * StringArray -- a pointer to an array of string pointers. * Timespec -- a pointer to a struct timespec. Prints both elements. * Timeval -- a pointer to a struct timeval. Prints both elements. * Timeval2 -- a pointer to two struct timevals. Prints both elements of both. * Itimerval -- a pointer to a struct itimerval. Prints all elements. * Pollfd -- a pointer to an array of struct pollfd. Prints .fd and .events. * Fd_set -- a pointer to an array of fd_set. Prints the fds that are set. * Sigaction -- a pointer to a struct sigaction. Prints all elements. * Sigset -- a pointer to a sigset_t. Prints the signals that are set. * Sigprocmask -- the first argument to sigprocmask(). Prints the name. * Kevent -- a pointer to an array of struct kevents. Prints all elements. * Pathconf -- the 2nd argument of pathconf(). * Utrace -- utrace(2) buffer. * CapRights -- a pointer to a cap_rights_t. Prints all set capabilities. * * In addition, the pointer types (String, Ptr) may have OUT masked in -- * this means that the data is set on *return* from the system call -- or * IN (meaning that the data is passed *into* the system call). */ enum Argtype { None = 1, /* Scalar integers. */ Socklent, Octal, Int, UInt, Hex, Long, LongHex, Sizet, Quad, QuadHex, /* Encoded scalar values. */ Accessmode, Acltype, AiofsyncOp, Atfd, Atflags, CapFcntlRights, + Closerangeflags, Extattrnamespace, Fadvice, Fcntl, Fcntlflag, FileFlags, Flockop, Getfsstatmode, Idtype, Ioctl, Kldsymcmd, Kldunloadflags, LioMode, Madvice, Minherit, Msgflags, Mlockall, Mmapflags, Mountflags, Mprot, Msync, Open, Pathconf, Pipe2, Procctl, Priowhich, Ptraceop, Sendfileflags, Sendfilehdtr, Quotactlcmd, Reboothowto, Resource, Rforkflags, Rtpriofunc, RusageWho, Schedpolicy, ShmFlags, Shutdown, Signal, Sigprocmask, Sockdomain, Sockoptlevel, Sockoptname, Sockprotocol, Socktype, Sysarch, Sysctl, Umtxop, Waitoptions, Whence, /* Pointers to non-structures. */ Ptr, AiocbArray, AiocbPointer, BinString, CapRights, ExecArgs, ExecEnv, ExitStatus, Fd_set, IntArray, Iovec, Name, PipeFds, PSig, PQuadHex, PUInt, Readlinkres, ShmName, StringArray, /* Pointers to structures. */ Aiocb, Itimerval, Kevent, Kevent11, LinuxSockArgs, Msghdr, Pollfd, Rlimit, Rusage, Schedparam, Sctpsndrcvinfo, Sigaction, Sigevent, Siginfo, Sigset, Sockaddr, Stat, Stat11, StatFs, Timespec, Timespec2, Timeval, Timeval2, Utrace, MAX_ARG_TYPE, }; #define ARG_MASK 0xff #define OUT 0x100 #define IN /*0x20*/0 _Static_assert(ARG_MASK > MAX_ARG_TYPE, "ARG_MASK overlaps with Argtype values"); struct syscall_arg { enum Argtype type; int offset; }; struct syscall_decode { const char *name; /* Name for calling convention lookup. */ /* * Syscall return type: * 0: no return value (e.g. exit) * 1: normal return value (a single int/long/pointer) * 2: off_t return value (two values for 32-bit ABIs) */ u_int ret_type; u_int nargs; /* number of meaningful arguments */ struct syscall_arg args[10]; /* Hopefully no syscalls with > 10 args */ }; struct syscall { STAILQ_ENTRY(syscall) entries; const char *name; /* Name to be displayed, might be malloc()'d */ struct syscall_decode decode; struct timespec time; /* Time spent for this call */ int ncalls; /* Number of calls */ int nerror; /* Number of calls that returned with error */ bool unknown; /* Unknown system call */ }; struct syscall *get_syscall(struct threadinfo *, u_int, u_int); char *print_arg(struct syscall_arg *, unsigned long *, register_t *, struct trussinfo *); /* * Linux Socket defines */ #define LINUX_SOCKET 1 #define LINUX_BIND 2 #define LINUX_CONNECT 3 #define LINUX_LISTEN 4 #define LINUX_ACCEPT 5 #define LINUX_GETSOCKNAME 6 #define LINUX_GETPEERNAME 7 #define LINUX_SOCKETPAIR 8 #define LINUX_SEND 9 #define LINUX_RECV 10 #define LINUX_SENDTO 11 #define LINUX_RECVFROM 12 #define LINUX_SHUTDOWN 13 #define LINUX_SETSOCKOPT 14 #define LINUX_GETSOCKOPT 15 #define LINUX_SENDMSG 16 #define LINUX_RECVMSG 17 #define PAD_(t) (sizeof(register_t) <= sizeof(t) ? \ 0 : sizeof(register_t) - sizeof(t)) #if BYTE_ORDER == LITTLE_ENDIAN #define PADL_(t) 0 #define PADR_(t) PAD_(t) #else #define PADL_(t) PAD_(t) #define PADR_(t) 0 #endif typedef int l_int; typedef uint32_t l_ulong; struct linux_socketcall_args { char what_l_[PADL_(l_int)]; l_int what; char what_r_[PADR_(l_int)]; char args_l_[PADL_(l_ulong)]; l_ulong args; char args_r_[PADR_(l_ulong)]; }; void print_syscall(struct trussinfo *); void print_syscall_ret(struct trussinfo *, int, register_t *); void print_summary(struct trussinfo *trussinfo); diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c index 586b583c3209..d611f3f73471 100644 --- a/usr.bin/truss/syscalls.c +++ b/usr.bin/truss/syscalls.c @@ -1,2809 +1,2814 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright 1997 Sean Eric Fagan * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan * 4. Neither the name of the author may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * This file has routines used to print out system calls and their * arguments. */ #include #include #include #define _WANT_FREEBSD11_KEVENT #include #include #include #include #include #include #include #include #include #include #define _WANT_FREEBSD11_STAT #include #include #include #include #include #include #include #include #include #include #include #define _WANT_KERNEL_ERRNO #include #include #include #include #include #include #include #include #include #include #include #include "truss.h" #include "extern.h" #include "syscall.h" /* * This should probably be in its own file, sorted alphabetically. * * Note: We only scan this table on the initial syscall number to calling * convention lookup, i.e. once each time a new syscall is encountered. This * is unlikely to be a performance issue, but if it is we could sort this array * and use a binary search instead. */ static const struct syscall_decode decoded_syscalls[] = { /* Native ABI */ { .name = "__acl_aclcheck_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_aclcheck_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_aclcheck_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_delete_fd", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Acltype, 1 } } }, { .name = "__acl_delete_file", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Acltype, 1 } } }, { .name = "__acl_delete_link", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Acltype, 1 } } }, { .name = "__acl_get_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_get_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_get_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__cap_rights_get", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { CapRights | OUT, 2 } } }, { .name = "__getcwd", .ret_type = 1, .nargs = 2, .args = { { Name | OUT, 0 }, { Int, 1 } } }, { .name = "__realpathat", .ret_type = 1, .nargs = 5, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Name | OUT, 2 }, { Sizet, 3 }, { Int, 4} } }, { .name = "_umtx_op", .ret_type = 1, .nargs = 5, .args = { { Ptr, 0 }, { Umtxop, 1 }, { LongHex, 2 }, { Ptr, 3 }, { Ptr, 4 } } }, { .name = "accept", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "access", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Accessmode, 1 } } }, { .name = "aio_cancel", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Aiocb, 1 } } }, { .name = "aio_error", .ret_type = 1, .nargs = 1, .args = { { Aiocb, 0 } } }, { .name = "aio_fsync", .ret_type = 1, .nargs = 2, .args = { { AiofsyncOp, 0 }, { Aiocb, 1 } } }, { .name = "aio_mlock", .ret_type = 1, .nargs = 1, .args = { { Aiocb, 0 } } }, { .name = "aio_read", .ret_type = 1, .nargs = 1, .args = { { Aiocb, 0 } } }, { .name = "aio_return", .ret_type = 1, .nargs = 1, .args = { { Aiocb, 0 } } }, { .name = "aio_suspend", .ret_type = 1, .nargs = 3, .args = { { AiocbArray, 0 }, { Int, 1 }, { Timespec, 2 } } }, { .name = "aio_waitcomplete", .ret_type = 1, .nargs = 2, .args = { { AiocbPointer | OUT, 0 }, { Timespec, 1 } } }, { .name = "aio_write", .ret_type = 1, .nargs = 1, .args = { { Aiocb, 0 } } }, { .name = "bind", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | IN, 1 }, { Socklent, 2 } } }, { .name = "bindat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Int, 1 }, { Sockaddr | IN, 2 }, { Int, 3 } } }, { .name = "break", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "cap_fcntls_get", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapFcntlRights | OUT, 1 } } }, { .name = "cap_fcntls_limit", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapFcntlRights, 1 } } }, { .name = "cap_getmode", .ret_type = 1, .nargs = 1, .args = { { PUInt | OUT, 0 } } }, { .name = "cap_rights_limit", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapRights, 1 } } }, { .name = "chdir", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "chflags", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { FileFlags, 1 } } }, { .name = "chflagsat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { FileFlags, 2 }, { Atflags, 3 } } }, { .name = "chmod", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "chown", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "chroot", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "clock_gettime", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec | OUT, 1 } } }, { .name = "close", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "closefrom", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, + { .name = "close_range", .ret_type = 1, .nargs = 3, + .args = { { Int, 0 }, { Int, 1 }, { Closerangeflags, 2 } } }, { .name = "compat11.fstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Stat11 | OUT, 1 } } }, { .name = "compat11.fstatat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Stat11 | OUT, 2 }, { Atflags, 3 } } }, { .name = "compat11.kevent", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { Kevent11, 1 }, { Int, 2 }, { Kevent11 | OUT, 3 }, { Int, 4 }, { Timespec, 5 } } }, { .name = "compat11.lstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat11 | OUT, 1 } } }, { .name = "compat11.mknod", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Octal, 1 }, { Int, 2 } } }, { .name = "compat11.mknodat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 }, { Int, 3 } } }, { .name = "compat11.stat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat11 | OUT, 1 } } }, { .name = "connect", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | IN, 1 }, { Socklent, 2 } } }, { .name = "connectat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Int, 1 }, { Sockaddr | IN, 2 }, { Int, 3 } } }, { .name = "dup", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "dup2", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = "eaccess", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Accessmode, 1 } } }, { .name = "execve", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { ExecArgs | IN, 1 }, { ExecEnv | IN, 2 } } }, { .name = "exit", .ret_type = 0, .nargs = 1, .args = { { Hex, 0 } } }, { .name = "extattr_delete_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_delete_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_delete_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_get_fd", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_get_file", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_get_link", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_list_fd", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_list_file", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_list_link", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_set_fd", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattr_set_file", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattr_set_link", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattrctl", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Hex, 1 }, { Name, 2 }, { Extattrnamespace, 3 }, { Name, 4 } } }, { .name = "faccessat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Accessmode, 2 }, { Atflags, 3 } } }, { .name = "fchflags", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { FileFlags, 1 } } }, { .name = "fchmod", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Octal, 1 } } }, { .name = "fchmodat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 }, { Atflags, 3 } } }, { .name = "fchown", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "fchownat", .ret_type = 1, .nargs = 5, .args = { { Atfd, 0 }, { Name, 1 }, { Int, 2 }, { Int, 3 }, { Atflags, 4 } } }, { .name = "fcntl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Fcntl, 1 }, { Fcntlflag, 2 } } }, { .name = "fdatasync", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "flock", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Flockop, 1 } } }, { .name = "fstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Stat | OUT, 1 } } }, { .name = "fstatat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Stat | OUT, 2 }, { Atflags, 3 } } }, { .name = "fstatfs", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { StatFs | OUT, 1 } } }, { .name = "fsync", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "ftruncate", .ret_type = 1, .nargs = 2, .args = { { Int | IN, 0 }, { QuadHex | IN, 1 } } }, { .name = "futimens", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec2 | IN, 1 } } }, { .name = "futimes", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timeval2 | IN, 1 } } }, { .name = "futimesat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Timeval2 | IN, 2 } } }, { .name = "getdirentries", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Int, 2 }, { PQuadHex | OUT, 3 } } }, { .name = "getfsstat", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Long, 1 }, { Getfsstatmode, 2 } } }, { .name = "getitimer", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Itimerval | OUT, 2 } } }, { .name = "getpeername", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "getpgid", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "getpriority", .ret_type = 1, .nargs = 2, .args = { { Priowhich, 0 }, { Int, 1 } } }, { .name = "getrandom", .ret_type = 1, .nargs = 3, .args = { { BinString | OUT, 0 }, { Sizet, 1 }, { UInt, 2 } } }, { .name = "getrlimit", .ret_type = 1, .nargs = 2, .args = { { Resource, 0 }, { Rlimit | OUT, 1 } } }, { .name = "getrusage", .ret_type = 1, .nargs = 2, .args = { { RusageWho, 0 }, { Rusage | OUT, 1 } } }, { .name = "getsid", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "getsockname", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "getsockopt", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Sockoptlevel, 1 }, { Sockoptname, 2 }, { Ptr | OUT, 3 }, { Ptr | OUT, 4 } } }, { .name = "gettimeofday", .ret_type = 1, .nargs = 2, .args = { { Timeval | OUT, 0 }, { Ptr, 1 } } }, { .name = "ioctl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Ioctl, 1 }, { Ptr, 2 } } }, { .name = "kevent", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { Kevent, 1 }, { Int, 2 }, { Kevent | OUT, 3 }, { Int, 4 }, { Timespec, 5 } } }, { .name = "kill", .ret_type = 1, .nargs = 2, .args = { { Int | IN, 0 }, { Signal | IN, 1 } } }, { .name = "kldfind", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "kldfirstmod", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldload", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "kldnext", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Ptr, 1 } } }, { .name = "kldsym", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Kldsymcmd, 1 }, { Ptr, 2 } } }, { .name = "kldunload", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldunloadf", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Kldunloadflags, 1 } } }, { .name = "kse_release", .ret_type = 0, .nargs = 1, .args = { { Timespec, 0 } } }, { .name = "lchflags", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { FileFlags, 1 } } }, { .name = "lchmod", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "lchown", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "link", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "linkat", .ret_type = 1, .nargs = 5, .args = { { Atfd, 0 }, { Name, 1 }, { Atfd, 2 }, { Name, 3 }, { Atflags, 4 } } }, { .name = "lio_listio", .ret_type = 1, .nargs = 4, .args = { { LioMode, 0 }, { AiocbArray, 1 }, { Int, 2 }, { Sigevent, 3 } } }, { .name = "listen", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = "lseek", .ret_type = 2, .nargs = 3, .args = { { Int, 0 }, { QuadHex, 1 }, { Whence, 2 } } }, { .name = "lstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat | OUT, 1 } } }, { .name = "lutimes", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Timeval2 | IN, 1 } } }, { .name = "madvise", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Madvice, 2 } } }, { .name = "minherit", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Minherit, 2 } } }, { .name = "mkdir", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "mkdirat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 } } }, { .name = "mkfifo", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "mkfifoat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 } } }, { .name = "mknod", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Octal, 1 }, { Quad, 2 } } }, { .name = "mknodat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 }, { Quad, 3 } } }, { .name = "mlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "mlockall", .ret_type = 1, .nargs = 1, .args = { { Mlockall, 0 } } }, { .name = "mmap", .ret_type = 1, .nargs = 6, .args = { { Ptr, 0 }, { Sizet, 1 }, { Mprot, 2 }, { Mmapflags, 3 }, { Int, 4 }, { QuadHex, 5 } } }, { .name = "modfind", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "mount", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Name, 1 }, { Mountflags, 2 }, { Ptr, 3 } } }, { .name = "mprotect", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Mprot, 2 } } }, { .name = "msync", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Msync, 2 } } }, { .name = "munlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "munmap", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "nanosleep", .ret_type = 1, .nargs = 1, .args = { { Timespec, 0 } } }, { .name = "nmount", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { UInt, 1 }, { Mountflags, 2 } } }, { .name = "open", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { Open, 1 }, { Octal, 2 } } }, { .name = "openat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Open, 2 }, { Octal, 3 } } }, { .name = "pathconf", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Pathconf, 1 } } }, { .name = "pipe", .ret_type = 1, .nargs = 1, .args = { { PipeFds | OUT, 0 } } }, { .name = "pipe2", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Pipe2, 1 } } }, { .name = "poll", .ret_type = 1, .nargs = 3, .args = { { Pollfd, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "posix_fadvise", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { QuadHex, 1 }, { QuadHex, 2 }, { Fadvice, 3 } } }, { .name = "posix_openpt", .ret_type = 1, .nargs = 1, .args = { { Open, 0 } } }, { .name = "pread", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 }, { QuadHex, 3 } } }, { .name = "procctl", .ret_type = 1, .nargs = 4, .args = { { Idtype, 0 }, { Quad, 1 }, { Procctl, 2 }, { Ptr, 3 } } }, { .name = "ptrace", .ret_type = 1, .nargs = 4, .args = { { Ptraceop, 0 }, { Int, 1 }, { Ptr, 2 }, { Int, 3 } } }, { .name = "pwrite", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 }, { QuadHex, 3 } } }, { .name = "quotactl", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Quotactlcmd, 1 }, { Int, 2 }, { Ptr, 3 } } }, { .name = "read", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 } } }, { .name = "readlink", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Readlinkres | OUT, 1 }, { Sizet, 2 } } }, { .name = "readlinkat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Readlinkres | OUT, 2 }, { Sizet, 3 } } }, { .name = "readv", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Iovec | OUT, 1 }, { Int, 2 } } }, { .name = "reboot", .ret_type = 1, .nargs = 1, .args = { { Reboothowto, 0 } } }, { .name = "recvfrom", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 }, { Msgflags, 3 }, { Sockaddr | OUT, 4 }, { Ptr | OUT, 5 } } }, { .name = "recvmsg", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Msghdr | OUT, 1 }, { Msgflags, 2 } } }, { .name = "rename", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "renameat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Atfd, 2 }, { Name, 3 } } }, { .name = "rfork", .ret_type = 1, .nargs = 1, .args = { { Rforkflags, 0 } } }, { .name = "rmdir", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "rtprio", .ret_type = 1, .nargs = 3, .args = { { Rtpriofunc, 0 }, { Int, 1 }, { Ptr, 2 } } }, { .name = "rtprio_thread", .ret_type = 1, .nargs = 3, .args = { { Rtpriofunc, 0 }, { Int, 1 }, { Ptr, 2 } } }, { .name = "sched_get_priority_max", .ret_type = 1, .nargs = 1, .args = { { Schedpolicy, 0 } } }, { .name = "sched_get_priority_min", .ret_type = 1, .nargs = 1, .args = { { Schedpolicy, 0 } } }, { .name = "sched_getparam", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Schedparam | OUT, 1 } } }, { .name = "sched_getscheduler", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "sched_rr_get_interval", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec | OUT, 1 } } }, { .name = "sched_setparam", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Schedparam, 1 } } }, { .name = "sched_setscheduler", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Schedpolicy, 1 }, { Schedparam, 2 } } }, { .name = "sctp_generic_recvmsg", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { Iovec | OUT, 1 }, { Int, 2 }, { Sockaddr | OUT, 3 }, { Ptr | OUT, 4 }, { Sctpsndrcvinfo | OUT, 5 }, { Ptr | OUT, 6 } } }, { .name = "sctp_generic_sendmsg", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { BinString | IN, 1 }, { Int, 2 }, { Sockaddr | IN, 3 }, { Socklent, 4 }, { Sctpsndrcvinfo | IN, 5 }, { Msgflags, 6 } } }, { .name = "sctp_generic_sendmsg_iov", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { Iovec | IN, 1 }, { Int, 2 }, { Sockaddr | IN, 3 }, { Socklent, 4 }, { Sctpsndrcvinfo | IN, 5 }, { Msgflags, 6 } } }, { .name = "sendfile", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { Int, 1 }, { QuadHex, 2 }, { Sizet, 3 }, { Sendfilehdtr, 4 }, { QuadHex | OUT, 5 }, { Sendfileflags, 6 } } }, { .name = "select", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Fd_set, 1 }, { Fd_set, 2 }, { Fd_set, 3 }, { Timeval, 4 } } }, { .name = "sendmsg", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Msghdr | IN, 1 }, { Msgflags, 2 } } }, { .name = "sendto", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 }, { Msgflags, 3 }, { Sockaddr | IN, 4 }, { Socklent | IN, 5 } } }, { .name = "setitimer", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Itimerval, 1 }, { Itimerval | OUT, 2 } } }, { .name = "setpriority", .ret_type = 1, .nargs = 3, .args = { { Priowhich, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "setrlimit", .ret_type = 1, .nargs = 2, .args = { { Resource, 0 }, { Rlimit | IN, 1 } } }, { .name = "setsockopt", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Sockoptlevel, 1 }, { Sockoptname, 2 }, { Ptr | IN, 3 }, { Socklent, 4 } } }, { .name = "shm_open", .ret_type = 1, .nargs = 3, .args = { { ShmName | IN, 0 }, { Open, 1 }, { Octal, 2 } } }, { .name = "shm_open2", .ret_type = 1, .nargs = 5, .args = { { ShmName | IN, 0 }, { Open, 1 }, { Octal, 2 }, { ShmFlags, 3 }, { Name | IN, 4 } } }, { .name = "shm_rename", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { Name | IN, 1 }, { Hex, 2 } } }, { .name = "shm_unlink", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "shutdown", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Shutdown, 1 } } }, { .name = "sigaction", .ret_type = 1, .nargs = 3, .args = { { Signal, 0 }, { Sigaction | IN, 1 }, { Sigaction | OUT, 2 } } }, { .name = "sigpending", .ret_type = 1, .nargs = 1, .args = { { Sigset | OUT, 0 } } }, { .name = "sigprocmask", .ret_type = 1, .nargs = 3, .args = { { Sigprocmask, 0 }, { Sigset, 1 }, { Sigset | OUT, 2 } } }, { .name = "sigqueue", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Signal, 1 }, { LongHex, 2 } } }, { .name = "sigreturn", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "sigsuspend", .ret_type = 1, .nargs = 1, .args = { { Sigset | IN, 0 } } }, { .name = "sigtimedwait", .ret_type = 1, .nargs = 3, .args = { { Sigset | IN, 0 }, { Siginfo | OUT, 1 }, { Timespec | IN, 2 } } }, { .name = "sigwait", .ret_type = 1, .nargs = 2, .args = { { Sigset | IN, 0 }, { PSig | OUT, 1 } } }, { .name = "sigwaitinfo", .ret_type = 1, .nargs = 2, .args = { { Sigset | IN, 0 }, { Siginfo | OUT, 1 } } }, { .name = "socket", .ret_type = 1, .nargs = 3, .args = { { Sockdomain, 0 }, { Socktype, 1 }, { Sockprotocol, 2 } } }, { .name = "stat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat | OUT, 1 } } }, { .name = "statfs", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { StatFs | OUT, 1 } } }, { .name = "symlink", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "symlinkat", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Atfd, 1 }, { Name, 2 } } }, { .name = "sysarch", .ret_type = 1, .nargs = 2, .args = { { Sysarch, 0 }, { Ptr, 1 } } }, { .name = "__sysctl", .ret_type = 1, .nargs = 6, .args = { { Sysctl, 0 }, { Sizet, 1 }, { Ptr, 2 }, { Ptr, 3 }, { Ptr, 4 }, { Sizet, 5 } } }, { .name = "__sysctlbyname", .ret_type = 1, .nargs = 6, .args = { { Name, 0 }, { Sizet, 1 }, { Ptr, 2 }, { Ptr, 3 }, { Ptr, 4}, { Sizet, 5 } } }, { .name = "thr_kill", .ret_type = 1, .nargs = 2, .args = { { Long, 0 }, { Signal, 1 } } }, { .name = "thr_self", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "thr_set_name", .ret_type = 1, .nargs = 2, .args = { { Long, 0 }, { Name, 1 } } }, { .name = "truncate", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { QuadHex | IN, 1 } } }, #if 0 /* Does not exist */ { .name = "umount", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Int, 2 } } }, #endif { .name = "unlink", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "unlinkat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Atflags, 2 } } }, { .name = "unmount", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Mountflags, 1 } } }, { .name = "utimensat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Timespec2 | IN, 2 }, { Atflags, 3 } } }, { .name = "utimes", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Timeval2 | IN, 1 } } }, { .name = "utrace", .ret_type = 1, .nargs = 1, .args = { { Utrace, 0 } } }, { .name = "wait4", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { ExitStatus | OUT, 1 }, { Waitoptions, 2 }, { Rusage | OUT, 3 } } }, { .name = "wait6", .ret_type = 1, .nargs = 6, .args = { { Idtype, 0 }, { Quad, 1 }, { ExitStatus | OUT, 2 }, { Waitoptions, 3 }, { Rusage | OUT, 4 }, { Siginfo | OUT, 5 } } }, { .name = "write", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 } } }, { .name = "writev", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Iovec | IN, 1 }, { Int, 2 } } }, /* Linux ABI */ { .name = "linux_access", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Accessmode, 1 } } }, { .name = "linux_execve", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { ExecArgs | IN, 1 }, { ExecEnv | IN, 2 } } }, { .name = "linux_lseek", .ret_type = 2, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Whence, 2 } } }, { .name = "linux_mkdir", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Int, 1 } } }, { .name = "linux_newfstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Ptr | OUT, 1 } } }, { .name = "linux_newstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Ptr | OUT, 1 } } }, { .name = "linux_open", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Hex, 1 }, { Octal, 2 } } }, { .name = "linux_readlink", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Name | OUT, 1 }, { Sizet, 2 } } }, { .name = "linux_socketcall", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { LinuxSockArgs, 1 } } }, { .name = "linux_stat64", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Ptr | OUT, 1 } } }, }; static STAILQ_HEAD(, syscall) seen_syscalls; /* Xlat idea taken from strace */ struct xlat { int val; const char *str; }; #define X(a) { a, #a }, #define XEND { 0, NULL } static struct xlat poll_flags[] = { X(POLLSTANDARD) X(POLLIN) X(POLLPRI) X(POLLOUT) X(POLLERR) X(POLLHUP) X(POLLNVAL) X(POLLRDNORM) X(POLLRDBAND) X(POLLWRBAND) X(POLLINIGNEOF) X(POLLRDHUP) XEND }; static struct xlat sigaction_flags[] = { X(SA_ONSTACK) X(SA_RESTART) X(SA_RESETHAND) X(SA_NOCLDSTOP) X(SA_NODEFER) X(SA_NOCLDWAIT) X(SA_SIGINFO) XEND }; static struct xlat linux_socketcall_ops[] = { X(LINUX_SOCKET) X(LINUX_BIND) X(LINUX_CONNECT) X(LINUX_LISTEN) X(LINUX_ACCEPT) X(LINUX_GETSOCKNAME) X(LINUX_GETPEERNAME) X(LINUX_SOCKETPAIR) X(LINUX_SEND) X(LINUX_RECV) X(LINUX_SENDTO) X(LINUX_RECVFROM) X(LINUX_SHUTDOWN) X(LINUX_SETSOCKOPT) X(LINUX_GETSOCKOPT) X(LINUX_SENDMSG) X(LINUX_RECVMSG) XEND }; static struct xlat lio_modes[] = { X(LIO_WAIT) X(LIO_NOWAIT) XEND }; static struct xlat lio_opcodes[] = { X(LIO_WRITE) X(LIO_READ) X(LIO_READV) X(LIO_WRITEV) X(LIO_NOP) XEND }; static struct xlat aio_fsync_ops[] = { X(O_SYNC) XEND }; #undef X #undef XEND /* * Searches an xlat array for a value, and returns it if found. Otherwise * return a string representation. */ static const char * lookup(struct xlat *xlat, int val, int base) { static char tmp[16]; for (; xlat->str != NULL; xlat++) if (xlat->val == val) return (xlat->str); switch (base) { case 8: sprintf(tmp, "0%o", val); break; case 16: sprintf(tmp, "0x%x", val); break; case 10: sprintf(tmp, "%u", val); break; default: errx(1, "Unknown lookup base"); } return (tmp); } static const char * xlookup(struct xlat *xlat, int val) { return (lookup(xlat, val, 16)); } /* * Searches an xlat array containing bitfield values. Remaining bits * set after removing the known ones are printed at the end: * IN|0x400. */ static char * xlookup_bits(struct xlat *xlat, int val) { int len, rem; static char str[512]; len = 0; rem = val; for (; xlat->str != NULL; xlat++) { if ((xlat->val & rem) == xlat->val) { /* * Don't print the "all-bits-zero" string unless all * bits are really zero. */ if (xlat->val == 0 && val != 0) continue; len += sprintf(str + len, "%s|", xlat->str); rem &= ~(xlat->val); } } /* * If we have leftover bits or didn't match anything, print * the remainder. */ if (rem || len == 0) len += sprintf(str + len, "0x%x", rem); if (len && str[len - 1] == '|') len--; str[len] = 0; return (str); } static void print_integer_arg(const char *(*decoder)(int), FILE *fp, int value) { const char *str; str = decoder(value); if (str != NULL) fputs(str, fp); else fprintf(fp, "%d", value); } static bool print_mask_arg_part(bool (*decoder)(FILE *, int, int *), FILE *fp, int value, int *rem) { return (decoder(fp, value, rem)); } static void print_mask_arg(bool (*decoder)(FILE *, int, int *), FILE *fp, int value) { int rem; if (!print_mask_arg_part(decoder, fp, value, &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); } static void print_mask_arg32(bool (*decoder)(FILE *, uint32_t, uint32_t *), FILE *fp, uint32_t value) { uint32_t rem; if (!decoder(fp, value, &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); } /* * Add argument padding to subsequent system calls after Quad * syscall arguments as needed. This used to be done by hand in the * decoded_syscalls table which was ugly and error prone. It is * simpler to do the fixup of offsets at initialization time than when * decoding arguments. */ static void quad_fixup(struct syscall_decode *sc) { int offset, prev; u_int i; offset = 0; prev = -1; for (i = 0; i < sc->nargs; i++) { /* This arg type is a dummy that doesn't use offset. */ if ((sc->args[i].type & ARG_MASK) == PipeFds) continue; assert(prev < sc->args[i].offset); prev = sc->args[i].offset; sc->args[i].offset += offset; switch (sc->args[i].type & ARG_MASK) { case Quad: case QuadHex: #if defined(__powerpc__) || defined(__arm__) || defined(__aarch64__) /* * 64-bit arguments on 32-bit powerpc and arm must be * 64-bit aligned. If the current offset is * not aligned, the calling convention inserts * a 32-bit pad argument that should be skipped. */ if (sc->args[i].offset % 2 == 1) { sc->args[i].offset++; offset++; } #endif offset++; default: break; } } } static struct syscall * find_syscall(struct procabi *abi, u_int number) { struct extra_syscall *es; if (number < nitems(abi->syscalls)) return (abi->syscalls[number]); STAILQ_FOREACH(es, &abi->extra_syscalls, entries) { if (es->number == number) return (es->sc); } return (NULL); } static void add_syscall(struct procabi *abi, u_int number, struct syscall *sc) { struct extra_syscall *es; /* * quad_fixup() is currently needed for all 32-bit ABIs. * TODO: This should probably be a function pointer inside struct * procabi instead. */ if (abi->pointer_size == 4) quad_fixup(&sc->decode); if (number < nitems(abi->syscalls)) { assert(abi->syscalls[number] == NULL); abi->syscalls[number] = sc; } else { es = malloc(sizeof(*es)); es->sc = sc; es->number = number; STAILQ_INSERT_TAIL(&abi->extra_syscalls, es, entries); } STAILQ_INSERT_HEAD(&seen_syscalls, sc, entries); } /* * If/when the list gets big, it might be desirable to do it * as a hash table or binary search. */ struct syscall * get_syscall(struct threadinfo *t, u_int number, u_int nargs) { struct syscall *sc; struct procabi *procabi; const char *sysdecode_name; const char *lookup_name; const char *name; u_int i; procabi = t->proc->abi; sc = find_syscall(procabi, number); if (sc != NULL) return (sc); /* Memory is not explicitly deallocated, it's released on exit(). */ sysdecode_name = sysdecode_syscallname(procabi->abi, number); if (sysdecode_name == NULL) asprintf(__DECONST(char **, &name), "#%d", number); else name = sysdecode_name; sc = calloc(1, sizeof(*sc)); sc->name = name; /* Also decode compat syscalls arguments by stripping the prefix. */ lookup_name = name; if (procabi->compat_prefix != NULL && strncmp(procabi->compat_prefix, name, strlen(procabi->compat_prefix)) == 0) lookup_name += strlen(procabi->compat_prefix); for (i = 0; i < nitems(decoded_syscalls); i++) { if (strcmp(lookup_name, decoded_syscalls[i].name) == 0) { sc->decode = decoded_syscalls[i]; add_syscall(t->proc->abi, number, sc); return (sc); } } /* It is unknown. Add it into the list. */ #if DEBUG fprintf(stderr, "unknown syscall %s -- setting args to %d\n", name, nargs); #endif sc->unknown = sysdecode_name == NULL; sc->decode.ret_type = 1; /* Assume 1 return value. */ sc->decode.nargs = nargs; for (i = 0; i < nargs; i++) { sc->decode.args[i].offset = i; /* Treat all unknown arguments as LongHex. */ sc->decode.args[i].type = LongHex; } add_syscall(t->proc->abi, number, sc); return (sc); } /* * Copy a fixed amount of bytes from the process. */ static int get_struct(pid_t pid, psaddr_t offset, void *buf, size_t len) { struct ptrace_io_desc iorequest; iorequest.piod_op = PIOD_READ_D; iorequest.piod_offs = (void *)(uintptr_t)offset; iorequest.piod_addr = buf; iorequest.piod_len = len; if (ptrace(PT_IO, pid, (caddr_t)&iorequest, 0) < 0) return (-1); return (0); } #define MAXSIZE 4096 /* * Copy a string from the process. Note that it is * expected to be a C string, but if max is set, it will * only get that much. */ static char * get_string(pid_t pid, psaddr_t addr, int max) { struct ptrace_io_desc iorequest; char *buf, *nbuf; size_t offset, size, totalsize; offset = 0; if (max) size = max + 1; else { /* Read up to the end of the current page. */ size = PAGE_SIZE - (addr % PAGE_SIZE); if (size > MAXSIZE) size = MAXSIZE; } totalsize = size; buf = malloc(totalsize); if (buf == NULL) return (NULL); for (;;) { iorequest.piod_op = PIOD_READ_D; iorequest.piod_offs = (void *)((uintptr_t)addr + offset); iorequest.piod_addr = buf + offset; iorequest.piod_len = size; if (ptrace(PT_IO, pid, (caddr_t)&iorequest, 0) < 0) { free(buf); return (NULL); } if (memchr(buf + offset, '\0', size) != NULL) return (buf); offset += size; if (totalsize < MAXSIZE && max == 0) { size = MAXSIZE - totalsize; if (size > PAGE_SIZE) size = PAGE_SIZE; nbuf = realloc(buf, totalsize + size); if (nbuf == NULL) { buf[totalsize - 1] = '\0'; return (buf); } buf = nbuf; totalsize += size; } else { buf[totalsize - 1] = '\0'; return (buf); } } } static const char * strsig2(int sig) { static char tmp[32]; const char *signame; signame = sysdecode_signal(sig); if (signame == NULL) { snprintf(tmp, sizeof(tmp), "%d", sig); signame = tmp; } return (signame); } static void print_kevent(FILE *fp, struct kevent *ke) { switch (ke->filter) { case EVFILT_READ: case EVFILT_WRITE: case EVFILT_VNODE: case EVFILT_PROC: case EVFILT_TIMER: case EVFILT_PROCDESC: case EVFILT_EMPTY: fprintf(fp, "%ju", (uintmax_t)ke->ident); break; case EVFILT_SIGNAL: fputs(strsig2(ke->ident), fp); break; default: fprintf(fp, "%p", (void *)ke->ident); } fprintf(fp, ","); print_integer_arg(sysdecode_kevent_filter, fp, ke->filter); fprintf(fp, ","); print_mask_arg(sysdecode_kevent_flags, fp, ke->flags); fprintf(fp, ","); sysdecode_kevent_fflags(fp, ke->filter, ke->fflags, 16); fprintf(fp, ",%#jx,%p", (uintmax_t)ke->data, ke->udata); } static void print_utrace(FILE *fp, void *utrace_addr, size_t len) { unsigned char *utrace_buffer; fprintf(fp, "{ "); if (sysdecode_utrace(fp, utrace_addr, len)) { fprintf(fp, " }"); return; } utrace_buffer = utrace_addr; fprintf(fp, "%zu:", len); while (len--) fprintf(fp, " %02x", *utrace_buffer++); fprintf(fp, " }"); } static void print_pointer(FILE *fp, uintptr_t arg) { fprintf(fp, "%p", (void *)arg); } static void print_sockaddr(FILE *fp, struct trussinfo *trussinfo, uintptr_t arg, socklen_t len) { char addr[64]; struct sockaddr_in *lsin; struct sockaddr_in6 *lsin6; struct sockaddr_un *sun; struct sockaddr *sa; u_char *q; pid_t pid = trussinfo->curthread->proc->pid; if (arg == 0) { fputs("NULL", fp); return; } /* If the length is too small, just bail. */ if (len < sizeof(*sa)) { print_pointer(fp, arg); return; } sa = calloc(1, len); if (get_struct(pid, arg, sa, len) == -1) { free(sa); print_pointer(fp, arg); return; } switch (sa->sa_family) { case AF_INET: if (len < sizeof(*lsin)) goto sockaddr_short; lsin = (struct sockaddr_in *)(void *)sa; inet_ntop(AF_INET, &lsin->sin_addr, addr, sizeof(addr)); fprintf(fp, "{ AF_INET %s:%d }", addr, htons(lsin->sin_port)); break; case AF_INET6: if (len < sizeof(*lsin6)) goto sockaddr_short; lsin6 = (struct sockaddr_in6 *)(void *)sa; inet_ntop(AF_INET6, &lsin6->sin6_addr, addr, sizeof(addr)); fprintf(fp, "{ AF_INET6 [%s]:%d }", addr, htons(lsin6->sin6_port)); break; case AF_UNIX: sun = (struct sockaddr_un *)sa; fprintf(fp, "{ AF_UNIX \"%.*s\" }", (int)(len - offsetof(struct sockaddr_un, sun_path)), sun->sun_path); break; default: sockaddr_short: fprintf(fp, "{ sa_len = %d, sa_family = %d, sa_data = {", (int)sa->sa_len, (int)sa->sa_family); for (q = (u_char *)sa->sa_data; q < (u_char *)sa + len; q++) fprintf(fp, "%s 0x%02x", q == (u_char *)sa->sa_data ? "" : ",", *q); fputs(" } }", fp); } free(sa); } #define IOV_LIMIT 16 static void print_iovec(FILE *fp, struct trussinfo *trussinfo, uintptr_t arg, int iovcnt) { struct iovec iov[IOV_LIMIT]; size_t max_string = trussinfo->strsize; char tmp2[max_string + 1], *tmp3; size_t len; pid_t pid = trussinfo->curthread->proc->pid; int i; bool buf_truncated, iov_truncated; if (iovcnt <= 0) { print_pointer(fp, arg); return; } if (iovcnt > IOV_LIMIT) { iovcnt = IOV_LIMIT; iov_truncated = true; } else { iov_truncated = false; } if (get_struct(pid, arg, &iov, iovcnt * sizeof(struct iovec)) == -1) { print_pointer(fp, arg); return; } fputs("[", fp); for (i = 0; i < iovcnt; i++) { len = iov[i].iov_len; if (len > max_string) { len = max_string; buf_truncated = true; } else { buf_truncated = false; } fprintf(fp, "%s{", (i > 0) ? "," : ""); if (len && get_struct(pid, (uintptr_t)iov[i].iov_base, &tmp2, len) != -1) { tmp3 = malloc(len * 4 + 1); while (len) { if (strvisx(tmp3, tmp2, len, VIS_CSTYLE|VIS_TAB|VIS_NL) <= (int)max_string) break; len--; buf_truncated = true; } fprintf(fp, "\"%s\"%s", tmp3, buf_truncated ? "..." : ""); free(tmp3); } else { print_pointer(fp, (uintptr_t)iov[i].iov_base); } fprintf(fp, ",%zu}", iov[i].iov_len); } fprintf(fp, "%s%s", iov_truncated ? ",..." : "", "]"); } static void print_sigval(FILE *fp, union sigval *sv) { fprintf(fp, "{ %d, %p }", sv->sival_int, sv->sival_ptr); } static void print_sigevent(FILE *fp, struct sigevent *se) { fputs("{ sigev_notify=", fp); switch (se->sigev_notify) { case SIGEV_NONE: fputs("SIGEV_NONE", fp); break; case SIGEV_SIGNAL: fprintf(fp, "SIGEV_SIGNAL, sigev_signo=%s, sigev_value=", strsig2(se->sigev_signo)); print_sigval(fp, &se->sigev_value); break; case SIGEV_THREAD: fputs("SIGEV_THREAD, sigev_value=", fp); print_sigval(fp, &se->sigev_value); break; case SIGEV_KEVENT: fprintf(fp, "SIGEV_KEVENT, sigev_notify_kqueue=%d, sigev_notify_kevent_flags=", se->sigev_notify_kqueue); print_mask_arg(sysdecode_kevent_flags, fp, se->sigev_notify_kevent_flags); break; case SIGEV_THREAD_ID: fprintf(fp, "SIGEV_THREAD_ID, sigev_notify_thread_id=%d, sigev_signo=%s, sigev_value=", se->sigev_notify_thread_id, strsig2(se->sigev_signo)); print_sigval(fp, &se->sigev_value); break; default: fprintf(fp, "%d", se->sigev_notify); break; } fputs(" }", fp); } static void print_aiocb(FILE *fp, struct aiocb *cb) { fprintf(fp, "{ %d,%jd,%p,%zu,%s,", cb->aio_fildes, cb->aio_offset, cb->aio_buf, cb->aio_nbytes, xlookup(lio_opcodes, cb->aio_lio_opcode)); print_sigevent(fp, &cb->aio_sigevent); fputs(" }", fp); } static void print_gen_cmsg(FILE *fp, struct cmsghdr *cmsghdr) { u_char *q; fputs("{", fp); for (q = CMSG_DATA(cmsghdr); q < (u_char *)cmsghdr + cmsghdr->cmsg_len; q++) { fprintf(fp, "%s0x%02x", q == CMSG_DATA(cmsghdr) ? "" : ",", *q); } fputs("}", fp); } static void print_sctp_initmsg(FILE *fp, struct sctp_initmsg *init) { fprintf(fp, "{out=%u,", init->sinit_num_ostreams); fprintf(fp, "in=%u,", init->sinit_max_instreams); fprintf(fp, "max_rtx=%u,", init->sinit_max_attempts); fprintf(fp, "max_rto=%u}", init->sinit_max_init_timeo); } static void print_sctp_sndrcvinfo(FILE *fp, bool receive, struct sctp_sndrcvinfo *info) { fprintf(fp, "{sid=%u,", info->sinfo_stream); if (receive) { fprintf(fp, "ssn=%u,", info->sinfo_ssn); } fputs("flgs=", fp); sysdecode_sctp_sinfo_flags(fp, info->sinfo_flags); fprintf(fp, ",ppid=%u,", ntohl(info->sinfo_ppid)); if (!receive) { fprintf(fp, "ctx=%u,", info->sinfo_context); fprintf(fp, "ttl=%u,", info->sinfo_timetolive); } if (receive) { fprintf(fp, "tsn=%u,", info->sinfo_tsn); fprintf(fp, "cumtsn=%u,", info->sinfo_cumtsn); } fprintf(fp, "id=%u}", info->sinfo_assoc_id); } static void print_sctp_sndinfo(FILE *fp, struct sctp_sndinfo *info) { fprintf(fp, "{sid=%u,", info->snd_sid); fputs("flgs=", fp); print_mask_arg(sysdecode_sctp_snd_flags, fp, info->snd_flags); fprintf(fp, ",ppid=%u,", ntohl(info->snd_ppid)); fprintf(fp, "ctx=%u,", info->snd_context); fprintf(fp, "id=%u}", info->snd_assoc_id); } static void print_sctp_rcvinfo(FILE *fp, struct sctp_rcvinfo *info) { fprintf(fp, "{sid=%u,", info->rcv_sid); fprintf(fp, "ssn=%u,", info->rcv_ssn); fputs("flgs=", fp); print_mask_arg(sysdecode_sctp_rcv_flags, fp, info->rcv_flags); fprintf(fp, ",ppid=%u,", ntohl(info->rcv_ppid)); fprintf(fp, "tsn=%u,", info->rcv_tsn); fprintf(fp, "cumtsn=%u,", info->rcv_cumtsn); fprintf(fp, "ctx=%u,", info->rcv_context); fprintf(fp, "id=%u}", info->rcv_assoc_id); } static void print_sctp_nxtinfo(FILE *fp, struct sctp_nxtinfo *info) { fprintf(fp, "{sid=%u,", info->nxt_sid); fputs("flgs=", fp); print_mask_arg(sysdecode_sctp_nxt_flags, fp, info->nxt_flags); fprintf(fp, ",ppid=%u,", ntohl(info->nxt_ppid)); fprintf(fp, "len=%u,", info->nxt_length); fprintf(fp, "id=%u}", info->nxt_assoc_id); } static void print_sctp_prinfo(FILE *fp, struct sctp_prinfo *info) { fputs("{pol=", fp); print_integer_arg(sysdecode_sctp_pr_policy, fp, info->pr_policy); fprintf(fp, ",val=%u}", info->pr_value); } static void print_sctp_authinfo(FILE *fp, struct sctp_authinfo *info) { fprintf(fp, "{num=%u}", info->auth_keynumber); } static void print_sctp_ipv4_addr(FILE *fp, struct in_addr *addr) { char buf[INET_ADDRSTRLEN]; const char *s; s = inet_ntop(AF_INET, addr, buf, INET_ADDRSTRLEN); if (s != NULL) fprintf(fp, "{addr=%s}", s); else fputs("{addr=???}", fp); } static void print_sctp_ipv6_addr(FILE *fp, struct in6_addr *addr) { char buf[INET6_ADDRSTRLEN]; const char *s; s = inet_ntop(AF_INET6, addr, buf, INET6_ADDRSTRLEN); if (s != NULL) fprintf(fp, "{addr=%s}", s); else fputs("{addr=???}", fp); } static void print_sctp_cmsg(FILE *fp, bool receive, struct cmsghdr *cmsghdr) { void *data; socklen_t len; len = cmsghdr->cmsg_len; data = CMSG_DATA(cmsghdr); switch (cmsghdr->cmsg_type) { case SCTP_INIT: if (len == CMSG_LEN(sizeof(struct sctp_initmsg))) print_sctp_initmsg(fp, (struct sctp_initmsg *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_SNDRCV: if (len == CMSG_LEN(sizeof(struct sctp_sndrcvinfo))) print_sctp_sndrcvinfo(fp, receive, (struct sctp_sndrcvinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; #if 0 case SCTP_EXTRCV: if (len == CMSG_LEN(sizeof(struct sctp_extrcvinfo))) print_sctp_extrcvinfo(fp, (struct sctp_extrcvinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; #endif case SCTP_SNDINFO: if (len == CMSG_LEN(sizeof(struct sctp_sndinfo))) print_sctp_sndinfo(fp, (struct sctp_sndinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_RCVINFO: if (len == CMSG_LEN(sizeof(struct sctp_rcvinfo))) print_sctp_rcvinfo(fp, (struct sctp_rcvinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_NXTINFO: if (len == CMSG_LEN(sizeof(struct sctp_nxtinfo))) print_sctp_nxtinfo(fp, (struct sctp_nxtinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_PRINFO: if (len == CMSG_LEN(sizeof(struct sctp_prinfo))) print_sctp_prinfo(fp, (struct sctp_prinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_AUTHINFO: if (len == CMSG_LEN(sizeof(struct sctp_authinfo))) print_sctp_authinfo(fp, (struct sctp_authinfo *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_DSTADDRV4: if (len == CMSG_LEN(sizeof(struct in_addr))) print_sctp_ipv4_addr(fp, (struct in_addr *)data); else print_gen_cmsg(fp, cmsghdr); break; case SCTP_DSTADDRV6: if (len == CMSG_LEN(sizeof(struct in6_addr))) print_sctp_ipv6_addr(fp, (struct in6_addr *)data); else print_gen_cmsg(fp, cmsghdr); break; default: print_gen_cmsg(fp, cmsghdr); } } static void print_cmsgs(FILE *fp, pid_t pid, bool receive, struct msghdr *msghdr) { struct cmsghdr *cmsghdr; char *cmsgbuf; const char *temp; socklen_t len; int level, type; bool first; len = msghdr->msg_controllen; if (len == 0) { fputs("{}", fp); return; } cmsgbuf = calloc(1, len); if (get_struct(pid, (uintptr_t)msghdr->msg_control, cmsgbuf, len) == -1) { print_pointer(fp, (uintptr_t)msghdr->msg_control); free(cmsgbuf); return; } msghdr->msg_control = cmsgbuf; first = true; fputs("{", fp); for (cmsghdr = CMSG_FIRSTHDR(msghdr); cmsghdr != NULL; cmsghdr = CMSG_NXTHDR(msghdr, cmsghdr)) { level = cmsghdr->cmsg_level; type = cmsghdr->cmsg_type; len = cmsghdr->cmsg_len; fprintf(fp, "%s{level=", first ? "" : ","); print_integer_arg(sysdecode_sockopt_level, fp, level); fputs(",type=", fp); temp = sysdecode_cmsg_type(level, type); if (temp) { fputs(temp, fp); } else { fprintf(fp, "%d", type); } fputs(",data=", fp); switch (level) { case IPPROTO_SCTP: print_sctp_cmsg(fp, receive, cmsghdr); break; default: print_gen_cmsg(fp, cmsghdr); break; } fputs("}", fp); first = false; } fputs("}", fp); free(cmsgbuf); } static void print_sysctl_oid(FILE *fp, int *oid, size_t len) { size_t i; bool first; first = true; fprintf(fp, "{ "); for (i = 0; i < len; i++) { fprintf(fp, "%s%d", first ? "" : ".", oid[i]); first = false; } fprintf(fp, " }"); } static void print_sysctl(FILE *fp, int *oid, size_t len) { char name[BUFSIZ]; int qoid[CTL_MAXNAME + 2]; size_t i; qoid[0] = CTL_SYSCTL; qoid[1] = CTL_SYSCTL_NAME; memcpy(qoid + 2, oid, len * sizeof(int)); i = sizeof(name); if (sysctl(qoid, len + 2, name, &i, 0, 0) == -1) print_sysctl_oid(fp, oid, len); else fprintf(fp, "%s", name); } /* * Convert a 32-bit user-space pointer to psaddr_t. Currently, this * sign-extends on MIPS and zero-extends on all other architectures. */ static psaddr_t user_ptr32_to_psaddr(int32_t user_pointer) { #if defined(__mips__) return ((psaddr_t)(intptr_t)user_pointer); #else return ((psaddr_t)(uintptr_t)user_pointer); #endif } /* * Converts a syscall argument into a string. Said string is * allocated via malloc(), so needs to be free()'d. sc is * a pointer to the syscall description (see above); args is * an array of all of the system call arguments. */ char * print_arg(struct syscall_arg *sc, unsigned long *args, register_t *retval, struct trussinfo *trussinfo) { FILE *fp; char *tmp; size_t tmplen; pid_t pid; fp = open_memstream(&tmp, &tmplen); pid = trussinfo->curthread->proc->pid; switch (sc->type & ARG_MASK) { case Hex: fprintf(fp, "0x%x", (int)args[sc->offset]); break; case Octal: fprintf(fp, "0%o", (int)args[sc->offset]); break; case Int: fprintf(fp, "%d", (int)args[sc->offset]); break; case UInt: fprintf(fp, "%u", (unsigned int)args[sc->offset]); break; case PUInt: { unsigned int val; if (get_struct(pid, args[sc->offset], &val, sizeof(val)) == 0) fprintf(fp, "{ %u }", val); else print_pointer(fp, args[sc->offset]); break; } case LongHex: fprintf(fp, "0x%lx", args[sc->offset]); break; case Long: fprintf(fp, "%ld", args[sc->offset]); break; case Sizet: fprintf(fp, "%zu", (size_t)args[sc->offset]); break; case ShmName: /* Handle special SHM_ANON value. */ if ((char *)(uintptr_t)args[sc->offset] == SHM_ANON) { fprintf(fp, "SHM_ANON"); break; } /* FALLTHROUGH */ case Name: { /* NULL-terminated string. */ char *tmp2; tmp2 = get_string(pid, args[sc->offset], 0); fprintf(fp, "\"%s\"", tmp2); free(tmp2); break; } case BinString: { /* * Binary block of data that might have printable characters. * XXX If type|OUT, assume that the length is the syscall's * return value. Otherwise, assume that the length of the block * is in the next syscall argument. */ int max_string = trussinfo->strsize; char tmp2[max_string + 1], *tmp3; int len; int truncated = 0; if (sc->type & OUT) len = retval[0]; else len = args[sc->offset + 1]; /* * Don't print more than max_string characters, to avoid word * wrap. If we have to truncate put some ... after the string. */ if (len > max_string) { len = max_string; truncated = 1; } if (len && get_struct(pid, args[sc->offset], &tmp2, len) != -1) { tmp3 = malloc(len * 4 + 1); while (len) { if (strvisx(tmp3, tmp2, len, VIS_CSTYLE|VIS_TAB|VIS_NL) <= max_string) break; len--; truncated = 1; } fprintf(fp, "\"%s\"%s", tmp3, truncated ? "..." : ""); free(tmp3); } else { print_pointer(fp, args[sc->offset]); } break; } case ExecArgs: case ExecEnv: case StringArray: { psaddr_t addr; union { int32_t strarray32[PAGE_SIZE / sizeof(int32_t)]; int64_t strarray64[PAGE_SIZE / sizeof(int64_t)]; char buf[PAGE_SIZE]; } u; char *string; size_t len; u_int first, i; size_t pointer_size = trussinfo->curthread->proc->abi->pointer_size; /* * Only parse argv[] and environment arrays from exec calls * if requested. */ if (((sc->type & ARG_MASK) == ExecArgs && (trussinfo->flags & EXECVEARGS) == 0) || ((sc->type & ARG_MASK) == ExecEnv && (trussinfo->flags & EXECVEENVS) == 0)) { print_pointer(fp, args[sc->offset]); break; } /* * Read a page of pointers at a time. Punt if the top-level * pointer is not aligned. Note that the first read is of * a partial page. */ addr = args[sc->offset]; if (!__is_aligned(addr, pointer_size)) { print_pointer(fp, args[sc->offset]); break; } len = PAGE_SIZE - (addr & PAGE_MASK); if (get_struct(pid, addr, u.buf, len) == -1) { print_pointer(fp, args[sc->offset]); break; } assert(len > 0); fputc('[', fp); first = 1; i = 0; for (;;) { psaddr_t straddr; if (pointer_size == 4) { straddr = user_ptr32_to_psaddr(u.strarray32[i]); } else if (pointer_size == 8) { straddr = (psaddr_t)u.strarray64[i]; } else { errx(1, "Unsupported pointer size: %zu", pointer_size); } /* Stop once we read the first NULL pointer. */ if (straddr == 0) break; string = get_string(pid, straddr, 0); fprintf(fp, "%s \"%s\"", first ? "" : ",", string); free(string); first = 0; i++; if (i == len / pointer_size) { addr += len; len = PAGE_SIZE; if (get_struct(pid, addr, u.buf, len) == -1) { fprintf(fp, ", "); break; } i = 0; } } fputs(" ]", fp); break; } case Quad: case QuadHex: { uint64_t value; size_t pointer_size = trussinfo->curthread->proc->abi->pointer_size; if (pointer_size == 4) { #if _BYTE_ORDER == _LITTLE_ENDIAN value = (uint64_t)args[sc->offset + 1] << 32 | args[sc->offset]; #else value = (uint64_t)args[sc->offset] << 32 | args[sc->offset + 1]; #endif } else { value = (uint64_t)args[sc->offset]; } if ((sc->type & ARG_MASK) == Quad) fprintf(fp, "%jd", (intmax_t)value); else fprintf(fp, "0x%jx", (intmax_t)value); break; } case PQuadHex: { uint64_t val; if (get_struct(pid, args[sc->offset], &val, sizeof(val)) == 0) fprintf(fp, "{ 0x%jx }", (uintmax_t)val); else print_pointer(fp, args[sc->offset]); break; } case Ptr: print_pointer(fp, args[sc->offset]); break; case Readlinkres: { char *tmp2; if (retval[0] == -1) break; tmp2 = get_string(pid, args[sc->offset], retval[0]); fprintf(fp, "\"%s\"", tmp2); free(tmp2); break; } case Ioctl: { const char *temp; unsigned long cmd; cmd = args[sc->offset]; temp = sysdecode_ioctlname(cmd); if (temp) fputs(temp, fp); else { fprintf(fp, "0x%lx { IO%s%s 0x%lx('%c'), %lu, %lu }", cmd, cmd & IOC_OUT ? "R" : "", cmd & IOC_IN ? "W" : "", IOCGROUP(cmd), isprint(IOCGROUP(cmd)) ? (char)IOCGROUP(cmd) : '?', cmd & 0xFF, IOCPARM_LEN(cmd)); } break; } case Timespec: { struct timespec ts; if (get_struct(pid, args[sc->offset], &ts, sizeof(ts)) != -1) fprintf(fp, "{ %jd.%09ld }", (intmax_t)ts.tv_sec, ts.tv_nsec); else print_pointer(fp, args[sc->offset]); break; } case Timespec2: { struct timespec ts[2]; const char *sep; unsigned int i; if (get_struct(pid, args[sc->offset], &ts, sizeof(ts)) != -1) { fputs("{ ", fp); sep = ""; for (i = 0; i < nitems(ts); i++) { fputs(sep, fp); sep = ", "; switch (ts[i].tv_nsec) { case UTIME_NOW: fprintf(fp, "UTIME_NOW"); break; case UTIME_OMIT: fprintf(fp, "UTIME_OMIT"); break; default: fprintf(fp, "%jd.%09ld", (intmax_t)ts[i].tv_sec, ts[i].tv_nsec); break; } } fputs(" }", fp); } else print_pointer(fp, args[sc->offset]); break; } case Timeval: { struct timeval tv; if (get_struct(pid, args[sc->offset], &tv, sizeof(tv)) != -1) fprintf(fp, "{ %jd.%06ld }", (intmax_t)tv.tv_sec, tv.tv_usec); else print_pointer(fp, args[sc->offset]); break; } case Timeval2: { struct timeval tv[2]; if (get_struct(pid, args[sc->offset], &tv, sizeof(tv)) != -1) fprintf(fp, "{ %jd.%06ld, %jd.%06ld }", (intmax_t)tv[0].tv_sec, tv[0].tv_usec, (intmax_t)tv[1].tv_sec, tv[1].tv_usec); else print_pointer(fp, args[sc->offset]); break; } case Itimerval: { struct itimerval itv; if (get_struct(pid, args[sc->offset], &itv, sizeof(itv)) != -1) fprintf(fp, "{ %jd.%06ld, %jd.%06ld }", (intmax_t)itv.it_interval.tv_sec, itv.it_interval.tv_usec, (intmax_t)itv.it_value.tv_sec, itv.it_value.tv_usec); else print_pointer(fp, args[sc->offset]); break; } case LinuxSockArgs: { struct linux_socketcall_args largs; if (get_struct(pid, args[sc->offset], (void *)&largs, sizeof(largs)) != -1) fprintf(fp, "{ %s, 0x%lx }", lookup(linux_socketcall_ops, largs.what, 10), (long unsigned int)largs.args); else print_pointer(fp, args[sc->offset]); break; } case Pollfd: { /* * XXX: A Pollfd argument expects the /next/ syscall argument * to be the number of fds in the array. This matches the poll * syscall. */ struct pollfd *pfd; int numfds = args[sc->offset + 1]; size_t bytes = sizeof(struct pollfd) * numfds; int i; if ((pfd = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for pollfd array", bytes); if (get_struct(pid, args[sc->offset], pfd, bytes) != -1) { fputs("{", fp); for (i = 0; i < numfds; i++) { fprintf(fp, " %d/%s", pfd[i].fd, xlookup_bits(poll_flags, pfd[i].events)); } fputs(" }", fp); } else { print_pointer(fp, args[sc->offset]); } free(pfd); break; } case Fd_set: { /* * XXX: A Fd_set argument expects the /first/ syscall argument * to be the number of fds in the array. This matches the * select syscall. */ fd_set *fds; int numfds = args[0]; size_t bytes = _howmany(numfds, _NFDBITS) * _NFDBITS; int i; if ((fds = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for fd_set array", bytes); if (get_struct(pid, args[sc->offset], fds, bytes) != -1) { fputs("{", fp); for (i = 0; i < numfds; i++) { if (FD_ISSET(i, fds)) fprintf(fp, " %d", i); } fputs(" }", fp); } else print_pointer(fp, args[sc->offset]); free(fds); break; } case Signal: fputs(strsig2(args[sc->offset]), fp); break; case Sigset: { sigset_t ss; int i, first; if (get_struct(pid, args[sc->offset], (void *)&ss, sizeof(ss)) == -1) { print_pointer(fp, args[sc->offset]); break; } fputs("{ ", fp); first = 1; for (i = 1; i < sys_nsig; i++) { if (sigismember(&ss, i)) { fprintf(fp, "%s%s", !first ? "|" : "", strsig2(i)); first = 0; } } if (!first) fputc(' ', fp); fputc('}', fp); break; } case Sigprocmask: print_integer_arg(sysdecode_sigprocmask_how, fp, args[sc->offset]); break; case Fcntlflag: /* XXX: Output depends on the value of the previous argument. */ if (sysdecode_fcntl_arg_p(args[sc->offset - 1])) sysdecode_fcntl_arg(fp, args[sc->offset - 1], args[sc->offset], 16); break; case Open: print_mask_arg(sysdecode_open_flags, fp, args[sc->offset]); break; case Fcntl: print_integer_arg(sysdecode_fcntl_cmd, fp, args[sc->offset]); break; + case Closerangeflags: + print_mask_arg(sysdecode_close_range_flags, fp, args[sc->offset]); + break; case Mprot: print_mask_arg(sysdecode_mmap_prot, fp, args[sc->offset]); break; case Mmapflags: print_mask_arg(sysdecode_mmap_flags, fp, args[sc->offset]); break; case Whence: print_integer_arg(sysdecode_whence, fp, args[sc->offset]); break; case ShmFlags: print_mask_arg(sysdecode_shmflags, fp, args[sc->offset]); break; case Sockdomain: print_integer_arg(sysdecode_socketdomain, fp, args[sc->offset]); break; case Socktype: print_mask_arg(sysdecode_socket_type, fp, args[sc->offset]); break; case Shutdown: print_integer_arg(sysdecode_shutdown_how, fp, args[sc->offset]); break; case Resource: print_integer_arg(sysdecode_rlimit, fp, args[sc->offset]); break; case RusageWho: print_integer_arg(sysdecode_getrusage_who, fp, args[sc->offset]); break; case Pathconf: print_integer_arg(sysdecode_pathconf_name, fp, args[sc->offset]); break; case Rforkflags: print_mask_arg(sysdecode_rfork_flags, fp, args[sc->offset]); break; case Sockaddr: { socklen_t len; if (args[sc->offset] == 0) { fputs("NULL", fp); break; } /* * Extract the address length from the next argument. If * this is an output sockaddr (OUT is set), then the * next argument is a pointer to a socklen_t. Otherwise * the next argument contains a socklen_t by value. */ if (sc->type & OUT) { if (get_struct(pid, args[sc->offset + 1], &len, sizeof(len)) == -1) { print_pointer(fp, args[sc->offset]); break; } } else len = args[sc->offset + 1]; print_sockaddr(fp, trussinfo, args[sc->offset], len); break; } case Sigaction: { struct sigaction sa; if (get_struct(pid, args[sc->offset], &sa, sizeof(sa)) != -1) { fputs("{ ", fp); if (sa.sa_handler == SIG_DFL) fputs("SIG_DFL", fp); else if (sa.sa_handler == SIG_IGN) fputs("SIG_IGN", fp); else fprintf(fp, "%p", sa.sa_handler); fprintf(fp, " %s ss_t }", xlookup_bits(sigaction_flags, sa.sa_flags)); } else print_pointer(fp, args[sc->offset]); break; } case Sigevent: { struct sigevent se; if (get_struct(pid, args[sc->offset], &se, sizeof(se)) != -1) print_sigevent(fp, &se); else print_pointer(fp, args[sc->offset]); break; } case Kevent: { /* * XXX XXX: The size of the array is determined by either the * next syscall argument, or by the syscall return value, * depending on which argument number we are. This matches the * kevent syscall, but luckily that's the only syscall that uses * them. */ struct kevent *ke; int numevents = -1; size_t bytes; int i; if (sc->offset == 1) numevents = args[sc->offset+1]; else if (sc->offset == 3 && retval[0] != -1) numevents = retval[0]; if (numevents >= 0) { bytes = sizeof(struct kevent) * numevents; if ((ke = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for kevent array", bytes); } else ke = NULL; if (numevents >= 0 && get_struct(pid, args[sc->offset], ke, bytes) != -1) { fputc('{', fp); for (i = 0; i < numevents; i++) { fputc(' ', fp); print_kevent(fp, &ke[i]); } fputs(" }", fp); } else { print_pointer(fp, args[sc->offset]); } free(ke); break; } case Kevent11: { struct freebsd11_kevent *ke11; struct kevent ke; int numevents = -1; size_t bytes; int i; if (sc->offset == 1) numevents = args[sc->offset+1]; else if (sc->offset == 3 && retval[0] != -1) numevents = retval[0]; if (numevents >= 0) { bytes = sizeof(struct freebsd11_kevent) * numevents; if ((ke11 = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for kevent array", bytes); } else ke11 = NULL; memset(&ke, 0, sizeof(ke)); if (numevents >= 0 && get_struct(pid, args[sc->offset], ke11, bytes) != -1) { fputc('{', fp); for (i = 0; i < numevents; i++) { fputc(' ', fp); ke.ident = ke11[i].ident; ke.filter = ke11[i].filter; ke.flags = ke11[i].flags; ke.fflags = ke11[i].fflags; ke.data = ke11[i].data; ke.udata = ke11[i].udata; print_kevent(fp, &ke); } fputs(" }", fp); } else { print_pointer(fp, args[sc->offset]); } free(ke11); break; } case Stat: { struct stat st; if (get_struct(pid, args[sc->offset], &st, sizeof(st)) != -1) { char mode[12]; strmode(st.st_mode, mode); fprintf(fp, "{ mode=%s,inode=%ju,size=%jd,blksize=%ld }", mode, (uintmax_t)st.st_ino, (intmax_t)st.st_size, (long)st.st_blksize); } else { print_pointer(fp, args[sc->offset]); } break; } case Stat11: { struct freebsd11_stat st; if (get_struct(pid, args[sc->offset], &st, sizeof(st)) != -1) { char mode[12]; strmode(st.st_mode, mode); fprintf(fp, "{ mode=%s,inode=%ju,size=%jd,blksize=%ld }", mode, (uintmax_t)st.st_ino, (intmax_t)st.st_size, (long)st.st_blksize); } else { print_pointer(fp, args[sc->offset]); } break; } case StatFs: { unsigned int i; struct statfs buf; if (get_struct(pid, args[sc->offset], &buf, sizeof(buf)) != -1) { char fsid[17]; bzero(fsid, sizeof(fsid)); if (buf.f_fsid.val[0] != 0 || buf.f_fsid.val[1] != 0) { for (i = 0; i < sizeof(buf.f_fsid); i++) snprintf(&fsid[i*2], sizeof(fsid) - (i*2), "%02x", ((u_char *)&buf.f_fsid)[i]); } fprintf(fp, "{ fstypename=%s,mntonname=%s,mntfromname=%s," "fsid=%s }", buf.f_fstypename, buf.f_mntonname, buf.f_mntfromname, fsid); } else print_pointer(fp, args[sc->offset]); break; } case Rusage: { struct rusage ru; if (get_struct(pid, args[sc->offset], &ru, sizeof(ru)) != -1) { fprintf(fp, "{ u=%jd.%06ld,s=%jd.%06ld,in=%ld,out=%ld }", (intmax_t)ru.ru_utime.tv_sec, ru.ru_utime.tv_usec, (intmax_t)ru.ru_stime.tv_sec, ru.ru_stime.tv_usec, ru.ru_inblock, ru.ru_oublock); } else print_pointer(fp, args[sc->offset]); break; } case Rlimit: { struct rlimit rl; if (get_struct(pid, args[sc->offset], &rl, sizeof(rl)) != -1) { fprintf(fp, "{ cur=%ju,max=%ju }", rl.rlim_cur, rl.rlim_max); } else print_pointer(fp, args[sc->offset]); break; } case ExitStatus: { int status; if (get_struct(pid, args[sc->offset], &status, sizeof(status)) != -1) { fputs("{ ", fp); if (WIFCONTINUED(status)) fputs("CONTINUED", fp); else if (WIFEXITED(status)) fprintf(fp, "EXITED,val=%d", WEXITSTATUS(status)); else if (WIFSIGNALED(status)) fprintf(fp, "SIGNALED,sig=%s%s", strsig2(WTERMSIG(status)), WCOREDUMP(status) ? ",cored" : ""); else fprintf(fp, "STOPPED,sig=%s", strsig2(WTERMSIG(status))); fputs(" }", fp); } else print_pointer(fp, args[sc->offset]); break; } case Waitoptions: print_mask_arg(sysdecode_wait6_options, fp, args[sc->offset]); break; case Idtype: print_integer_arg(sysdecode_idtype, fp, args[sc->offset]); break; case Procctl: print_integer_arg(sysdecode_procctl_cmd, fp, args[sc->offset]); break; case Umtxop: { int rem; if (print_mask_arg_part(sysdecode_umtx_op_flags, fp, args[sc->offset], &rem)) fprintf(fp, "|"); print_integer_arg(sysdecode_umtx_op, fp, rem); break; } case Atfd: print_integer_arg(sysdecode_atfd, fp, args[sc->offset]); break; case Atflags: print_mask_arg(sysdecode_atflags, fp, args[sc->offset]); break; case Accessmode: print_mask_arg(sysdecode_access_mode, fp, args[sc->offset]); break; case Sysarch: print_integer_arg(sysdecode_sysarch_number, fp, args[sc->offset]); break; case Sysctl: { char name[BUFSIZ]; int oid[CTL_MAXNAME + 2]; size_t len; memset(name, 0, sizeof(name)); len = args[sc->offset + 1]; if (get_struct(pid, args[sc->offset], oid, len * sizeof(oid[0])) != -1) { fprintf(fp, "\""); if (oid[0] == CTL_SYSCTL) { fprintf(fp, "sysctl."); switch (oid[1]) { case CTL_SYSCTL_DEBUG: fprintf(fp, "debug"); break; case CTL_SYSCTL_NAME: fprintf(fp, "name "); print_sysctl_oid(fp, oid + 2, len - 2); break; case CTL_SYSCTL_NEXT: fprintf(fp, "next"); break; case CTL_SYSCTL_NAME2OID: fprintf(fp, "name2oid %s", get_string(pid, args[sc->offset + 4], args[sc->offset + 5])); break; case CTL_SYSCTL_OIDFMT: fprintf(fp, "oidfmt "); print_sysctl(fp, oid + 2, len - 2); break; case CTL_SYSCTL_OIDDESCR: fprintf(fp, "oiddescr "); print_sysctl(fp, oid + 2, len - 2); break; case CTL_SYSCTL_OIDLABEL: fprintf(fp, "oidlabel "); print_sysctl(fp, oid + 2, len - 2); break; case CTL_SYSCTL_NEXTNOSKIP: fprintf(fp, "nextnoskip"); break; default: print_sysctl(fp, oid + 1, len - 1); } } else { print_sysctl(fp, oid, len); } fprintf(fp, "\""); } break; } case PipeFds: /* * The pipe() system call in the kernel returns its * two file descriptors via return values. However, * the interface exposed by libc is that pipe() * accepts a pointer to an array of descriptors. * Format the output to match the libc API by printing * the returned file descriptors as a fake argument. * * Overwrite the first retval to signal a successful * return as well. */ fprintf(fp, "{ %d, %d }", (int)retval[0], (int)retval[1]); retval[0] = 0; break; case Utrace: { size_t len; void *utrace_addr; len = args[sc->offset + 1]; utrace_addr = calloc(1, len); if (get_struct(pid, args[sc->offset], (void *)utrace_addr, len) != -1) print_utrace(fp, utrace_addr, len); else print_pointer(fp, args[sc->offset]); free(utrace_addr); break; } case IntArray: { int descriptors[16]; unsigned long i, ndescriptors; bool truncated; ndescriptors = args[sc->offset + 1]; truncated = false; if (ndescriptors > nitems(descriptors)) { ndescriptors = nitems(descriptors); truncated = true; } if (get_struct(pid, args[sc->offset], descriptors, ndescriptors * sizeof(descriptors[0])) != -1) { fprintf(fp, "{"); for (i = 0; i < ndescriptors; i++) fprintf(fp, i == 0 ? " %d" : ", %d", descriptors[i]); fprintf(fp, truncated ? ", ... }" : " }"); } else print_pointer(fp, args[sc->offset]); break; } case Pipe2: print_mask_arg(sysdecode_pipe2_flags, fp, args[sc->offset]); break; case CapFcntlRights: { uint32_t rights; if (sc->type & OUT) { if (get_struct(pid, args[sc->offset], &rights, sizeof(rights)) == -1) { print_pointer(fp, args[sc->offset]); break; } } else rights = args[sc->offset]; print_mask_arg32(sysdecode_cap_fcntlrights, fp, rights); break; } case Fadvice: print_integer_arg(sysdecode_fadvice, fp, args[sc->offset]); break; case FileFlags: { fflags_t rem; if (!sysdecode_fileflags(fp, args[sc->offset], &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); break; } case Flockop: print_mask_arg(sysdecode_flock_operation, fp, args[sc->offset]); break; case Getfsstatmode: print_integer_arg(sysdecode_getfsstat_mode, fp, args[sc->offset]); break; case Kldsymcmd: print_integer_arg(sysdecode_kldsym_cmd, fp, args[sc->offset]); break; case Kldunloadflags: print_integer_arg(sysdecode_kldunload_flags, fp, args[sc->offset]); break; case AiofsyncOp: fputs(xlookup(aio_fsync_ops, args[sc->offset]), fp); break; case LioMode: fputs(xlookup(lio_modes, args[sc->offset]), fp); break; case Madvice: print_integer_arg(sysdecode_madvice, fp, args[sc->offset]); break; case Socklent: fprintf(fp, "%u", (socklen_t)args[sc->offset]); break; case Sockprotocol: { const char *temp; int domain, protocol; domain = args[sc->offset - 2]; protocol = args[sc->offset]; if (protocol == 0) { fputs("0", fp); } else { temp = sysdecode_socket_protocol(domain, protocol); if (temp) { fputs(temp, fp); } else { fprintf(fp, "%d", protocol); } } break; } case Sockoptlevel: print_integer_arg(sysdecode_sockopt_level, fp, args[sc->offset]); break; case Sockoptname: { const char *temp; int level, name; level = args[sc->offset - 1]; name = args[sc->offset]; temp = sysdecode_sockopt_name(level, name); if (temp) { fputs(temp, fp); } else { fprintf(fp, "%d", name); } break; } case Msgflags: print_mask_arg(sysdecode_msg_flags, fp, args[sc->offset]); break; case CapRights: { cap_rights_t rights; if (get_struct(pid, args[sc->offset], &rights, sizeof(rights)) != -1) { fputs("{ ", fp); sysdecode_cap_rights(fp, &rights); fputs(" }", fp); } else print_pointer(fp, args[sc->offset]); break; } case Acltype: print_integer_arg(sysdecode_acltype, fp, args[sc->offset]); break; case Extattrnamespace: print_integer_arg(sysdecode_extattrnamespace, fp, args[sc->offset]); break; case Minherit: print_integer_arg(sysdecode_minherit_inherit, fp, args[sc->offset]); break; case Mlockall: print_mask_arg(sysdecode_mlockall_flags, fp, args[sc->offset]); break; case Mountflags: print_mask_arg(sysdecode_mount_flags, fp, args[sc->offset]); break; case Msync: print_mask_arg(sysdecode_msync_flags, fp, args[sc->offset]); break; case Priowhich: print_integer_arg(sysdecode_prio_which, fp, args[sc->offset]); break; case Ptraceop: print_integer_arg(sysdecode_ptrace_request, fp, args[sc->offset]); break; case Sendfileflags: print_mask_arg(sysdecode_sendfile_flags, fp, args[sc->offset]); break; case Sendfilehdtr: { struct sf_hdtr hdtr; if (get_struct(pid, args[sc->offset], &hdtr, sizeof(hdtr)) != -1) { fprintf(fp, "{"); print_iovec(fp, trussinfo, (uintptr_t)hdtr.headers, hdtr.hdr_cnt); print_iovec(fp, trussinfo, (uintptr_t)hdtr.trailers, hdtr.trl_cnt); fprintf(fp, "}"); } else print_pointer(fp, args[sc->offset]); break; } case Quotactlcmd: if (!sysdecode_quotactl_cmd(fp, args[sc->offset])) fprintf(fp, "%#x", (int)args[sc->offset]); break; case Reboothowto: print_mask_arg(sysdecode_reboot_howto, fp, args[sc->offset]); break; case Rtpriofunc: print_integer_arg(sysdecode_rtprio_function, fp, args[sc->offset]); break; case Schedpolicy: print_integer_arg(sysdecode_scheduler_policy, fp, args[sc->offset]); break; case Schedparam: { struct sched_param sp; if (get_struct(pid, args[sc->offset], &sp, sizeof(sp)) != -1) fprintf(fp, "{ %d }", sp.sched_priority); else print_pointer(fp, args[sc->offset]); break; } case PSig: { int sig; if (get_struct(pid, args[sc->offset], &sig, sizeof(sig)) == 0) fprintf(fp, "{ %s }", strsig2(sig)); else print_pointer(fp, args[sc->offset]); break; } case Siginfo: { siginfo_t si; if (get_struct(pid, args[sc->offset], &si, sizeof(si)) != -1) { fprintf(fp, "{ signo=%s", strsig2(si.si_signo)); decode_siginfo(fp, &si); fprintf(fp, " }"); } else print_pointer(fp, args[sc->offset]); break; } case Iovec: /* * Print argument as an array of struct iovec, where the next * syscall argument is the number of elements of the array. */ print_iovec(fp, trussinfo, args[sc->offset], (int)args[sc->offset + 1]); break; case Aiocb: { struct aiocb cb; if (get_struct(pid, args[sc->offset], &cb, sizeof(cb)) != -1) print_aiocb(fp, &cb); else print_pointer(fp, args[sc->offset]); break; } case AiocbArray: { /* * Print argment as an array of pointers to struct aiocb, where * the next syscall argument is the number of elements. */ uintptr_t cbs[16]; unsigned int nent; bool truncated; nent = args[sc->offset + 1]; truncated = false; if (nent > nitems(cbs)) { nent = nitems(cbs); truncated = true; } if (get_struct(pid, args[sc->offset], cbs, sizeof(uintptr_t) * nent) != -1) { unsigned int i; fputs("[", fp); for (i = 0; i < nent; ++i) { struct aiocb cb; if (i > 0) fputc(',', fp); if (get_struct(pid, cbs[i], &cb, sizeof(cb)) != -1) print_aiocb(fp, &cb); else print_pointer(fp, cbs[i]); } if (truncated) fputs(",...", fp); fputs("]", fp); } else print_pointer(fp, args[sc->offset]); break; } case AiocbPointer: { /* * aio_waitcomplete(2) assigns a pointer to a pointer to struct * aiocb, so we need to handle the extra layer of indirection. */ uintptr_t cbp; struct aiocb cb; if (get_struct(pid, args[sc->offset], &cbp, sizeof(cbp)) != -1) { if (get_struct(pid, cbp, &cb, sizeof(cb)) != -1) print_aiocb(fp, &cb); else print_pointer(fp, cbp); } else print_pointer(fp, args[sc->offset]); break; } case Sctpsndrcvinfo: { struct sctp_sndrcvinfo info; if (get_struct(pid, args[sc->offset], &info, sizeof(struct sctp_sndrcvinfo)) == -1) { print_pointer(fp, args[sc->offset]); break; } print_sctp_sndrcvinfo(fp, sc->type & OUT, &info); break; } case Msghdr: { struct msghdr msghdr; if (get_struct(pid, args[sc->offset], &msghdr, sizeof(struct msghdr)) == -1) { print_pointer(fp, args[sc->offset]); break; } fputs("{", fp); print_sockaddr(fp, trussinfo, (uintptr_t)msghdr.msg_name, msghdr.msg_namelen); fprintf(fp, ",%d,", msghdr.msg_namelen); print_iovec(fp, trussinfo, (uintptr_t)msghdr.msg_iov, msghdr.msg_iovlen); fprintf(fp, ",%d,", msghdr.msg_iovlen); print_cmsgs(fp, pid, sc->type & OUT, &msghdr); fprintf(fp, ",%u,", msghdr.msg_controllen); print_mask_arg(sysdecode_msg_flags, fp, msghdr.msg_flags); fputs("}", fp); break; } default: errx(1, "Invalid argument type %d\n", sc->type & ARG_MASK); } fclose(fp); return (tmp); } /* * Print (to outfile) the system call and its arguments. */ void print_syscall(struct trussinfo *trussinfo) { struct threadinfo *t; const char *name; char **s_args; int i, len, nargs; t = trussinfo->curthread; name = t->cs.sc->name; nargs = t->cs.nargs; s_args = t->cs.s_args; len = print_line_prefix(trussinfo); len += fprintf(trussinfo->outfile, "%s(", name); for (i = 0; i < nargs; i++) { if (s_args[i] != NULL) len += fprintf(trussinfo->outfile, "%s", s_args[i]); else len += fprintf(trussinfo->outfile, ""); len += fprintf(trussinfo->outfile, "%s", i < (nargs - 1) ? "," : ""); } len += fprintf(trussinfo->outfile, ")"); for (i = 0; i < 6 - (len / 8); i++) fprintf(trussinfo->outfile, "\t"); } void print_syscall_ret(struct trussinfo *trussinfo, int error, register_t *retval) { struct timespec timediff; struct threadinfo *t; struct syscall *sc; t = trussinfo->curthread; sc = t->cs.sc; if (trussinfo->flags & COUNTONLY) { timespecsub(&t->after, &t->before, &timediff); timespecadd(&sc->time, &timediff, &sc->time); sc->ncalls++; if (error != 0) sc->nerror++; return; } print_syscall(trussinfo); fflush(trussinfo->outfile); if (retval == NULL) { /* * This system call resulted in the current thread's exit, * so there is no return value or error to display. */ fprintf(trussinfo->outfile, "\n"); return; } if (error == ERESTART) fprintf(trussinfo->outfile, " ERESTART\n"); else if (error == EJUSTRETURN) fprintf(trussinfo->outfile, " EJUSTRETURN\n"); else if (error != 0) { fprintf(trussinfo->outfile, " ERR#%d '%s'\n", sysdecode_freebsd_to_abi_errno(t->proc->abi->abi, error), strerror(error)); } else if (sc->decode.ret_type == 2 && t->proc->abi->pointer_size == 4) { off_t off; #if _BYTE_ORDER == _LITTLE_ENDIAN off = (off_t)retval[1] << 32 | retval[0]; #else off = (off_t)retval[0] << 32 | retval[1]; #endif fprintf(trussinfo->outfile, " = %jd (0x%jx)\n", (intmax_t)off, (intmax_t)off); } else { fprintf(trussinfo->outfile, " = %jd (0x%jx)\n", (intmax_t)retval[0], (intmax_t)retval[0]); } } void print_summary(struct trussinfo *trussinfo) { struct timespec total = {0, 0}; struct syscall *sc; int ncall, nerror; fprintf(trussinfo->outfile, "%-20s%15s%8s%8s\n", "syscall", "seconds", "calls", "errors"); ncall = nerror = 0; STAILQ_FOREACH(sc, &seen_syscalls, entries) { if (sc->ncalls) { fprintf(trussinfo->outfile, "%-20s%5jd.%09ld%8d%8d\n", sc->name, (intmax_t)sc->time.tv_sec, sc->time.tv_nsec, sc->ncalls, sc->nerror); timespecadd(&total, &sc->time, &total); ncall += sc->ncalls; nerror += sc->nerror; } } fprintf(trussinfo->outfile, "%20s%15s%8s%8s\n", "", "-------------", "-------", "-------"); fprintf(trussinfo->outfile, "%-20s%5jd.%09ld%8d%8d\n", "", (intmax_t)total.tv_sec, total.tv_nsec, ncall, nerror); }