Index: head/sys/sys/mman.h =================================================================== --- head/sys/sys/mman.h (revision 273249) +++ head/sys/sys/mman.h (revision 273250) @@ -1,273 +1,273 @@ /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mman.h 8.2 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _SYS_MMAN_H_ #define _SYS_MMAN_H_ #include #include #if __BSD_VISIBLE /* * Inheritance for minherit() */ #define INHERIT_SHARE 0 #define INHERIT_COPY 1 #define INHERIT_NONE 2 #endif /* * Protections are chosen from these bits, or-ed together */ #define PROT_NONE 0x00 /* no permissions */ #define PROT_READ 0x01 /* pages can be read */ #define PROT_WRITE 0x02 /* pages can be written */ #define PROT_EXEC 0x04 /* pages can be executed */ /* * Flags contain sharing type and options. * Sharing types; choose one. */ #define MAP_SHARED 0x0001 /* share changes */ #define MAP_PRIVATE 0x0002 /* changes are private */ #if __BSD_VISIBLE #define MAP_COPY MAP_PRIVATE /* Obsolete */ #endif /* * Other flags */ #define MAP_FIXED 0x0010 /* map addr must be exactly as requested */ #if __BSD_VISIBLE -#define MAP_RENAME 0x0020 /* Sun: rename private pages to file */ -#define MAP_NORESERVE 0x0040 /* Sun: don't reserve needed swap area */ +#define MAP_RESERVED0020 0x0020 /* previously unimplemented MAP_RENAME */ +#define MAP_RESERVED0040 0x0040 /* previously unimplemented MAP_NORESERVE */ #define MAP_RESERVED0080 0x0080 /* previously misimplemented MAP_INHERIT */ #define MAP_RESERVED0100 0x0100 /* previously unimplemented MAP_NOEXTEND */ #define MAP_HASSEMAPHORE 0x0200 /* region may contain semaphores */ #define MAP_STACK 0x0400 /* region grows down, like a stack */ #define MAP_NOSYNC 0x0800 /* page to but do not sync underlying file */ /* * Mapping type */ #define MAP_FILE 0x0000 /* map from file (default) */ #define MAP_ANON 0x1000 /* allocated from memory, swap space */ #ifndef _KERNEL #define MAP_ANONYMOUS MAP_ANON /* For compatibility. */ #endif /* !_KERNEL */ /* * Extended flags */ #define MAP_EXCL 0x00004000 /* for MAP_FIXED, fail if address is used */ #define MAP_NOCORE 0x00020000 /* dont include these pages in a coredump */ #define MAP_PREFAULT_READ 0x00040000 /* prefault mapping for reading */ #ifdef __LP64__ #define MAP_32BIT 0x00080000 /* map in the low 2GB of address space */ #endif /* * Request specific alignment (n == log2 of the desired alignment). * * MAP_ALIGNED_SUPER requests optimal superpage alignment, but does * not enforce a specific alignment. */ #define MAP_ALIGNED(n) ((n) << MAP_ALIGNMENT_SHIFT) #define MAP_ALIGNMENT_SHIFT 24 #define MAP_ALIGNMENT_MASK MAP_ALIGNED(0xff) #define MAP_ALIGNED_SUPER MAP_ALIGNED(1) /* align on a superpage */ #endif /* __BSD_VISIBLE */ #if __POSIX_VISIBLE >= 199309 /* * Process memory locking */ #define MCL_CURRENT 0x0001 /* Lock only current memory */ #define MCL_FUTURE 0x0002 /* Lock all future memory as well */ #endif /* * Error return from mmap() */ #define MAP_FAILED ((void *)-1) /* * msync() flags */ #define MS_SYNC 0x0000 /* msync synchronously */ #define MS_ASYNC 0x0001 /* return immediately */ #define MS_INVALIDATE 0x0002 /* invalidate all cached data */ /* * Advice to madvise */ #define _MADV_NORMAL 0 /* no further special treatment */ #define _MADV_RANDOM 1 /* expect random page references */ #define _MADV_SEQUENTIAL 2 /* expect sequential page references */ #define _MADV_WILLNEED 3 /* will need these pages */ #define _MADV_DONTNEED 4 /* dont need these pages */ #if __BSD_VISIBLE #define MADV_NORMAL _MADV_NORMAL #define MADV_RANDOM _MADV_RANDOM #define MADV_SEQUENTIAL _MADV_SEQUENTIAL #define MADV_WILLNEED _MADV_WILLNEED #define MADV_DONTNEED _MADV_DONTNEED #define MADV_FREE 5 /* dont need these pages, and junk contents */ #define MADV_NOSYNC 6 /* try to avoid flushes to physical media */ #define MADV_AUTOSYNC 7 /* revert to default flushing strategy */ #define MADV_NOCORE 8 /* do not include these pages in a core file */ #define MADV_CORE 9 /* revert to including pages in a core file */ #define MADV_PROTECT 10 /* protect process from pageout kill */ /* * Return bits from mincore */ #define MINCORE_INCORE 0x1 /* Page is incore */ #define MINCORE_REFERENCED 0x2 /* Page has been referenced by us */ #define MINCORE_MODIFIED 0x4 /* Page has been modified by us */ #define MINCORE_REFERENCED_OTHER 0x8 /* Page has been referenced */ #define MINCORE_MODIFIED_OTHER 0x10 /* Page has been modified */ #define MINCORE_SUPER 0x20 /* Page is a "super" page */ /* * Anonymous object constant for shm_open(). */ #define SHM_ANON ((char *)1) #endif /* __BSD_VISIBLE */ /* * XXX missing POSIX_TYPED_MEM_* macros and * posix_typed_mem_info structure. */ #if __POSIX_VISIBLE >= 200112 #define POSIX_MADV_NORMAL _MADV_NORMAL #define POSIX_MADV_RANDOM _MADV_RANDOM #define POSIX_MADV_SEQUENTIAL _MADV_SEQUENTIAL #define POSIX_MADV_WILLNEED _MADV_WILLNEED #define POSIX_MADV_DONTNEED _MADV_DONTNEED #endif #ifndef _MODE_T_DECLARED typedef __mode_t mode_t; #define _MODE_T_DECLARED #endif #ifndef _OFF_T_DECLARED typedef __off_t off_t; #define _OFF_T_DECLARED #endif #ifndef _SIZE_T_DECLARED typedef __size_t size_t; #define _SIZE_T_DECLARED #endif #if defined(_KERNEL) || defined(_WANT_FILE) #include #include #include #include #include struct file; struct shmfd { size_t shm_size; vm_object_t shm_object; int shm_refs; uid_t shm_uid; gid_t shm_gid; mode_t shm_mode; int shm_kmappings; /* * Values maintained solely to make this a better-behaved file * descriptor for fstat() to run on. */ struct timespec shm_atime; struct timespec shm_mtime; struct timespec shm_ctime; struct timespec shm_birthtime; ino_t shm_ino; struct label *shm_label; /* MAC label */ const char *shm_path; struct rangelock shm_rl; struct mtx shm_mtx; }; #endif #ifdef _KERNEL int shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff, vm_object_t *obj); int shm_map(struct file *fp, size_t size, off_t offset, void **memp); int shm_unmap(struct file *fp, void *mem, size_t size); #else /* !_KERNEL */ __BEGIN_DECLS /* * XXX not yet implemented: posix_mem_offset(), posix_typed_mem_get_info(), * posix_typed_mem_open(). */ #if __BSD_VISIBLE int getpagesizes(size_t *, int); int madvise(void *, size_t, int); int mincore(const void *, size_t, char *); int minherit(void *, size_t, int); #endif int mlock(const void *, size_t); #ifndef _MMAP_DECLARED #define _MMAP_DECLARED void * mmap(void *, size_t, int, int, int, off_t); #endif int mprotect(const void *, size_t, int); int msync(void *, size_t, int); int munlock(const void *, size_t); int munmap(void *, size_t); #if __POSIX_VISIBLE >= 200112 int posix_madvise(void *, size_t, int); #endif #if __POSIX_VISIBLE >= 199309 int mlockall(int); int munlockall(void); int shm_open(const char *, int, mode_t); int shm_unlink(const char *); #endif __END_DECLS #endif /* !_KERNEL */ #endif /* !_SYS_MMAN_H_ */ Index: head/sys/sys/param.h =================================================================== --- head/sys/sys/param.h (revision 273249) +++ head/sys/sys/param.h (revision 273250) @@ -1,348 +1,349 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)param.h 8.3 (Berkeley) 4/4/95 * $FreeBSD$ */ #ifndef _SYS_PARAM_H_ #define _SYS_PARAM_H_ #include #define BSD 199506 /* System version (year & month). */ #define BSD4_3 1 #define BSD4_4 1 /* * __FreeBSD_version numbers are documented in the Porter's Handbook. * If you bump the version for any reason, you should update the documentation * there. * Currently this lives here in the doc/ repository: * * head/en_US.ISO8859-1/books/porters-handbook/book.xml * * scheme is: Rxx * 'R' is in the range 0 to 4 if this is a release branch or * x.0-CURRENT before RELENG_*_0 is created, otherwise 'R' is * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1100038 /* Master, propagated to newvers */ +#define __FreeBSD_version 1100039 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, * which by definition is always true on FreeBSD. This macro is also defined * on other systems that use the kernel of FreeBSD, such as GNU/kFreeBSD. * * It is tempting to use this macro in userland code when we want to enable * kernel-specific routines, and in fact it's fine to do this in code that * is part of FreeBSD itself. However, be aware that as presence of this * macro is still not widespread (e.g. older FreeBSD versions, 3rd party * compilers, etc), it is STRONGLY DISCOURAGED to check for this macro in * external applications without also checking for __FreeBSD__ as an * alternative. */ #undef __FreeBSD_kernel__ #define __FreeBSD_kernel__ #ifdef _KERNEL #define P_OSREL_SIGWAIT 700000 #define P_OSREL_SIGSEGV 700004 #define P_OSREL_MAP_ANON 800104 #define P_OSREL_MAP_FSTRICT 1100036 +#define P_OSREL_MAP_RENAME 1100039 #define P_OSREL_MAJOR(x) ((x) / 100000) #endif #ifndef LOCORE #include #endif /* * Machine-independent constants (some used in following include files). * Redefined constants are from POSIX 1003.1 limits file. * * MAXCOMLEN should be >= sizeof(ac_comm) (see ) */ #include #define MAXCOMLEN 19 /* max command name remembered */ #define MAXINTERP PATH_MAX /* max interpreter file name length */ #define MAXLOGNAME 33 /* max login name length (incl. NUL) */ #define MAXUPRC CHILD_MAX /* max simultaneous processes */ #define NCARGS ARG_MAX /* max bytes for an exec function */ #define NGROUPS (NGROUPS_MAX+1) /* max number groups */ #define NOFILE OPEN_MAX /* max open files per process */ #define NOGROUP 65535 /* marker for empty group set member */ #define MAXHOSTNAMELEN 256 /* max hostname size */ #define SPECNAMELEN 63 /* max length of devicename */ /* More types and definitions used throughout the kernel. */ #ifdef _KERNEL #include #include #ifndef LOCORE #include #include #endif #ifndef FALSE #define FALSE 0 #endif #ifndef TRUE #define TRUE 1 #endif #endif #ifndef _KERNEL /* Signals. */ #include #endif /* Machine type dependent parameters. */ #include #ifndef _KERNEL #include #endif #ifndef DEV_BSHIFT #define DEV_BSHIFT 9 /* log2(DEV_BSIZE) */ #endif #define DEV_BSIZE (1<>PAGE_SHIFT) #endif /* * btodb() is messy and perhaps slow because `bytes' may be an off_t. We * want to shift an unsigned type to avoid sign extension and we don't * want to widen `bytes' unnecessarily. Assume that the result fits in * a daddr_t. */ #ifndef btodb #define btodb(bytes) /* calculates (bytes / DEV_BSIZE) */ \ (sizeof (bytes) > sizeof(long) \ ? (daddr_t)((unsigned long long)(bytes) >> DEV_BSHIFT) \ : (daddr_t)((unsigned long)(bytes) >> DEV_BSHIFT)) #endif #ifndef dbtob #define dbtob(db) /* calculates (db * DEV_BSIZE) */ \ ((off_t)(db) << DEV_BSHIFT) #endif #define PRIMASK 0x0ff #define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */ #define PDROP 0x200 /* OR'd with pri to stop re-entry of interlock mutex */ #define NZERO 0 /* default "nice" */ #define NBBY 8 /* number of bits in a byte */ #define NBPW sizeof(int) /* number of bytes per word (integer) */ #define CMASK 022 /* default file mask: S_IWGRP|S_IWOTH */ #define NODEV (dev_t)(-1) /* non-existent device */ /* * File system parameters and macros. * * MAXBSIZE - Filesystems are made out of blocks of at most MAXBSIZE bytes * per block. MAXBSIZE may be made larger without effecting * any existing filesystems as long as it does not exceed MAXPHYS, * and may be made smaller at the risk of not being able to use * filesystems which require a block size exceeding MAXBSIZE. * * BKVASIZE - Nominal buffer space per buffer, in bytes. BKVASIZE is the * minimum KVM memory reservation the kernel is willing to make. * Filesystems can of course request smaller chunks. Actual * backing memory uses a chunk size of a page (PAGE_SIZE). * * If you make BKVASIZE too small you risk seriously fragmenting * the buffer KVM map which may slow things down a bit. If you * make it too big the kernel will not be able to optimally use * the KVM memory reserved for the buffer cache and will wind * up with too-few buffers. * * The default is 16384, roughly 2x the block size used by a * normal UFS filesystem. */ #define MAXBSIZE 65536 /* must be power of 2 */ #define BKVASIZE 16384 /* must be power of 2 */ #define BKVAMASK (BKVASIZE-1) /* * MAXPATHLEN defines the longest permissible path length after expanding * symbolic links. It is used to allocate a temporary buffer from the buffer * pool in which to do the name expansion, hence should be a power of two, * and must be less than or equal to MAXBSIZE. MAXSYMLINKS defines the * maximum number of symbolic links that may be expanded in a path name. * It should be set high enough to allow all legitimate uses, but halt * infinite loops reasonably quickly. */ #define MAXPATHLEN PATH_MAX #define MAXSYMLINKS 32 /* Bit map related macros. */ #define setbit(a,i) (((unsigned char *)(a))[(i)/NBBY] |= 1<<((i)%NBBY)) #define clrbit(a,i) (((unsigned char *)(a))[(i)/NBBY] &= ~(1<<((i)%NBBY))) #define isset(a,i) \ (((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) #define isclr(a,i) \ ((((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) == 0) /* Macros for counting and rounding. */ #ifndef howmany #define howmany(x, y) (((x)+((y)-1))/(y)) #endif #define nitems(x) (sizeof((x)) / sizeof((x)[0])) #define rounddown(x, y) (((x)/(y))*(y)) #define rounddown2(x, y) ((x)&(~((y)-1))) /* if y is power of two */ #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) /* to any y */ #define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ #define powerof2(x) ((((x)-1)&(x))==0) /* Macros for min/max. */ #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) #ifdef _KERNEL /* * Basic byte order function prototypes for non-inline functions. */ #ifndef LOCORE #ifndef _BYTEORDER_PROTOTYPED #define _BYTEORDER_PROTOTYPED __BEGIN_DECLS __uint32_t htonl(__uint32_t); __uint16_t htons(__uint16_t); __uint32_t ntohl(__uint32_t); __uint16_t ntohs(__uint16_t); __END_DECLS #endif #endif #ifndef lint #ifndef _BYTEORDER_FUNC_DEFINED #define _BYTEORDER_FUNC_DEFINED #define htonl(x) __htonl(x) #define htons(x) __htons(x) #define ntohl(x) __ntohl(x) #define ntohs(x) __ntohs(x) #endif /* !_BYTEORDER_FUNC_DEFINED */ #endif /* lint */ #endif /* _KERNEL */ /* * Scale factor for scaled integers used to count %cpu time and load avgs. * * The number of CPU `tick's that map to a unique `%age' can be expressed * by the formula (1 / (2 ^ (FSHIFT - 11))). The maximum load average that * can be calculated (assuming 32 bits) can be closely approximated using * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15). * * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age', * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024. */ #define FSHIFT 11 /* bits to right of fixed binary point */ #define FSCALE (1<> (PAGE_SHIFT - DEV_BSHIFT)) #define ctodb(db) /* calculates pages to devblks */ \ ((db) << (PAGE_SHIFT - DEV_BSHIFT)) /* * Old spelling of __containerof(). */ #define member2struct(s, m, x) \ ((struct s *)(void *)((char *)(x) - offsetof(struct s, m))) /* * Access a variable length array that has been declared as a fixed * length array. */ #define __PAST_END(array, offset) (((__typeof__(*(array)) *)(array))[offset]) #endif /* _SYS_PARAM_H_ */ Index: head/sys/vm/vm_mmap.c =================================================================== --- head/sys/vm/vm_mmap.c (revision 273249) +++ head/sys/vm/vm_mmap.c (revision 273250) @@ -1,1717 +1,1723 @@ /*- * Copyright (c) 1988 University of Utah. * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ * * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 */ /* * Mapped file (mmap) interface to VM */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_hwpmc_hooks.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif int old_mlock = 0; SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0, "Do not apply RLIMIT_MEMLOCK on mlockall"); #ifdef MAP_32BIT #define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31) #endif static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *); static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *, struct cdev *, vm_ooffset_t *, vm_object_t *); static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *, struct shmfd *, vm_ooffset_t, vm_object_t *); #ifndef _SYS_SYSPROTO_H_ struct sbrk_args { int incr; }; #endif /* * MPSAFE */ /* ARGSUSED */ int sys_sbrk(td, uap) struct thread *td; struct sbrk_args *uap; { /* Not yet implemented */ return (EOPNOTSUPP); } #ifndef _SYS_SYSPROTO_H_ struct sstk_args { int incr; }; #endif /* * MPSAFE */ /* ARGSUSED */ int sys_sstk(td, uap) struct thread *td; struct sstk_args *uap; { /* Not yet implemented */ return (EOPNOTSUPP); } #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct getpagesize_args { int dummy; }; #endif int ogetpagesize(td, uap) struct thread *td; struct getpagesize_args *uap; { /* MP SAFE */ td->td_retval[0] = PAGE_SIZE; return (0); } #endif /* COMPAT_43 */ /* * Memory Map (mmap) system call. Note that the file offset * and address are allowed to be NOT page aligned, though if * the MAP_FIXED flag it set, both must have the same remainder * modulo the PAGE_SIZE (POSIX 1003.1b). If the address is not * page-aligned, the actual mapping starts at trunc_page(addr) * and the return value is adjusted up by the page offset. * * Generally speaking, only character devices which are themselves * memory-based, such as a video framebuffer, can be mmap'd. Otherwise * there would be no cache coherency between a descriptor and a VM mapping * both to the same character device. */ #ifndef _SYS_SYSPROTO_H_ struct mmap_args { void *addr; size_t len; int prot; int flags; int fd; long pad; off_t pos; }; #endif /* * MPSAFE */ int sys_mmap(td, uap) struct thread *td; struct mmap_args *uap; { #ifdef HWPMC_HOOKS struct pmckern_map_in pkm; #endif struct file *fp; struct vnode *vp; vm_offset_t addr; vm_size_t size, pageoff; vm_prot_t cap_maxprot, maxprot; void *handle; objtype_t handle_type; int align, error, flags, prot; off_t pos; struct vmspace *vms = td->td_proc->p_vmspace; cap_rights_t rights; addr = (vm_offset_t) uap->addr; size = uap->len; prot = uap->prot; flags = uap->flags; pos = uap->pos; fp = NULL; /* + * Ignore old flags that used to be defined but did not do anything. + */ + if (td->td_proc->p_osrel < P_OSREL_MAP_RENAME) + flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040); + + /* * Enforce the constraints. * Mapping of length 0 is only allowed for old binaries. * Anonymous mapping shall specify -1 as filedescriptor and * zero position for new code. Be nice to ancient a.out * binaries and correct pos for anonymous mapping, since old * ld.so sometimes issues anonymous map requests with non-zero * pos. */ if (!SV_CURPROC_FLAG(SV_AOUT)) { if ((uap->len == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) || ((flags & MAP_ANON) != 0 && (uap->fd != -1 || pos != 0))) return (EINVAL); } else { if ((flags & MAP_ANON) != 0) pos = 0; } if (flags & MAP_STACK) { if ((uap->fd != -1) || ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE))) return (EINVAL); flags |= MAP_ANON; pos = 0; } - if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_RENAME | - MAP_NORESERVE | MAP_HASSEMAPHORE | MAP_STACK | MAP_NOSYNC | - MAP_ANON | MAP_EXCL | MAP_NOCORE | MAP_PREFAULT_READ | + if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE | + MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE | + MAP_PREFAULT_READ | #ifdef MAP_32BIT MAP_32BIT | #endif MAP_ALIGNMENT_MASK)) != 0) return (EINVAL); if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL) return (EINVAL); if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED | MAP_PRIVATE)) return (EINVAL); if (prot != PROT_NONE && (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0) return (EINVAL); /* * Align the file position to a page boundary, * and save its page offset component. */ pageoff = (pos & PAGE_MASK); pos -= pageoff; /* Adjust size for rounding (on both ends). */ size += pageoff; /* low end... */ size = (vm_size_t) round_page(size); /* hi end */ /* Ensure alignment is at least a page and fits in a pointer. */ align = flags & MAP_ALIGNMENT_MASK; if (align != 0 && align != MAP_ALIGNED_SUPER && (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY || align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT)) return (EINVAL); /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ if (flags & MAP_FIXED) { /* * The specified address must have the same remainder * as the file offset taken modulo PAGE_SIZE, so it * should be aligned after adjustment by pageoff. */ addr -= pageoff; if (addr & PAGE_MASK) return (EINVAL); /* Address range must be all in user VM space. */ if (addr < vm_map_min(&vms->vm_map) || addr + size > vm_map_max(&vms->vm_map)) return (EINVAL); if (addr + size < addr) return (EINVAL); #ifdef MAP_32BIT if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR) return (EINVAL); } else if (flags & MAP_32BIT) { /* * For MAP_32BIT, override the hint if it is too high and * do not bother moving the mapping past the heap (since * the heap is usually above 2GB). */ if (addr + size > MAP_32BIT_MAX_ADDR) addr = 0; #endif } else { /* * XXX for non-fixed mappings where no hint is provided or * the hint would fall in the potential heap space, * place it after the end of the largest possible heap. * * There should really be a pmap call to determine a reasonable * location. */ PROC_LOCK(td->td_proc); if (addr == 0 || (addr >= round_page((vm_offset_t)vms->vm_taddr) && addr < round_page((vm_offset_t)vms->vm_daddr + lim_max(td->td_proc, RLIMIT_DATA)))) addr = round_page((vm_offset_t)vms->vm_daddr + lim_max(td->td_proc, RLIMIT_DATA)); PROC_UNLOCK(td->td_proc); } if (flags & MAP_ANON) { /* * Mapping blank space is trivial. */ handle = NULL; handle_type = OBJT_DEFAULT; maxprot = VM_PROT_ALL; cap_maxprot = VM_PROT_ALL; } else { /* * Mapping file, get fp for validation and don't let the * descriptor disappear on us if we block. Check capability * rights, but also return the maximum rights to be combined * with maxprot later. */ cap_rights_init(&rights, CAP_MMAP); if (prot & PROT_READ) cap_rights_set(&rights, CAP_MMAP_R); if ((flags & MAP_SHARED) != 0) { if (prot & PROT_WRITE) cap_rights_set(&rights, CAP_MMAP_W); } if (prot & PROT_EXEC) cap_rights_set(&rights, CAP_MMAP_X); error = fget_mmap(td, uap->fd, &rights, &cap_maxprot, &fp); if (error != 0) goto done; if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 && td->td_proc->p_osrel >= P_OSREL_MAP_FSTRICT) { error = EINVAL; goto done; } if (fp->f_type == DTYPE_SHM) { handle = fp->f_data; handle_type = OBJT_SWAP; maxprot = VM_PROT_NONE; /* FREAD should always be set. */ if (fp->f_flag & FREAD) maxprot |= VM_PROT_EXECUTE | VM_PROT_READ; if (fp->f_flag & FWRITE) maxprot |= VM_PROT_WRITE; goto map; } if (fp->f_type != DTYPE_VNODE) { error = ENODEV; goto done; } #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \ defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) /* * POSIX shared-memory objects are defined to have * kernel persistence, and are not defined to support * read(2)/write(2) -- or even open(2). Thus, we can * use MAP_ASYNC to trade on-disk coherence for speed. * The shm_open(3) library routine turns on the FPOSIXSHM * flag to request this behavior. */ if (fp->f_flag & FPOSIXSHM) flags |= MAP_NOSYNC; #endif vp = fp->f_vnode; /* * Ensure that file and memory protections are * compatible. Note that we only worry about * writability if mapping is shared; in this case, * current and max prot are dictated by the open file. * XXX use the vnode instead? Problem is: what * credentials do we use for determination? What if * proc does a setuid? */ if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC) maxprot = VM_PROT_NONE; else maxprot = VM_PROT_EXECUTE; if (fp->f_flag & FREAD) { maxprot |= VM_PROT_READ; } else if (prot & PROT_READ) { error = EACCES; goto done; } /* * If we are sharing potential changes (either via * MAP_SHARED or via the implicit sharing of character * device mappings), and we are trying to get write * permission although we opened it without asking * for it, bail out. */ if ((flags & MAP_SHARED) != 0) { if ((fp->f_flag & FWRITE) != 0) { maxprot |= VM_PROT_WRITE; } else if ((prot & PROT_WRITE) != 0) { error = EACCES; goto done; } } else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) { maxprot |= VM_PROT_WRITE; cap_maxprot |= VM_PROT_WRITE; } handle = (void *)vp; handle_type = OBJT_VNODE; } map: td->td_fpop = fp; maxprot &= cap_maxprot; /* This relies on VM_PROT_* matching PROT_*. */ error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot, flags, handle_type, handle, pos); td->td_fpop = NULL; #ifdef HWPMC_HOOKS /* inform hwpmc(4) if an executable is being mapped */ if (error == 0 && handle_type == OBJT_VNODE && (prot & PROT_EXEC)) { pkm.pm_file = handle; pkm.pm_address = (uintptr_t) addr; PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm); } #endif if (error == 0) td->td_retval[0] = (register_t) (addr + pageoff); done: if (fp) fdrop(fp, td); return (error); } int freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap) { struct mmap_args oargs; oargs.addr = uap->addr; oargs.len = uap->len; oargs.prot = uap->prot; oargs.flags = uap->flags; oargs.fd = uap->fd; oargs.pos = uap->pos; return (sys_mmap(td, &oargs)); } #ifdef COMPAT_43 #ifndef _SYS_SYSPROTO_H_ struct ommap_args { caddr_t addr; int len; int prot; int flags; int fd; long pos; }; #endif int ommap(td, uap) struct thread *td; struct ommap_args *uap; { struct mmap_args nargs; static const char cvtbsdprot[8] = { 0, PROT_EXEC, PROT_WRITE, PROT_EXEC | PROT_WRITE, PROT_READ, PROT_EXEC | PROT_READ, PROT_WRITE | PROT_READ, PROT_EXEC | PROT_WRITE | PROT_READ, }; #define OMAP_ANON 0x0002 #define OMAP_COPY 0x0020 #define OMAP_SHARED 0x0010 #define OMAP_FIXED 0x0100 nargs.addr = uap->addr; nargs.len = uap->len; nargs.prot = cvtbsdprot[uap->prot & 0x7]; #ifdef COMPAT_FREEBSD32 #if defined(__amd64__) if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) && nargs.prot != 0) nargs.prot |= PROT_EXEC; #endif #endif nargs.flags = 0; if (uap->flags & OMAP_ANON) nargs.flags |= MAP_ANON; if (uap->flags & OMAP_COPY) nargs.flags |= MAP_COPY; if (uap->flags & OMAP_SHARED) nargs.flags |= MAP_SHARED; else nargs.flags |= MAP_PRIVATE; if (uap->flags & OMAP_FIXED) nargs.flags |= MAP_FIXED; nargs.fd = uap->fd; nargs.pos = uap->pos; return (sys_mmap(td, &nargs)); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct msync_args { void *addr; size_t len; int flags; }; #endif /* * MPSAFE */ int sys_msync(td, uap) struct thread *td; struct msync_args *uap; { vm_offset_t addr; vm_size_t size, pageoff; int flags; vm_map_t map; int rv; addr = (vm_offset_t) uap->addr; size = uap->len; flags = uap->flags; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); if (addr + size < addr) return (EINVAL); if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) return (EINVAL); map = &td->td_proc->p_vmspace->vm_map; /* * Clean the pages and interpret the return value. */ rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0, (flags & MS_INVALIDATE) != 0); switch (rv) { case KERN_SUCCESS: return (0); case KERN_INVALID_ADDRESS: return (ENOMEM); case KERN_INVALID_ARGUMENT: return (EBUSY); case KERN_FAILURE: return (EIO); default: return (EINVAL); } } #ifndef _SYS_SYSPROTO_H_ struct munmap_args { void *addr; size_t len; }; #endif /* * MPSAFE */ int sys_munmap(td, uap) struct thread *td; struct munmap_args *uap; { #ifdef HWPMC_HOOKS struct pmckern_map_out pkm; vm_map_entry_t entry; #endif vm_offset_t addr; vm_size_t size, pageoff; vm_map_t map; addr = (vm_offset_t) uap->addr; size = uap->len; if (size == 0) return (EINVAL); pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); if (addr + size < addr) return (EINVAL); /* * Check for illegal addresses. Watch out for address wrap... */ map = &td->td_proc->p_vmspace->vm_map; if (addr < vm_map_min(map) || addr + size > vm_map_max(map)) return (EINVAL); vm_map_lock(map); #ifdef HWPMC_HOOKS /* * Inform hwpmc if the address range being unmapped contains * an executable region. */ pkm.pm_address = (uintptr_t) NULL; if (vm_map_lookup_entry(map, addr, &entry)) { for (; entry != &map->header && entry->start < addr + size; entry = entry->next) { if (vm_map_check_protection(map, entry->start, entry->end, VM_PROT_EXECUTE) == TRUE) { pkm.pm_address = (uintptr_t) addr; pkm.pm_size = (size_t) size; break; } } } #endif vm_map_delete(map, addr, addr + size); #ifdef HWPMC_HOOKS /* downgrade the lock to prevent a LOR with the pmc-sx lock */ vm_map_lock_downgrade(map); if (pkm.pm_address != (uintptr_t) NULL) PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm); vm_map_unlock_read(map); #else vm_map_unlock(map); #endif /* vm_map_delete returns nothing but KERN_SUCCESS anyway */ return (0); } #ifndef _SYS_SYSPROTO_H_ struct mprotect_args { const void *addr; size_t len; int prot; }; #endif /* * MPSAFE */ int sys_mprotect(td, uap) struct thread *td; struct mprotect_args *uap; { vm_offset_t addr; vm_size_t size, pageoff; vm_prot_t prot; addr = (vm_offset_t) uap->addr; size = uap->len; prot = uap->prot & VM_PROT_ALL; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); if (addr + size < addr) return (EINVAL); switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr, addr + size, prot, FALSE)) { case KERN_SUCCESS: return (0); case KERN_PROTECTION_FAILURE: return (EACCES); case KERN_RESOURCE_SHORTAGE: return (ENOMEM); } return (EINVAL); } #ifndef _SYS_SYSPROTO_H_ struct minherit_args { void *addr; size_t len; int inherit; }; #endif /* * MPSAFE */ int sys_minherit(td, uap) struct thread *td; struct minherit_args *uap; { vm_offset_t addr; vm_size_t size, pageoff; vm_inherit_t inherit; addr = (vm_offset_t)uap->addr; size = uap->len; inherit = uap->inherit; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); if (addr + size < addr) return (EINVAL); switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr, addr + size, inherit)) { case KERN_SUCCESS: return (0); case KERN_PROTECTION_FAILURE: return (EACCES); } return (EINVAL); } #ifndef _SYS_SYSPROTO_H_ struct madvise_args { void *addr; size_t len; int behav; }; #endif /* * MPSAFE */ int sys_madvise(td, uap) struct thread *td; struct madvise_args *uap; { vm_offset_t start, end; vm_map_t map; int flags; /* * Check for our special case, advising the swap pager we are * "immortal." */ if (uap->behav == MADV_PROTECT) { flags = PPROT_SET; return (kern_procctl(td, P_PID, td->td_proc->p_pid, PROC_SPROTECT, &flags)); } /* * Check for illegal behavior */ if (uap->behav < 0 || uap->behav > MADV_CORE) return (EINVAL); /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ map = &td->td_proc->p_vmspace->vm_map; if ((vm_offset_t)uap->addr < vm_map_min(map) || (vm_offset_t)uap->addr + uap->len > vm_map_max(map)) return (EINVAL); if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr) return (EINVAL); /* * Since this routine is only advisory, we default to conservative * behavior. */ start = trunc_page((vm_offset_t) uap->addr); end = round_page((vm_offset_t) uap->addr + uap->len); if (vm_map_madvise(map, start, end, uap->behav)) return (EINVAL); return (0); } #ifndef _SYS_SYSPROTO_H_ struct mincore_args { const void *addr; size_t len; char *vec; }; #endif /* * MPSAFE */ int sys_mincore(td, uap) struct thread *td; struct mincore_args *uap; { vm_offset_t addr, first_addr; vm_offset_t end, cend; pmap_t pmap; vm_map_t map; char *vec; int error = 0; int vecindex, lastvecindex; vm_map_entry_t current; vm_map_entry_t entry; vm_object_t object; vm_paddr_t locked_pa; vm_page_t m; vm_pindex_t pindex; int mincoreinfo; unsigned int timestamp; boolean_t locked; /* * Make sure that the addresses presented are valid for user * mode. */ first_addr = addr = trunc_page((vm_offset_t) uap->addr); end = addr + (vm_size_t)round_page(uap->len); map = &td->td_proc->p_vmspace->vm_map; if (end > vm_map_max(map) || end < addr) return (ENOMEM); /* * Address of byte vector */ vec = uap->vec; pmap = vmspace_pmap(td->td_proc->p_vmspace); vm_map_lock_read(map); RestartScan: timestamp = map->timestamp; if (!vm_map_lookup_entry(map, addr, &entry)) { vm_map_unlock_read(map); return (ENOMEM); } /* * Do this on a map entry basis so that if the pages are not * in the current processes address space, we can easily look * up the pages elsewhere. */ lastvecindex = -1; for (current = entry; (current != &map->header) && (current->start < end); current = current->next) { /* * check for contiguity */ if (current->end < end && (entry->next == &map->header || current->next->start > current->end)) { vm_map_unlock_read(map); return (ENOMEM); } /* * ignore submaps (for now) or null objects */ if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) || current->object.vm_object == NULL) continue; /* * limit this scan to the current map entry and the * limits for the mincore call */ if (addr < current->start) addr = current->start; cend = current->end; if (cend > end) cend = end; /* * scan this entry one page at a time */ while (addr < cend) { /* * Check pmap first, it is likely faster, also * it can provide info as to whether we are the * one referencing or modifying the page. */ object = NULL; locked_pa = 0; retry: m = NULL; mincoreinfo = pmap_mincore(pmap, addr, &locked_pa); if (locked_pa != 0) { /* * The page is mapped by this process but not * both accessed and modified. It is also * managed. Acquire the object lock so that * other mappings might be examined. */ m = PHYS_TO_VM_PAGE(locked_pa); if (m->object != object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); object = m->object; locked = VM_OBJECT_TRYWLOCK(object); vm_page_unlock(m); if (!locked) { VM_OBJECT_WLOCK(object); vm_page_lock(m); goto retry; } } else vm_page_unlock(m); KASSERT(m->valid == VM_PAGE_BITS_ALL, ("mincore: page %p is mapped but invalid", m)); } else if (mincoreinfo == 0) { /* * The page is not mapped by this process. If * the object implements managed pages, then * determine if the page is resident so that * the mappings might be examined. */ if (current->object.vm_object != object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); object = current->object.vm_object; VM_OBJECT_WLOCK(object); } if (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP || object->type == OBJT_VNODE) { pindex = OFF_TO_IDX(current->offset + (addr - current->start)); m = vm_page_lookup(object, pindex); if (m == NULL && vm_page_is_cached(object, pindex)) mincoreinfo = MINCORE_INCORE; if (m != NULL && m->valid == 0) m = NULL; if (m != NULL) mincoreinfo = MINCORE_INCORE; } } if (m != NULL) { /* Examine other mappings to the page. */ if (m->dirty == 0 && pmap_is_modified(m)) vm_page_dirty(m); if (m->dirty != 0) mincoreinfo |= MINCORE_MODIFIED_OTHER; /* * The first test for PGA_REFERENCED is an * optimization. The second test is * required because a concurrent pmap * operation could clear the last reference * and set PGA_REFERENCED before the call to * pmap_is_referenced(). */ if ((m->aflags & PGA_REFERENCED) != 0 || pmap_is_referenced(m) || (m->aflags & PGA_REFERENCED) != 0) mincoreinfo |= MINCORE_REFERENCED_OTHER; } if (object != NULL) VM_OBJECT_WUNLOCK(object); /* * subyte may page fault. In case it needs to modify * the map, we release the lock. */ vm_map_unlock_read(map); /* * calculate index into user supplied byte vector */ vecindex = OFF_TO_IDX(addr - first_addr); /* * If we have skipped map entries, we need to make sure that * the byte vector is zeroed for those skipped entries. */ while ((lastvecindex + 1) < vecindex) { ++lastvecindex; error = subyte(vec + lastvecindex, 0); if (error) { error = EFAULT; goto done2; } } /* * Pass the page information to the user */ error = subyte(vec + vecindex, mincoreinfo); if (error) { error = EFAULT; goto done2; } /* * If the map has changed, due to the subyte, the previous * output may be invalid. */ vm_map_lock_read(map); if (timestamp != map->timestamp) goto RestartScan; lastvecindex = vecindex; addr += PAGE_SIZE; } } /* * subyte may page fault. In case it needs to modify * the map, we release the lock. */ vm_map_unlock_read(map); /* * Zero the last entries in the byte vector. */ vecindex = OFF_TO_IDX(end - first_addr); while ((lastvecindex + 1) < vecindex) { ++lastvecindex; error = subyte(vec + lastvecindex, 0); if (error) { error = EFAULT; goto done2; } } /* * If the map has changed, due to the subyte, the previous * output may be invalid. */ vm_map_lock_read(map); if (timestamp != map->timestamp) goto RestartScan; vm_map_unlock_read(map); done2: return (error); } #ifndef _SYS_SYSPROTO_H_ struct mlock_args { const void *addr; size_t len; }; #endif /* * MPSAFE */ int sys_mlock(td, uap) struct thread *td; struct mlock_args *uap; { return (vm_mlock(td->td_proc, td->td_ucred, uap->addr, uap->len)); } int vm_mlock(struct proc *proc, struct ucred *cred, const void *addr0, size_t len) { vm_offset_t addr, end, last, start; vm_size_t npages, size; vm_map_t map; unsigned long nsize; int error; error = priv_check_cred(cred, PRIV_VM_MLOCK, 0); if (error) return (error); addr = (vm_offset_t)addr0; size = len; last = addr + size; start = trunc_page(addr); end = round_page(last); if (last < addr || end < addr) return (EINVAL); npages = atop(end - start); if (npages > vm_page_max_wired) return (ENOMEM); map = &proc->p_vmspace->vm_map; PROC_LOCK(proc); nsize = ptoa(npages + pmap_wired_count(map->pmap)); if (nsize > lim_cur(proc, RLIMIT_MEMLOCK)) { PROC_UNLOCK(proc); return (ENOMEM); } PROC_UNLOCK(proc); if (npages + vm_cnt.v_wire_count > vm_page_max_wired) return (EAGAIN); #ifdef RACCT PROC_LOCK(proc); error = racct_set(proc, RACCT_MEMLOCK, nsize); PROC_UNLOCK(proc); if (error != 0) return (ENOMEM); #endif error = vm_map_wire(map, start, end, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); #ifdef RACCT if (error != KERN_SUCCESS) { PROC_LOCK(proc); racct_set(proc, RACCT_MEMLOCK, ptoa(pmap_wired_count(map->pmap))); PROC_UNLOCK(proc); } #endif return (error == KERN_SUCCESS ? 0 : ENOMEM); } #ifndef _SYS_SYSPROTO_H_ struct mlockall_args { int how; }; #endif /* * MPSAFE */ int sys_mlockall(td, uap) struct thread *td; struct mlockall_args *uap; { vm_map_t map; int error; map = &td->td_proc->p_vmspace->vm_map; error = priv_check(td, PRIV_VM_MLOCK); if (error) return (error); if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0)) return (EINVAL); /* * If wiring all pages in the process would cause it to exceed * a hard resource limit, return ENOMEM. */ if (!old_mlock && uap->how & MCL_CURRENT) { PROC_LOCK(td->td_proc); if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) { PROC_UNLOCK(td->td_proc); return (ENOMEM); } PROC_UNLOCK(td->td_proc); } #ifdef RACCT PROC_LOCK(td->td_proc); error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size); PROC_UNLOCK(td->td_proc); if (error != 0) return (ENOMEM); #endif if (uap->how & MCL_FUTURE) { vm_map_lock(map); vm_map_modflags(map, MAP_WIREFUTURE, 0); vm_map_unlock(map); error = 0; } if (uap->how & MCL_CURRENT) { /* * P1003.1-2001 mandates that all currently mapped pages * will be memory resident and locked (wired) upon return * from mlockall(). vm_map_wire() will wire pages, by * calling vm_fault_wire() for each page in the region. */ error = vm_map_wire(map, vm_map_min(map), vm_map_max(map), VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); error = (error == KERN_SUCCESS ? 0 : EAGAIN); } #ifdef RACCT if (error != KERN_SUCCESS) { PROC_LOCK(td->td_proc); racct_set(td->td_proc, RACCT_MEMLOCK, ptoa(pmap_wired_count(map->pmap))); PROC_UNLOCK(td->td_proc); } #endif return (error); } #ifndef _SYS_SYSPROTO_H_ struct munlockall_args { register_t dummy; }; #endif /* * MPSAFE */ int sys_munlockall(td, uap) struct thread *td; struct munlockall_args *uap; { vm_map_t map; int error; map = &td->td_proc->p_vmspace->vm_map; error = priv_check(td, PRIV_VM_MUNLOCK); if (error) return (error); /* Clear the MAP_WIREFUTURE flag from this vm_map. */ vm_map_lock(map); vm_map_modflags(map, 0, MAP_WIREFUTURE); vm_map_unlock(map); /* Forcibly unwire all pages. */ error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map), VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); #ifdef RACCT if (error == KERN_SUCCESS) { PROC_LOCK(td->td_proc); racct_set(td->td_proc, RACCT_MEMLOCK, 0); PROC_UNLOCK(td->td_proc); } #endif return (error); } #ifndef _SYS_SYSPROTO_H_ struct munlock_args { const void *addr; size_t len; }; #endif /* * MPSAFE */ int sys_munlock(td, uap) struct thread *td; struct munlock_args *uap; { vm_offset_t addr, end, last, start; vm_size_t size; #ifdef RACCT vm_map_t map; #endif int error; error = priv_check(td, PRIV_VM_MUNLOCK); if (error) return (error); addr = (vm_offset_t)uap->addr; size = uap->len; last = addr + size; start = trunc_page(addr); end = round_page(last); if (last < addr || end < addr) return (EINVAL); error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); #ifdef RACCT if (error == KERN_SUCCESS) { PROC_LOCK(td->td_proc); map = &td->td_proc->p_vmspace->vm_map; racct_set(td->td_proc, RACCT_MEMLOCK, ptoa(pmap_wired_count(map->pmap))); PROC_UNLOCK(td->td_proc); } #endif return (error == KERN_SUCCESS ? 0 : ENOMEM); } /* * vm_mmap_vnode() * * Helper function for vm_mmap. Perform sanity check specific for mmap * operations on vnodes. * * For VCHR vnodes, the vnode lock is held over the call to * vm_mmap_cdev() to keep vp->v_rdev valid. */ int vm_mmap_vnode(struct thread *td, vm_size_t objsize, vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp, boolean_t *writecounted) { struct vattr va; vm_object_t obj; vm_offset_t foff; struct mount *mp; struct ucred *cred; int error, flags, locktype; mp = vp->v_mount; cred = td->td_ucred; if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED)) locktype = LK_EXCLUSIVE; else locktype = LK_SHARED; if ((error = vget(vp, locktype, td)) != 0) return (error); foff = *foffp; flags = *flagsp; obj = vp->v_object; if (vp->v_type == VREG) { /* * Get the proper underlying object */ if (obj == NULL) { error = EINVAL; goto done; } if (obj->type == OBJT_VNODE && obj->handle != vp) { vput(vp); vp = (struct vnode *)obj->handle; /* * Bypass filesystems obey the mpsafety of the * underlying fs. Tmpfs never bypasses. */ error = vget(vp, locktype, td); if (error != 0) return (error); } if (locktype == LK_EXCLUSIVE) { *writecounted = TRUE; vnode_pager_update_writecount(obj, 0, objsize); } } else if (vp->v_type == VCHR) { error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp, vp->v_rdev, foffp, objp); if (error == 0) goto mark_atime; goto done; } else { error = EINVAL; goto done; } if ((error = VOP_GETATTR(vp, &va, cred))) goto done; #ifdef MAC error = mac_vnode_check_mmap(cred, vp, prot, flags); if (error != 0) goto done; #endif if ((flags & MAP_SHARED) != 0) { if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) { if (prot & PROT_WRITE) { error = EPERM; goto done; } *maxprotp &= ~VM_PROT_WRITE; } } /* * If it is a regular file without any references * we do not need to sync it. * Adjust object size to be the size of actual file. */ objsize = round_page(va.va_size); if (va.va_nlink == 0) flags |= MAP_NOSYNC; if (obj->type == OBJT_VNODE) obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, cred); else { KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP, ("wrong object type")); vm_object_reference(obj); } if (obj == NULL) { error = ENOMEM; goto done; } *objp = obj; *flagsp = flags; mark_atime: vfs_mark_atime(vp, cred); done: if (error != 0 && *writecounted) { *writecounted = FALSE; vnode_pager_update_writecount(obj, objsize, 0); } vput(vp); return (error); } /* * vm_mmap_cdev() * * MPSAFE * * Helper function for vm_mmap. Perform sanity check specific for mmap * operations on cdevs. */ int vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, vm_ooffset_t *foff, vm_object_t *objp) { vm_object_t obj; struct cdevsw *dsw; int error, flags, ref; flags = *flagsp; dsw = dev_refthread(cdev, &ref); if (dsw == NULL) return (ENXIO); if (dsw->d_flags & D_MMAP_ANON) { dev_relthread(cdev, ref); *maxprotp = VM_PROT_ALL; *flagsp |= MAP_ANON; return (0); } /* * cdevs do not provide private mappings of any kind. */ if ((*maxprotp & VM_PROT_WRITE) == 0 && (prot & PROT_WRITE) != 0) { dev_relthread(cdev, ref); return (EACCES); } if (flags & (MAP_PRIVATE|MAP_COPY)) { dev_relthread(cdev, ref); return (EINVAL); } /* * Force device mappings to be shared. */ flags |= MAP_SHARED; #ifdef MAC_XXX error = mac_cdev_check_mmap(td->td_ucred, cdev, prot); if (error != 0) { dev_relthread(cdev, ref); return (error); } #endif /* * First, try d_mmap_single(). If that is not implemented * (returns ENODEV), fall back to using the device pager. * Note that d_mmap_single() must return a reference to the * object (it needs to bump the reference count of the object * it returns somehow). * * XXX assumes VM_PROT_* == PROT_* */ error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot); dev_relthread(cdev, ref); if (error != ENODEV) return (error); obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff, td->td_ucred); if (obj == NULL) return (EINVAL); *objp = obj; *flagsp = flags; return (0); } /* * vm_mmap_shm() * * MPSAFE * * Helper function for vm_mmap. Perform sanity check specific for mmap * operations on shm file descriptors. */ int vm_mmap_shm(struct thread *td, vm_size_t objsize, vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp) { int error; if ((*flagsp & MAP_SHARED) != 0 && (*maxprotp & VM_PROT_WRITE) == 0 && (prot & PROT_WRITE) != 0) return (EACCES); #ifdef MAC error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp); if (error != 0) return (error); #endif error = shm_mmap(shmfd, objsize, foff, objp); if (error) return (error); return (0); } /* * vm_mmap() * * MPSAFE * * Internal version of mmap. Currently used by mmap, exec, and sys5 * shared memory. Handle is either a vnode pointer or NULL for MAP_ANON. */ int vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, vm_prot_t maxprot, int flags, objtype_t handle_type, void *handle, vm_ooffset_t foff) { boolean_t fitit; vm_object_t object = NULL; struct thread *td = curthread; int docow, error, findspace, rv; boolean_t writecounted; if (size == 0) return (0); size = round_page(size); if (map == &td->td_proc->p_vmspace->vm_map) { PROC_LOCK(td->td_proc); if (map->size + size > lim_cur(td->td_proc, RLIMIT_VMEM)) { PROC_UNLOCK(td->td_proc); return (ENOMEM); } if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) { PROC_UNLOCK(td->td_proc); return (ENOMEM); } if (!old_mlock && map->flags & MAP_WIREFUTURE) { if (ptoa(pmap_wired_count(map->pmap)) + size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) { racct_set_force(td->td_proc, RACCT_VMEM, map->size); PROC_UNLOCK(td->td_proc); return (ENOMEM); } error = racct_set(td->td_proc, RACCT_MEMLOCK, ptoa(pmap_wired_count(map->pmap)) + size); if (error != 0) { racct_set_force(td->td_proc, RACCT_VMEM, map->size); PROC_UNLOCK(td->td_proc); return (error); } } PROC_UNLOCK(td->td_proc); } /* * We currently can only deal with page aligned file offsets. * The check is here rather than in the syscall because the * kernel calls this function internally for other mmaping * operations (such as in exec) and non-aligned offsets will * cause pmap inconsistencies...so we want to be sure to * disallow this in all cases. */ if (foff & PAGE_MASK) return (EINVAL); if ((flags & MAP_FIXED) == 0) { fitit = TRUE; *addr = round_page(*addr); } else { if (*addr != trunc_page(*addr)) return (EINVAL); fitit = FALSE; } writecounted = FALSE; /* * Lookup/allocate object. */ switch (handle_type) { case OBJT_DEVICE: error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, handle, &foff, &object); break; case OBJT_VNODE: error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, handle, &foff, &object, &writecounted); break; case OBJT_SWAP: error = vm_mmap_shm(td, size, prot, &maxprot, &flags, handle, foff, &object); break; case OBJT_DEFAULT: if (handle == NULL) { error = 0; break; } /* FALLTHROUGH */ default: error = EINVAL; break; } if (error) return (error); if (flags & MAP_ANON) { object = NULL; docow = 0; /* * Unnamed anonymous regions always start at 0. */ if (handle == 0) foff = 0; } else if (flags & MAP_PREFAULT_READ) docow = MAP_PREFAULT; else docow = MAP_PREFAULT_PARTIAL; if ((flags & (MAP_ANON|MAP_SHARED)) == 0) docow |= MAP_COPY_ON_WRITE; if (flags & MAP_NOSYNC) docow |= MAP_DISABLE_SYNCER; if (flags & MAP_NOCORE) docow |= MAP_DISABLE_COREDUMP; /* Shared memory is also shared with children. */ if (flags & MAP_SHARED) docow |= MAP_INHERIT_SHARE; if (writecounted) docow |= MAP_VN_WRITECOUNT; if (flags & MAP_STACK) { if (object != NULL) return (EINVAL); docow |= MAP_STACK_GROWS_DOWN; } if ((flags & MAP_EXCL) != 0) docow |= MAP_CHECK_EXCL; if (fitit) { if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER) findspace = VMFS_SUPER_SPACE; else if ((flags & MAP_ALIGNMENT_MASK) != 0) findspace = VMFS_ALIGNED_SPACE(flags >> MAP_ALIGNMENT_SHIFT); else findspace = VMFS_OPTIMAL_SPACE; rv = vm_map_find(map, object, foff, addr, size, #ifdef MAP_32BIT flags & MAP_32BIT ? MAP_32BIT_MAX_ADDR : #endif 0, findspace, prot, maxprot, docow); } else { rv = vm_map_fixed(map, object, foff, *addr, size, prot, maxprot, docow); } if (rv == KERN_SUCCESS) { /* * If the process has requested that all future mappings * be wired, then heed this. */ if (map->flags & MAP_WIREFUTURE) { vm_map_wire(map, *addr, *addr + size, VM_MAP_WIRE_USER | ((flags & MAP_STACK) ? VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES)); } } else { /* * If this mapping was accounted for in the vnode's * writecount, then undo that now. */ if (writecounted) vnode_pager_release_writecount(object, 0, size); /* * Lose the object reference. Will destroy the * object if it's an unnamed anonymous mapping * or named anonymous without other references. */ vm_object_deallocate(object); } return (vm_mmap_to_errno(rv)); } /* * Translate a Mach VM return code to zero on success or the appropriate errno * on failure. */ int vm_mmap_to_errno(int rv) { switch (rv) { case KERN_SUCCESS: return (0); case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: return (ENOMEM); case KERN_PROTECTION_FAILURE: return (EACCES); default: return (EINVAL); } } Index: head/usr.bin/truss/syscalls.c =================================================================== --- head/usr.bin/truss/syscalls.c (revision 273249) +++ head/usr.bin/truss/syscalls.c (revision 273250) @@ -1,1416 +1,1416 @@ /* * Copyright 1997 Sean Eric Fagan * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan * 4. Neither the name of the author may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ /* * This file has routines used to print out system calls and their * arguments. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "truss.h" #include "extern.h" #include "syscall.h" /* 64-bit alignment on 32-bit platforms. */ #ifdef __powerpc__ #define QUAD_ALIGN 1 #else #define QUAD_ALIGN 0 #endif /* Number of slots needed for a 64-bit argument. */ #ifdef __LP64__ #define QUAD_SLOTS 1 #else #define QUAD_SLOTS 2 #endif /* * This should probably be in its own file, sorted alphabetically. */ static struct syscall syscalls[] = { { .name = "fcntl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 } , { Fcntl, 1 }, { Fcntlflag | OUT, 2 } } }, { .name = "fork", .ret_type = 1, .nargs = 0 }, { .name = "vfork", .ret_type = 1, .nargs = 0 }, { .name = "rfork", .ret_type = 1, .nargs = 1, .args = { { Rforkflags, 0 } } }, { .name = "getegid", .ret_type = 1, .nargs = 0 }, { .name = "geteuid", .ret_type = 1, .nargs = 0 }, { .name = "linux_readlink", .ret_type = 1, .nargs = 3, .args = { { Name, 0 } , { Name | OUT, 1 }, { Int, 2 }}}, { .name = "linux_socketcall", .ret_type = 1, .nargs = 2, .args = { { Int, 0 } , { LinuxSockArgs, 1 }}}, { .name = "getgid", .ret_type = 1, .nargs = 0 }, { .name = "getpid", .ret_type = 1, .nargs = 0 }, { .name = "getpgid", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "getpgrp", .ret_type = 1, .nargs = 0 }, { .name = "getppid", .ret_type = 1, .nargs = 0 }, { .name = "getsid", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "getuid", .ret_type = 1, .nargs = 0 }, { .name = "readlink", .ret_type = 1, .nargs = 3, .args = { { Name, 0 } , { Readlinkres | OUT, 1 }, { Int, 2 } } }, { .name = "lseek", .ret_type = 2, .nargs = 3, .args = { { Int, 0 }, { Quad, 1 + QUAD_ALIGN }, { Whence, 1 + QUAD_SLOTS + QUAD_ALIGN } } }, { .name = "linux_lseek", .ret_type = 2, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Whence, 2 } } }, { .name = "mmap", .ret_type = 2, .nargs = 6, .args = { { Ptr, 0 }, { Int, 1 }, { Mprot, 2 }, { Mmapflags, 3 }, { Int, 4 }, { Quad, 5 + QUAD_ALIGN } } }, { .name = "linux_mkdir", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0} , {Int, 1}}}, { .name = "mprotect", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Int, 1 }, { Mprot, 2 } } }, { .name = "open", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 } , { Open, 1 }, { Octal, 2 } } }, { .name = "mkdir", .ret_type = 1, .nargs = 2, .args = { { Name, 0 } , { Octal, 1 } } }, { .name = "linux_open", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Hex, 1 }, { Octal, 2 } } }, { .name = "close", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "link", .ret_type = 0, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "unlink", .ret_type = 0, .nargs = 1, .args = { { Name, 0 } } }, { .name = "chdir", .ret_type = 0, .nargs = 1, .args = { { Name, 0 } } }, { .name = "chroot", .ret_type = 0, .nargs = 1, .args = { { Name, 0 } } }, { .name = "mknod", .ret_type = 0, .nargs = 3, .args = { { Name, 0 }, { Octal, 1 }, { Int, 3 } } }, { .name = "chmod", .ret_type = 0, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "chown", .ret_type = 0, .nargs = 3, .args = { { Name, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "linux_stat64", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { Ptr | OUT, 1 }, { Ptr | IN, 1 }}}, { .name = "mount", .ret_type = 0, .nargs = 4, .args = { { Name, 0 }, { Name, 1 }, { Int, 2 }, { Ptr, 3 } } }, { .name = "umount", .ret_type = 0, .nargs = 2, .args = { { Name, 0 }, { Int, 2 } } }, { .name = "fstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Stat | OUT , 1 } } }, { .name = "stat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat | OUT, 1 } } }, { .name = "lstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat | OUT, 1 } } }, { .name = "linux_newstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Ptr | OUT, 1 } } }, { .name = "linux_access", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Int, 1 }}}, { .name = "linux_newfstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Ptr | OUT, 1 } } }, { .name = "write", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | IN, 1 }, { Int, 2 } } }, { .name = "ioctl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Ioctl, 1 }, { Hex, 2 } } }, { .name = "break", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "exit", .ret_type = 0, .nargs = 1, .args = { { Hex, 0 } } }, { .name = "access", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Int, 1 } } }, { .name = "sigaction", .ret_type = 1, .nargs = 3, .args = { { Signal, 0 }, { Sigaction | IN, 1 }, { Sigaction | OUT, 2 } } }, { .name = "accept", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "bind", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | IN, 1 }, { Int, 2 } } }, { .name = "connect", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | IN, 1 }, { Int, 2 } } }, { .name = "getpeername", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "getsockname", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "recvfrom", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Int, 2 }, { Hex, 3 }, { Sockaddr | OUT, 4 }, { Ptr | OUT, 5 } } }, { .name = "sendto", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { BinString | IN, 1 }, { Int, 2 }, { Hex, 3 }, { Sockaddr | IN, 4 }, { Ptr | IN, 5 } } }, { .name = "execve", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { StringArray | IN, 1 }, { StringArray | IN, 2 } } }, { .name = "linux_execve", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { StringArray | IN, 1 }, { StringArray | IN, 2 } } }, { .name = "kldload", .ret_type = 0, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "kldunload", .ret_type = 0, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldfind", .ret_type = 0, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "kldnext", .ret_type = 0, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldstat", .ret_type = 0, .nargs = 2, .args = { { Int, 0 }, { Ptr, 1 } } }, { .name = "kldfirstmod", .ret_type = 0, .nargs = 1, .args = { { Int, 0 } } }, { .name = "nanosleep", .ret_type = 0, .nargs = 1, .args = { { Timespec, 0 } } }, { .name = "select", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Fd_set, 1 }, { Fd_set, 2 }, { Fd_set, 3 }, { Timeval, 4 } } }, { .name = "poll", .ret_type = 1, .nargs = 3, .args = { { Pollfd, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "gettimeofday", .ret_type = 1, .nargs = 2, .args = { { Timeval | OUT, 0 }, { Ptr, 1 } } }, { .name = "clock_gettime", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec | OUT, 1 } } }, { .name = "getitimer", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Itimerval | OUT, 2 } } }, { .name = "setitimer", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Itimerval, 1 } , { Itimerval | OUT, 2 } } }, { .name = "kse_release", .ret_type = 0, .nargs = 1, .args = { { Timespec, 0 } } }, { .name = "kevent", .ret_type = 0, .nargs = 6, .args = { { Int, 0 }, { Kevent, 1 }, { Int, 2 }, { Kevent | OUT, 3 }, { Int, 4 }, { Timespec, 5 } } }, { .name = "sigprocmask", .ret_type = 0, .nargs = 3, .args = { { Sigprocmask, 0 }, { Sigset, 1 }, { Sigset | OUT, 2 } } }, { .name = "unmount", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Int, 1 } } }, { .name = "socket", .ret_type = 1, .nargs = 3, .args = { { Sockdomain, 0 }, { Socktype, 1 }, { Int, 2 } } }, { .name = "getrusage", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Rusage | OUT, 1 } } }, { .name = "__getcwd", .ret_type = 1, .nargs = 2, .args = { { Name | OUT, 0 }, { Int, 1 } } }, { .name = "shutdown", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Shutdown, 1 } } }, { .name = "getrlimit", .ret_type = 1, .nargs = 2, .args = { { Resource, 0 }, { Rlimit | OUT, 1 } } }, { .name = "setrlimit", .ret_type = 1, .nargs = 2, .args = { { Resource, 0 }, { Rlimit | IN, 1 } } }, { .name = "utimes", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Timeval2 | IN, 1 } } }, { .name = "lutimes", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Timeval2 | IN, 1 } } }, { .name = "futimes", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timeval | IN, 1 } } }, { .name = "chflags", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Hex, 1 } } }, { .name = "lchflags", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Hex, 1 } } }, { .name = "pathconf", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Pathconf, 1 } } }, { .name = "pipe", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "truncate", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { Int | IN, 1 }, { Quad | IN, 2 } } }, { .name = "ftruncate", .ret_type = 1, .nargs = 3, .args = { { Int | IN, 0 }, { Int | IN, 1 }, { Quad | IN, 2 } } }, { .name = "kill", .ret_type = 1, .nargs = 2, .args = { { Int | IN, 0 }, { Signal | IN, 1 } } }, { .name = "munmap", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Int, 1 } } }, { .name = "read", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Int, 2 } } }, { .name = "rename", .ret_type = 1, .nargs = 2, .args = { { Name , 0 } , { Name, 1 } } }, { .name = "symlink", .ret_type = 1, .nargs = 2, .args = { { Name , 0 } , { Name, 1 } } }, { .name = "posix_openpt", .ret_type = 1, .nargs = 1, .args = { { Open, 0 } } }, { .name = "wait4", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { ExitStatus | OUT, 1 }, { Waitoptions, 2 }, { Rusage | OUT, 3 } } }, { .name = "wait6", .ret_type = 1, .nargs = 6, .args = { { Idtype, 0 }, { Int, 1 }, { ExitStatus | OUT, 2 }, { Waitoptions, 3 }, { Rusage | OUT, 4 }, { Ptr, 5 } } }, { .name = "procctl", .ret_type = 1, .nargs = 4, .args = { { Idtype, 0 }, { Int, 1 }, { Procctl, 2 }, { Ptr, 3 } } }, { .name = "_umtx_op", .ret_type = 1, .nargs = 5, .args = { { Ptr, 0 }, { Umtxop, 1 }, { LongHex, 2 }, { Ptr, 3 }, { Ptr, 4 } } }, { .name = 0 }, }; /* Xlat idea taken from strace */ struct xlat { int val; const char *str; }; #define X(a) { a, #a }, #define XEND { 0, NULL } static struct xlat kevent_filters[] = { X(EVFILT_READ) X(EVFILT_WRITE) X(EVFILT_AIO) X(EVFILT_VNODE) X(EVFILT_PROC) X(EVFILT_SIGNAL) X(EVFILT_TIMER) X(EVFILT_FS) X(EVFILT_READ) XEND }; static struct xlat kevent_flags[] = { X(EV_ADD) X(EV_DELETE) X(EV_ENABLE) X(EV_DISABLE) X(EV_ONESHOT) X(EV_CLEAR) X(EV_FLAG1) X(EV_ERROR) X(EV_EOF) XEND }; static struct xlat poll_flags[] = { X(POLLSTANDARD) X(POLLIN) X(POLLPRI) X(POLLOUT) X(POLLERR) X(POLLHUP) X(POLLNVAL) X(POLLRDNORM) X(POLLRDBAND) X(POLLWRBAND) X(POLLINIGNEOF) XEND }; static struct xlat mmap_flags[] = { - X(MAP_SHARED) X(MAP_PRIVATE) X(MAP_FIXED) X(MAP_RENAME) - X(MAP_NORESERVE) X(MAP_RESERVED0080) X(MAP_RESERVED0100) + X(MAP_SHARED) X(MAP_PRIVATE) X(MAP_FIXED) X(MAP_RESERVED0020) + X(MAP_RESERVED0040) X(MAP_RESERVED0080) X(MAP_RESERVED0100) X(MAP_HASSEMAPHORE) X(MAP_STACK) X(MAP_NOSYNC) X(MAP_ANON) X(MAP_NOCORE) X(MAP_PREFAULT_READ) #ifdef MAP_32BIT X(MAP_32BIT) #endif XEND }; static struct xlat mprot_flags[] = { X(PROT_NONE) X(PROT_READ) X(PROT_WRITE) X(PROT_EXEC) XEND }; static struct xlat whence_arg[] = { X(SEEK_SET) X(SEEK_CUR) X(SEEK_END) XEND }; static struct xlat sigaction_flags[] = { X(SA_ONSTACK) X(SA_RESTART) X(SA_RESETHAND) X(SA_NOCLDSTOP) X(SA_NODEFER) X(SA_NOCLDWAIT) X(SA_SIGINFO) XEND }; static struct xlat fcntl_arg[] = { X(F_DUPFD) X(F_GETFD) X(F_SETFD) X(F_GETFL) X(F_SETFL) X(F_GETOWN) X(F_SETOWN) X(F_GETLK) X(F_SETLK) X(F_SETLKW) XEND }; static struct xlat fcntlfd_arg[] = { X(FD_CLOEXEC) XEND }; static struct xlat fcntlfl_arg[] = { X(O_APPEND) X(O_ASYNC) X(O_FSYNC) X(O_NONBLOCK) X(O_NOFOLLOW) X(O_DIRECT) XEND }; static struct xlat sockdomain_arg[] = { X(PF_UNSPEC) X(PF_LOCAL) X(PF_UNIX) X(PF_INET) X(PF_IMPLINK) X(PF_PUP) X(PF_CHAOS) X(PF_NETBIOS) X(PF_ISO) X(PF_OSI) X(PF_ECMA) X(PF_DATAKIT) X(PF_CCITT) X(PF_SNA) X(PF_DECnet) X(PF_DLI) X(PF_LAT) X(PF_HYLINK) X(PF_APPLETALK) X(PF_ROUTE) X(PF_LINK) X(PF_XTP) X(PF_COIP) X(PF_CNT) X(PF_SIP) X(PF_IPX) X(PF_RTIP) X(PF_PIP) X(PF_ISDN) X(PF_KEY) X(PF_INET6) X(PF_NATM) X(PF_ATM) X(PF_NETGRAPH) X(PF_SLOW) X(PF_SCLUSTER) X(PF_ARP) X(PF_BLUETOOTH) XEND }; static struct xlat socktype_arg[] = { X(SOCK_STREAM) X(SOCK_DGRAM) X(SOCK_RAW) X(SOCK_RDM) X(SOCK_SEQPACKET) XEND }; static struct xlat open_flags[] = { X(O_RDONLY) X(O_WRONLY) X(O_RDWR) X(O_ACCMODE) X(O_NONBLOCK) X(O_APPEND) X(O_SHLOCK) X(O_EXLOCK) X(O_ASYNC) X(O_FSYNC) X(O_NOFOLLOW) X(O_CREAT) X(O_TRUNC) X(O_EXCL) X(O_NOCTTY) X(O_DIRECT) X(O_DIRECTORY) X(O_EXEC) X(O_TTY_INIT) X(O_CLOEXEC) XEND }; static struct xlat shutdown_arg[] = { X(SHUT_RD) X(SHUT_WR) X(SHUT_RDWR) XEND }; static struct xlat resource_arg[] = { X(RLIMIT_CPU) X(RLIMIT_FSIZE) X(RLIMIT_DATA) X(RLIMIT_STACK) X(RLIMIT_CORE) X(RLIMIT_RSS) X(RLIMIT_MEMLOCK) X(RLIMIT_NPROC) X(RLIMIT_NOFILE) X(RLIMIT_SBSIZE) X(RLIMIT_VMEM) XEND }; static struct xlat pathconf_arg[] = { X(_PC_LINK_MAX) X(_PC_MAX_CANON) X(_PC_MAX_INPUT) X(_PC_NAME_MAX) X(_PC_PATH_MAX) X(_PC_PIPE_BUF) X(_PC_CHOWN_RESTRICTED) X(_PC_NO_TRUNC) X(_PC_VDISABLE) X(_PC_ASYNC_IO) X(_PC_PRIO_IO) X(_PC_SYNC_IO) X(_PC_ALLOC_SIZE_MIN) X(_PC_FILESIZEBITS) X(_PC_REC_INCR_XFER_SIZE) X(_PC_REC_MAX_XFER_SIZE) X(_PC_REC_MIN_XFER_SIZE) X(_PC_REC_XFER_ALIGN) X(_PC_SYMLINK_MAX) X(_PC_ACL_EXTENDED) X(_PC_ACL_PATH_MAX) X(_PC_CAP_PRESENT) X(_PC_INF_PRESENT) X(_PC_MAC_PRESENT) XEND }; static struct xlat rfork_flags[] = { X(RFPROC) X(RFNOWAIT) X(RFFDG) X(RFCFDG) X(RFTHREAD) X(RFMEM) X(RFSIGSHARE) X(RFTSIGZMB) X(RFLINUXTHPN) XEND }; static struct xlat wait_options[] = { X(WNOHANG) X(WUNTRACED) X(WCONTINUED) X(WNOWAIT) X(WEXITED) X(WTRAPPED) XEND }; static struct xlat idtype_arg[] = { X(P_PID) X(P_PPID) X(P_PGID) X(P_SID) X(P_CID) X(P_UID) X(P_GID) X(P_ALL) X(P_LWPID) X(P_TASKID) X(P_PROJID) X(P_POOLID) X(P_JAILID) X(P_CTID) X(P_CPUID) X(P_PSETID) XEND }; static struct xlat procctl_arg[] = { X(PROC_SPROTECT) XEND }; static struct xlat umtx_ops[] = { X(UMTX_OP_RESERVED0) X(UMTX_OP_RESERVED1) X(UMTX_OP_WAIT) X(UMTX_OP_WAKE) X(UMTX_OP_MUTEX_TRYLOCK) X(UMTX_OP_MUTEX_LOCK) X(UMTX_OP_MUTEX_UNLOCK) X(UMTX_OP_SET_CEILING) X(UMTX_OP_CV_WAIT) X(UMTX_OP_CV_SIGNAL) X(UMTX_OP_CV_BROADCAST) X(UMTX_OP_WAIT_UINT) X(UMTX_OP_RW_RDLOCK) X(UMTX_OP_RW_WRLOCK) X(UMTX_OP_RW_UNLOCK) X(UMTX_OP_WAIT_UINT_PRIVATE) X(UMTX_OP_WAKE_PRIVATE) X(UMTX_OP_MUTEX_WAIT) X(UMTX_OP_MUTEX_WAKE) X(UMTX_OP_SEM_WAIT) X(UMTX_OP_SEM_WAKE) X(UMTX_OP_NWAKE_PRIVATE) X(UMTX_OP_MUTEX_WAKE2) XEND }; #undef X #undef XEND /* * Searches an xlat array for a value, and returns it if found. Otherwise * return a string representation. */ static const char * lookup(struct xlat *xlat, int val, int base) { static char tmp[16]; for (; xlat->str != NULL; xlat++) if (xlat->val == val) return (xlat->str); switch (base) { case 8: sprintf(tmp, "0%o", val); break; case 16: sprintf(tmp, "0x%x", val); break; case 10: sprintf(tmp, "%u", val); break; default: errx(1,"Unknown lookup base"); break; } return (tmp); } static const char * xlookup(struct xlat *xlat, int val) { return (lookup(xlat, val, 16)); } /* Searches an xlat array containing bitfield values. Remaining bits set after removing the known ones are printed at the end: IN|0x400 */ static char * xlookup_bits(struct xlat *xlat, int val) { int len, rem; static char str[512]; len = 0; rem = val; for (; xlat->str != NULL; xlat++) { if ((xlat->val & rem) == xlat->val) { /* don't print the "all-bits-zero" string unless all bits are really zero */ if (xlat->val == 0 && val != 0) continue; len += sprintf(str + len, "%s|", xlat->str); rem &= ~(xlat->val); } } /* if we have leftover bits or didn't match anything */ if (rem || len == 0) len += sprintf(str + len, "0x%x", rem); if (len && str[len - 1] == '|') len--; str[len] = 0; return (str); } /* * If/when the list gets big, it might be desirable to do it * as a hash table or binary search. */ struct syscall * get_syscall(const char *name) { struct syscall *sc; sc = syscalls; if (name == NULL) return (NULL); while (sc->name) { if (strcmp(name, sc->name) == 0) return (sc); sc++; } return (NULL); } /* * get_struct * * Copy a fixed amount of bytes from the process. */ static int get_struct(pid_t pid, void *offset, void *buf, int len) { struct ptrace_io_desc iorequest; iorequest.piod_op = PIOD_READ_D; iorequest.piod_offs = offset; iorequest.piod_addr = buf; iorequest.piod_len = len; if (ptrace(PT_IO, pid, (caddr_t)&iorequest, 0) < 0) return (-1); return (0); } #define MAXSIZE 4096 #define BLOCKSIZE 1024 /* * get_string * Copy a string from the process. Note that it is * expected to be a C string, but if max is set, it will * only get that much. */ static char * get_string(pid_t pid, void *offset, int max) { struct ptrace_io_desc iorequest; char *buf; int diff, i, size, totalsize; diff = 0; totalsize = size = max ? (max + 1) : BLOCKSIZE; buf = malloc(totalsize); if (buf == NULL) return (NULL); for (;;) { diff = totalsize - size; iorequest.piod_op = PIOD_READ_D; iorequest.piod_offs = (char *)offset + diff; iorequest.piod_addr = buf + diff; iorequest.piod_len = size; if (ptrace(PT_IO, pid, (caddr_t)&iorequest, 0) < 0) { free(buf); return (NULL); } for (i = 0 ; i < size; i++) { if (buf[diff + i] == '\0') return (buf); } if (totalsize < MAXSIZE - BLOCKSIZE && max == 0) { totalsize += BLOCKSIZE; buf = realloc(buf, totalsize); size = BLOCKSIZE; } else { buf[totalsize - 1] = '\0'; return (buf); } } } static char * strsig2(int sig) { char *tmp; tmp = strsig(sig); if (tmp == NULL) asprintf(&tmp, "%d", sig); return (tmp); } /* * print_arg * Converts a syscall argument into a string. Said string is * allocated via malloc(), so needs to be free()'d. The file * descriptor is for the process' memory (via /proc), and is used * to get any data (where the argument is a pointer). sc is * a pointer to the syscall description (see above); args is * an array of all of the system call arguments. */ char * print_arg(struct syscall_args *sc, unsigned long *args, long retval, struct trussinfo *trussinfo) { char *tmp; pid_t pid; tmp = NULL; pid = trussinfo->pid; switch (sc->type & ARG_MASK) { case Hex: asprintf(&tmp, "0x%x", (int)args[sc->offset]); break; case Octal: asprintf(&tmp, "0%o", (int)args[sc->offset]); break; case Int: asprintf(&tmp, "%d", (int)args[sc->offset]); break; case LongHex: asprintf(&tmp, "0x%lx", args[sc->offset]); break; case Name: { /* NULL-terminated string. */ char *tmp2; tmp2 = get_string(pid, (void*)args[sc->offset], 0); asprintf(&tmp, "\"%s\"", tmp2); free(tmp2); break; } case BinString: { /* Binary block of data that might have printable characters. XXX If type|OUT, assume that the length is the syscall's return value. Otherwise, assume that the length of the block is in the next syscall argument. */ int max_string = trussinfo->strsize; char tmp2[max_string+1], *tmp3; int len; int truncated = 0; if (sc->type & OUT) len = retval; else len = args[sc->offset + 1]; /* Don't print more than max_string characters, to avoid word wrap. If we have to truncate put some ... after the string. */ if (len > max_string) { len = max_string; truncated = 1; } if (len && get_struct(pid, (void*)args[sc->offset], &tmp2, len) != -1) { tmp3 = malloc(len * 4 + 1); while (len) { if (strvisx(tmp3, tmp2, len, VIS_CSTYLE|VIS_TAB|VIS_NL) <= max_string) break; len--; truncated = 1; }; asprintf(&tmp, "\"%s\"%s", tmp3, truncated ? "..." : ""); free(tmp3); } else { asprintf(&tmp, "0x%lx", args[sc->offset]); } break; } case StringArray: { int num, size, i; char *tmp2; char *string; char *strarray[100]; /* XXX This is ugly. */ if (get_struct(pid, (void *)args[sc->offset], (void *)&strarray, sizeof(strarray)) == -1) err(1, "get_struct %p", (void *)args[sc->offset]); num = 0; size = 0; /* Find out how large of a buffer we'll need. */ while (strarray[num] != NULL) { string = get_string(pid, (void*)strarray[num], 0); size += strlen(string); free(string); num++; } size += 4 + (num * 4); tmp = (char *)malloc(size); tmp2 = tmp; tmp2 += sprintf(tmp2, " ["); for (i = 0; i < num; i++) { string = get_string(pid, (void*)strarray[i], 0); tmp2 += sprintf(tmp2, " \"%s\"%c", string, (i + 1 == num) ? ' ' : ','); free(string); } tmp2 += sprintf(tmp2, "]"); break; } #ifdef __LP64__ case Quad: asprintf(&tmp, "0x%lx", args[sc->offset]); break; #else case Quad: { unsigned long long ll; ll = *(unsigned long long *)(args + sc->offset); asprintf(&tmp, "0x%llx", ll); break; } #endif case Ptr: asprintf(&tmp, "0x%lx", args[sc->offset]); break; case Readlinkres: { char *tmp2; if (retval == -1) { tmp = strdup(""); break; } tmp2 = get_string(pid, (void*)args[sc->offset], retval); asprintf(&tmp, "\"%s\"", tmp2); free(tmp2); break; } case Ioctl: { const char *temp = ioctlname(args[sc->offset]); if (temp) tmp = strdup(temp); else { unsigned long arg = args[sc->offset]; asprintf(&tmp, "0x%lx { IO%s%s 0x%lx('%c'), %lu, %lu }", arg, arg & IOC_OUT ? "R" : "", arg & IOC_IN ? "W" : "", IOCGROUP(arg), isprint(IOCGROUP(arg)) ? (char)IOCGROUP(arg) : '?', arg & 0xFF, IOCPARM_LEN(arg)); } break; } case Timespec: { struct timespec ts; if (get_struct(pid, (void *)args[sc->offset], &ts, sizeof(ts)) != -1) asprintf(&tmp, "{%ld.%09ld }", (long)ts.tv_sec, ts.tv_nsec); else asprintf(&tmp, "0x%lx", args[sc->offset]); break; } case Timeval: { struct timeval tv; if (get_struct(pid, (void *)args[sc->offset], &tv, sizeof(tv)) != -1) asprintf(&tmp, "{%ld.%06ld }", (long)tv.tv_sec, tv.tv_usec); else asprintf(&tmp, "0x%lx", args[sc->offset]); break; } case Timeval2: { struct timeval tv[2]; if (get_struct(pid, (void *)args[sc->offset], &tv, sizeof(tv)) != -1) asprintf(&tmp, "{%ld.%06ld, %ld.%06ld }", (long)tv[0].tv_sec, tv[0].tv_usec, (long)tv[1].tv_sec, tv[1].tv_usec); else asprintf(&tmp, "0x%lx", args[sc->offset]); break; } case Itimerval: { struct itimerval itv; if (get_struct(pid, (void *)args[sc->offset], &itv, sizeof(itv)) != -1) asprintf(&tmp, "{%ld.%06ld, %ld.%06ld }", (long)itv.it_interval.tv_sec, itv.it_interval.tv_usec, (long)itv.it_value.tv_sec, itv.it_value.tv_usec); else asprintf(&tmp, "0x%lx", args[sc->offset]); break; } case LinuxSockArgs: { struct linux_socketcall_args largs; if (get_struct(pid, (void *)args[sc->offset], (void *)&largs, sizeof(largs)) == -1) { err(1, "get_struct %p", (void *)args[sc->offset]); } const char *what; char buf[30]; switch (largs.what) { case LINUX_SOCKET: what = "LINUX_SOCKET"; break; case LINUX_BIND: what = "LINUX_BIND"; break; case LINUX_CONNECT: what = "LINUX_CONNECT"; break; case LINUX_LISTEN: what = "LINUX_LISTEN"; break; case LINUX_ACCEPT: what = "LINUX_ACCEPT"; break; case LINUX_GETSOCKNAME: what = "LINUX_GETSOCKNAME"; break; case LINUX_GETPEERNAME: what = "LINUX_GETPEERNAME"; break; case LINUX_SOCKETPAIR: what = "LINUX_SOCKETPAIR"; break; case LINUX_SEND: what = "LINUX_SEND"; break; case LINUX_RECV: what = "LINUX_RECV"; break; case LINUX_SENDTO: what = "LINUX_SENDTO"; break; case LINUX_RECVFROM: what = "LINUX_RECVFROM"; break; case LINUX_SHUTDOWN: what = "LINUX_SHUTDOWN"; break; case LINUX_SETSOCKOPT: what = "LINUX_SETSOCKOPT"; break; case LINUX_GETSOCKOPT: what = "LINUX_GETSOCKOPT"; break; case LINUX_SENDMSG: what = "LINUX_SENDMSG"; break; case LINUX_RECVMSG: what = "LINUX_RECVMSG"; break; default: sprintf(buf, "%d", largs.what); what = buf; break; } asprintf(&tmp, "(0x%lx)%s, 0x%lx", args[sc->offset], what, (long unsigned int)largs.args); break; } case Pollfd: { /* * XXX: A Pollfd argument expects the /next/ syscall argument * to be the number of fds in the array. This matches the poll * syscall. */ struct pollfd *pfd; int numfds = args[sc->offset+1]; int bytes = sizeof(struct pollfd) * numfds; int i, tmpsize, u, used; const int per_fd = 100; if ((pfd = malloc(bytes)) == NULL) err(1, "Cannot malloc %d bytes for pollfd array", bytes); if (get_struct(pid, (void *)args[sc->offset], pfd, bytes) != -1) { used = 0; tmpsize = 1 + per_fd * numfds + 2; if ((tmp = malloc(tmpsize)) == NULL) err(1, "Cannot alloc %d bytes for poll output", tmpsize); tmp[used++] = '{'; for (i = 0; i < numfds; i++) { u = snprintf(tmp + used, per_fd, "%s%d/%s", i > 0 ? " " : "", pfd[i].fd, xlookup_bits(poll_flags, pfd[i].events)); if (u > 0) used += u < per_fd ? u : per_fd; } tmp[used++] = '}'; tmp[used++] = '\0'; } else { asprintf(&tmp, "0x%lx", args[sc->offset]); } free(pfd); break; } case Fd_set: { /* * XXX: A Fd_set argument expects the /first/ syscall argument * to be the number of fds in the array. This matches the * select syscall. */ fd_set *fds; int numfds = args[0]; int bytes = _howmany(numfds, _NFDBITS) * _NFDBITS; int i, tmpsize, u, used; const int per_fd = 20; if ((fds = malloc(bytes)) == NULL) err(1, "Cannot malloc %d bytes for fd_set array", bytes); if (get_struct(pid, (void *)args[sc->offset], fds, bytes) != -1) { used = 0; tmpsize = 1 + numfds * per_fd + 2; if ((tmp = malloc(tmpsize)) == NULL) err(1, "Cannot alloc %d bytes for fd_set " "output", tmpsize); tmp[used++] = '{'; for (i = 0; i < numfds; i++) { if (FD_ISSET(i, fds)) { u = snprintf(tmp + used, per_fd, "%d ", i); if (u > 0) used += u < per_fd ? u : per_fd; } } if (tmp[used-1] == ' ') used--; tmp[used++] = '}'; tmp[used++] = '\0'; } else asprintf(&tmp, "0x%lx", args[sc->offset]); free(fds); break; } case Signal: tmp = strsig2(args[sc->offset]); break; case Sigset: { long sig; sigset_t ss; int i, used; char *signame; sig = args[sc->offset]; if (get_struct(pid, (void *)args[sc->offset], (void *)&ss, sizeof(ss)) == -1) { asprintf(&tmp, "0x%lx", args[sc->offset]); break; } tmp = malloc(sys_nsig * 8); /* 7 bytes avg per signal name */ used = 0; for (i = 1; i < sys_nsig; i++) { if (sigismember(&ss, i)) { signame = strsig(i); used += sprintf(tmp + used, "%s|", signame); free(signame); } } if (used) tmp[used-1] = 0; else strcpy(tmp, "0x0"); break; } case Sigprocmask: { switch (args[sc->offset]) { #define S(a) case a: tmp = strdup(#a); break; S(SIG_BLOCK); S(SIG_UNBLOCK); S(SIG_SETMASK); #undef S } if (tmp == NULL) asprintf(&tmp, "0x%lx", args[sc->offset]); break; } case Fcntlflag: { /* XXX output depends on the value of the previous argument */ switch (args[sc->offset-1]) { case F_SETFD: tmp = strdup(xlookup_bits(fcntlfd_arg, args[sc->offset])); break; case F_SETFL: tmp = strdup(xlookup_bits(fcntlfl_arg, args[sc->offset])); break; case F_GETFD: case F_GETFL: case F_GETOWN: tmp = strdup(""); break; default: asprintf(&tmp, "0x%lx", args[sc->offset]); break; } break; } case Open: tmp = strdup(xlookup_bits(open_flags, args[sc->offset])); break; case Fcntl: tmp = strdup(xlookup(fcntl_arg, args[sc->offset])); break; case Mprot: tmp = strdup(xlookup_bits(mprot_flags, args[sc->offset])); break; case Mmapflags: { char *base, *alignstr; int align, flags; /* * MAP_ALIGNED can't be handled by xlookup_bits(), so * generate that string manually and prepend it to the * string from xlookup_bits(). Have to be careful to * avoid outputting MAP_ALIGNED|0 if MAP_ALIGNED is * the only flag. */ flags = args[sc->offset] & ~MAP_ALIGNMENT_MASK; align = args[sc->offset] & MAP_ALIGNMENT_MASK; if (align != 0) { if (align == MAP_ALIGNED_SUPER) alignstr = strdup("MAP_ALIGNED_SUPER"); else asprintf(&alignstr, "MAP_ALIGNED(%d)", align >> MAP_ALIGNMENT_SHIFT); if (flags == 0) { tmp = alignstr; break; } } else alignstr = NULL; base = strdup(xlookup_bits(mmap_flags, flags)); if (alignstr == NULL) { tmp = base; break; } asprintf(&tmp, "%s|%s", alignstr, base); free(alignstr); free(base); break; } case Whence: tmp = strdup(xlookup(whence_arg, args[sc->offset])); break; case Sockdomain: tmp = strdup(xlookup(sockdomain_arg, args[sc->offset])); break; case Socktype: tmp = strdup(xlookup(socktype_arg, args[sc->offset])); break; case Shutdown: tmp = strdup(xlookup(shutdown_arg, args[sc->offset])); break; case Resource: tmp = strdup(xlookup(resource_arg, args[sc->offset])); break; case Pathconf: tmp = strdup(xlookup(pathconf_arg, args[sc->offset])); break; case Rforkflags: tmp = strdup(xlookup_bits(rfork_flags, args[sc->offset])); break; case Sockaddr: { struct sockaddr_storage ss; char addr[64]; struct sockaddr_in *lsin; struct sockaddr_in6 *lsin6; struct sockaddr_un *sun; struct sockaddr *sa; char *p; u_char *q; int i; if (args[sc->offset] == 0) { asprintf(&tmp, "NULL"); break; } /* yuck: get ss_len */ if (get_struct(pid, (void *)args[sc->offset], (void *)&ss, sizeof(ss.ss_len) + sizeof(ss.ss_family)) == -1) err(1, "get_struct %p", (void *)args[sc->offset]); /* * If ss_len is 0, then try to guess from the sockaddr type. * AF_UNIX may be initialized incorrectly, so always frob * it by using the "right" size. */ if (ss.ss_len == 0 || ss.ss_family == AF_UNIX) { switch (ss.ss_family) { case AF_INET: ss.ss_len = sizeof(*lsin); break; case AF_UNIX: ss.ss_len = sizeof(*sun); break; default: /* hurrrr */ break; } } if (get_struct(pid, (void *)args[sc->offset], (void *)&ss, ss.ss_len) == -1) { err(2, "get_struct %p", (void *)args[sc->offset]); } switch (ss.ss_family) { case AF_INET: lsin = (struct sockaddr_in *)&ss; inet_ntop(AF_INET, &lsin->sin_addr, addr, sizeof addr); asprintf(&tmp, "{ AF_INET %s:%d }", addr, htons(lsin->sin_port)); break; case AF_INET6: lsin6 = (struct sockaddr_in6 *)&ss; inet_ntop(AF_INET6, &lsin6->sin6_addr, addr, sizeof addr); asprintf(&tmp, "{ AF_INET6 [%s]:%d }", addr, htons(lsin6->sin6_port)); break; case AF_UNIX: sun = (struct sockaddr_un *)&ss; asprintf(&tmp, "{ AF_UNIX \"%s\" }", sun->sun_path); break; default: sa = (struct sockaddr *)&ss; asprintf(&tmp, "{ sa_len = %d, sa_family = %d, sa_data " "= {%n%*s } }", (int)sa->sa_len, (int)sa->sa_family, &i, 6 * (int)(sa->sa_len - ((char *)&sa->sa_data - (char *)sa)), ""); if (tmp != NULL) { p = tmp + i; for (q = (u_char *)&sa->sa_data; q < (u_char *)sa + sa->sa_len; q++) p += sprintf(p, " %#02x,", *q); } } break; } case Sigaction: { struct sigaction sa; char *hand; const char *h; if (get_struct(pid, (void *)args[sc->offset], &sa, sizeof(sa)) != -1) { asprintf(&hand, "%p", sa.sa_handler); if (sa.sa_handler == SIG_DFL) h = "SIG_DFL"; else if (sa.sa_handler == SIG_IGN) h = "SIG_IGN"; else h = hand; asprintf(&tmp, "{ %s %s ss_t }", h, xlookup_bits(sigaction_flags, sa.sa_flags)); free(hand); } else asprintf(&tmp, "0x%lx", args[sc->offset]); break; } case Kevent: { /* * XXX XXX: the size of the array is determined by either the * next syscall argument, or by the syscall returnvalue, * depending on which argument number we are. This matches the * kevent syscall, but luckily that's the only syscall that uses * them. */ struct kevent *ke; int numevents = -1; int bytes = 0; int i, tmpsize, u, used; const int per_ke = 100; if (sc->offset == 1) numevents = args[sc->offset+1]; else if (sc->offset == 3 && retval != -1) numevents = retval; if (numevents >= 0) bytes = sizeof(struct kevent) * numevents; if ((ke = malloc(bytes)) == NULL) err(1, "Cannot malloc %d bytes for kevent array", bytes); if (numevents >= 0 && get_struct(pid, (void *)args[sc->offset], ke, bytes) != -1) { used = 0; tmpsize = 1 + per_ke * numevents + 2; if ((tmp = malloc(tmpsize)) == NULL) err(1, "Cannot alloc %d bytes for kevent " "output", tmpsize); tmp[used++] = '{'; for (i = 0; i < numevents; i++) { u = snprintf(tmp + used, per_ke, "%s%p,%s,%s,%d,%p,%p", i > 0 ? " " : "", (void *)ke[i].ident, xlookup(kevent_filters, ke[i].filter), xlookup_bits(kevent_flags, ke[i].flags), ke[i].fflags, (void *)ke[i].data, (void *)ke[i].udata); if (u > 0) used += u < per_ke ? u : per_ke; } tmp[used++] = '}'; tmp[used++] = '\0'; } else { asprintf(&tmp, "0x%lx", args[sc->offset]); } free(ke); break; } case Stat: { struct stat st; if (get_struct(pid, (void *)args[sc->offset], &st, sizeof(st)) != -1) { char mode[12]; strmode(st.st_mode, mode); asprintf(&tmp, "{ mode=%s,inode=%jd,size=%jd,blksize=%ld }", mode, (intmax_t)st.st_ino, (intmax_t)st.st_size, (long)st.st_blksize); } else { asprintf(&tmp, "0x%lx", args[sc->offset]); } break; } case Rusage: { struct rusage ru; if (get_struct(pid, (void *)args[sc->offset], &ru, sizeof(ru)) != -1) { asprintf(&tmp, "{ u=%ld.%06ld,s=%ld.%06ld,in=%ld,out=%ld }", (long)ru.ru_utime.tv_sec, ru.ru_utime.tv_usec, (long)ru.ru_stime.tv_sec, ru.ru_stime.tv_usec, ru.ru_inblock, ru.ru_oublock); } else asprintf(&tmp, "0x%lx", args[sc->offset]); break; } case Rlimit: { struct rlimit rl; if (get_struct(pid, (void *)args[sc->offset], &rl, sizeof(rl)) != -1) { asprintf(&tmp, "{ cur=%ju,max=%ju }", rl.rlim_cur, rl.rlim_max); } else asprintf(&tmp, "0x%lx", args[sc->offset]); break; } case ExitStatus: { char *signame; int status; signame = NULL; if (get_struct(pid, (void *)args[sc->offset], &status, sizeof(status)) != -1) { if (WIFCONTINUED(status)) tmp = strdup("{ CONTINUED }"); else if (WIFEXITED(status)) asprintf(&tmp, "{ EXITED,val=%d }", WEXITSTATUS(status)); else if (WIFSIGNALED(status)) asprintf(&tmp, "{ SIGNALED,sig=%s%s }", signame = strsig2(WTERMSIG(status)), WCOREDUMP(status) ? ",cored" : ""); else asprintf(&tmp, "{ STOPPED,sig=%s }", signame = strsig2(WTERMSIG(status))); } else asprintf(&tmp, "0x%lx", args[sc->offset]); free(signame); break; } case Waitoptions: tmp = strdup(xlookup_bits(wait_options, args[sc->offset])); break; case Idtype: tmp = strdup(xlookup(idtype_arg, args[sc->offset])); break; case Procctl: tmp = strdup(xlookup(procctl_arg, args[sc->offset])); break; case Umtxop: tmp = strdup(xlookup(umtx_ops, args[sc->offset])); break; default: errx(1, "Invalid argument type %d\n", sc->type & ARG_MASK); } return (tmp); } /* * print_syscall * Print (to outfile) the system call and its arguments. Note that * nargs is the number of arguments (not the number of words; this is * potentially confusing, I know). */ void print_syscall(struct trussinfo *trussinfo, const char *name, int nargs, char **s_args) { struct timespec timediff; int i, len; len = 0; if (trussinfo->flags & FOLLOWFORKS) len += fprintf(trussinfo->outfile, "%5d: ", trussinfo->pid); if (name != NULL && (strcmp(name, "execve") == 0 || strcmp(name, "exit") == 0)) { clock_gettime(CLOCK_REALTIME, &trussinfo->curthread->after); } if (trussinfo->flags & ABSOLUTETIMESTAMPS) { timespecsubt(&trussinfo->curthread->after, &trussinfo->start_time, &timediff); len += fprintf(trussinfo->outfile, "%ld.%09ld ", (long)timediff.tv_sec, timediff.tv_nsec); } if (trussinfo->flags & RELATIVETIMESTAMPS) { timespecsubt(&trussinfo->curthread->after, &trussinfo->curthread->before, &timediff); len += fprintf(trussinfo->outfile, "%ld.%09ld ", (long)timediff.tv_sec, timediff.tv_nsec); } len += fprintf(trussinfo->outfile, "%s(", name); for (i = 0; i < nargs; i++) { if (s_args[i]) len += fprintf(trussinfo->outfile, "%s", s_args[i]); else len += fprintf(trussinfo->outfile, ""); len += fprintf(trussinfo->outfile, "%s", i < (nargs - 1) ? "," : ""); } len += fprintf(trussinfo->outfile, ")"); for (i = 0; i < 6 - (len / 8); i++) fprintf(trussinfo->outfile, "\t"); } void print_syscall_ret(struct trussinfo *trussinfo, const char *name, int nargs, char **s_args, int errorp, long retval, struct syscall *sc) { struct timespec timediff; if (trussinfo->flags & COUNTONLY) { if (!sc) return; clock_gettime(CLOCK_REALTIME, &trussinfo->curthread->after); timespecsubt(&trussinfo->curthread->after, &trussinfo->curthread->before, &timediff); timespecadd(&sc->time, &timediff, &sc->time); sc->ncalls++; if (errorp) sc->nerror++; return; } print_syscall(trussinfo, name, nargs, s_args); fflush(trussinfo->outfile); if (errorp) fprintf(trussinfo->outfile, " ERR#%ld '%s'\n", retval, strerror(retval)); else { /* * Because pipe(2) has a special assembly glue to provide the * libc API, we have to adjust retval. */ if (name != NULL && strcmp(name, "pipe") == 0) retval = 0; fprintf(trussinfo->outfile, " = %ld (0x%lx)\n", retval, retval); } } void print_summary(struct trussinfo *trussinfo) { struct timespec total = {0, 0}; struct syscall *sc; int ncall, nerror; fprintf(trussinfo->outfile, "%-20s%15s%8s%8s\n", "syscall", "seconds", "calls", "errors"); ncall = nerror = 0; for (sc = syscalls; sc->name != NULL; sc++) if (sc->ncalls) { fprintf(trussinfo->outfile, "%-20s%5jd.%09ld%8d%8d\n", sc->name, (intmax_t)sc->time.tv_sec, sc->time.tv_nsec, sc->ncalls, sc->nerror); timespecadd(&total, &sc->time, &total); ncall += sc->ncalls; nerror += sc->nerror; } fprintf(trussinfo->outfile, "%20s%15s%8s%8s\n", "", "-------------", "-------", "-------"); fprintf(trussinfo->outfile, "%-20s%5jd.%09ld%8d%8d\n", "", (intmax_t)total.tv_sec, total.tv_nsec, ncall, nerror); }