Index: stable/5/sys/compat/svr4/svr4_misc.c =================================================================== --- stable/5/sys/compat/svr4/svr4_misc.c (revision 145381) +++ stable/5/sys/compat/svr4/svr4_misc.c (revision 145382) @@ -1,1731 +1,1732 @@ /*- * Copyright (c) 1998 Mark Newton * Copyright (c) 1994 Christos Zoulas * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * SVR4 compatibility module. * * SVR4 system calls that are implemented differently in BSD are * handled here. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) #include #include #endif #if defined(NetBSD) # if defined(UVM) # include # endif #endif #define BSD_DIRENT(cp) ((struct dirent *)(cp)) static int svr4_mknod(struct thread *, register_t *, char *, svr4_mode_t, svr4_dev_t); static __inline clock_t timeval_to_clock_t(struct timeval *); static int svr4_setinfo (struct proc *, int, svr4_siginfo_t *); struct svr4_hrtcntl_args; static int svr4_hrtcntl (struct thread *, struct svr4_hrtcntl_args *, register_t *); static void bsd_statfs_to_svr4_statvfs(const struct statfs *, struct svr4_statvfs *); static void bsd_statfs_to_svr4_statvfs64(const struct statfs *, struct svr4_statvfs64 *); static struct proc *svr4_pfind(pid_t pid); /* BOGUS noop */ #if defined(BOGUS) int svr4_sys_setitimer(td, uap) register struct thread *td; struct svr4_sys_setitimer_args *uap; { td->td_retval[0] = 0; return 0; } #endif int svr4_sys_wait(td, uap) struct thread *td; struct svr4_sys_wait_args *uap; { int error, st, sig; error = kern_wait(td, WAIT_ANY, &st, 0, NULL); if (error) return (error); if (WIFSIGNALED(st)) { sig = WTERMSIG(st); if (sig >= 0 && sig < NSIG) st = (st & ~0177) | SVR4_BSD2SVR4_SIG(sig); } else if (WIFSTOPPED(st)) { sig = WSTOPSIG(st); if (sig >= 0 && sig < NSIG) st = (st & ~0xff00) | (SVR4_BSD2SVR4_SIG(sig) << 8); } /* * It looks like wait(2) on svr4/solaris/2.4 returns * the status in retval[1], and the pid on retval[0]. */ td->td_retval[1] = st; if (uap->status) error = copyout(&st, uap->status, sizeof(st)); return (error); } int svr4_sys_execv(td, uap) struct thread *td; struct svr4_sys_execv_args *uap; { struct execve_args ap; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(td, &sg, uap->path); ap.fname = uap->path; ap.argv = uap->argp; ap.envv = NULL; return execve(td, &ap); } int svr4_sys_execve(td, uap) struct thread *td; struct svr4_sys_execve_args *uap; { struct execve_args ap; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(td, &sg, uap->path); ap.fname = uap->path; ap.argv = uap->argp; ap.envv = uap->envp; return execve(td, &ap); } int svr4_sys_time(td, v) struct thread *td; struct svr4_sys_time_args *v; { struct svr4_sys_time_args *uap = v; int error = 0; struct timeval tv; microtime(&tv); if (uap->t) error = copyout(&tv.tv_sec, uap->t, sizeof(*(uap->t))); td->td_retval[0] = (int) tv.tv_sec; return error; } /* * Read SVR4-style directory entries. We suck them into kernel space so * that they can be massaged before being copied out to user code. * * This code is ported from the Linux emulator: Changes to the VFS interface * between FreeBSD and NetBSD have made it simpler to port it from there than * to adapt the NetBSD version. 
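 *
 * [Editorial note: the block between this comment and svr4_sys_getdents64()
 * below is not part of the original file.  It is a minimal, hypothetical
 * userland sketch of the record conversion these routines perform; the
 * struct layout and the reclen arithmetic are assumptions modeled on the
 * svr4_dirent/SVR4_RECLEN usage in this file, not verbatim kernel
 * definitions.]
 */

#include <stddef.h>	/* editorial sketch only: offsetof, size_t */
#include <string.h>	/* editorial sketch only: memcpy */

/* Illustrative stand-in for the SVR4-shaped directory record. */
struct svr4_dirent_sketch {
	long		d_ino;		/* inode number */
	long		d_off;		/* offset of the next record */
	unsigned short	d_reclen;	/* total record length */
	char		d_name[256];	/* NUL-terminated file name */
};

/* Header bytes plus name plus NUL, rounded up to a 4-byte boundary. */
#define	SVR4_RECLEN_SKETCH(namlen) \
	((offsetof(struct svr4_dirent_sketch, d_name) + (namlen) + 1 + 3) & ~3)

/*
 * Stage one BSD dirent's fields into an SVR4-shaped record held in a
 * kernel-local (here, caller-supplied) struct; the routines below then
 * push each staged record out with a single copyout().
 */
static size_t
svr4_convert_sketch(struct svr4_dirent_sketch *out, long fileno,
    long nextoff, const char *name, size_t namlen)
{
	out->d_ino = fileno;
	out->d_off = nextoff;
	out->d_reclen = (unsigned short)SVR4_RECLEN_SKETCH(namlen);
	memcpy(out->d_name, name, namlen);
	out->d_name[namlen] = '\0';
	return (out->d_reclen);
}

/*
 * [End of editorial sketch; the original comment resumes below.]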
*/ int svr4_sys_getdents64(td, uap) struct thread *td; struct svr4_sys_getdents64_args *uap; { register struct dirent *bdp; struct vnode *vp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* SVR4-format */ int resid, svr4reclen=0; /* SVR4-format */ struct file *fp; struct uio auio; struct iovec aiov; off_t off; struct svr4_dirent64 svr4_dirent; int buflen, error, eofflag, nbytes, justone; u_long *cookies = NULL, *cookiep; int ncookies; DPRINTF(("svr4_sys_getdents64(%d, *, %d)\n", uap->fd, uap->nbytes)); if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) { return (error); } if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; if (vp->v_type != VDIR) { fdrop(fp, td); return (EINVAL); } nbytes = uap->nbytes; if (nbytes == 1) { nbytes = sizeof (struct svr4_dirent64); justone = 1; } else justone = 0; off = fp->f_offset; #define DIRBLKSIZ 512 /* XXX we used to use ufs's DIRBLKSIZ */ buflen = max(DIRBLKSIZ, nbytes); buflen = min(buflen, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); again: aiov.iov_base = buf; aiov.iov_len = buflen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_resid = buflen; auio.uio_offset = off; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } #ifdef MAC error = mac_check_vnode_readdir(td->td_ucred, vp); if (error) goto out; #endif error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookies); if (error) { goto out; } inp = buf; outp = (caddr_t) uap->dp; resid = nbytes; if ((len = buflen - auio.uio_resid) <= 0) { goto eof; } cookiep = cookies; if (cookies) { /* * When using cookies, the vfs has the option of reading from * a different offset than that supplied (UFS truncates the * offset to a block boundary to make sure that it never reads * partway through a directory entry, even if the directory * has been compacted). */ while (len > 0 && ncookies > 0 && *cookiep <= off) { bdp = (struct dirent *) inp; len -= bdp->d_reclen; inp += bdp->d_reclen; cookiep++; ncookies--; } } while (len > 0) { if (cookiep && ncookies == 0) break; bdp = (struct dirent *) inp; reclen = bdp->d_reclen; if (reclen & 3) { DPRINTF(("svr4_readdir: reclen=%d\n", reclen)); error = EFAULT; goto out; } if (bdp->d_fileno == 0) { inp += reclen; if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; len -= reclen; continue; } svr4reclen = SVR4_RECLEN(&svr4_dirent, bdp->d_namlen); if (reclen > len || resid < svr4reclen) { outp++; break; } svr4_dirent.d_ino = (long) bdp->d_fileno; if (justone) { /* * old svr4-style readdir usage. 
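 * A caller that passed nbytes == 1 gets the old single-entry readdir
 * behavior: exactly one record is returned, and (per the code below)
 * d_off is set to the converted record length while d_reclen carries
 * the name length.  (Editorial note, inferred from the nbytes == 1
 * special case at the top of this function.)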
*/ svr4_dirent.d_off = (svr4_off_t) svr4reclen; svr4_dirent.d_reclen = (u_short) bdp->d_namlen; } else { svr4_dirent.d_off = (svr4_off_t)(off + reclen); svr4_dirent.d_reclen = (u_short) svr4reclen; } strcpy(svr4_dirent.d_name, bdp->d_name); if ((error = copyout((caddr_t)&svr4_dirent, outp, svr4reclen))) goto out; inp += reclen; if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; outp += svr4reclen; resid -= svr4reclen; len -= reclen; if (justone) break; } if (outp == (caddr_t) uap->dp) goto again; fp->f_offset = off; if (justone) nbytes = resid + svr4reclen; eof: td->td_retval[0] = nbytes - resid; out: VOP_UNLOCK(vp, 0, td); fdrop(fp, td); if (cookies) free(cookies, M_TEMP); free(buf, M_TEMP); return error; } int svr4_sys_getdents(td, uap) struct thread *td; struct svr4_sys_getdents_args *uap; { struct dirent *bdp; struct vnode *vp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* SVR4-format */ int resid, svr4_reclen; /* SVR4-format */ struct file *fp; struct uio auio; struct iovec aiov; struct svr4_dirent idb; off_t off; /* true file offset */ int buflen, error, eofflag; u_long *cookiebuf = NULL, *cookie; int ncookies = 0, *retval = td->td_retval; if (uap->nbytes < 0) return (EINVAL); if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; if (vp->v_type != VDIR) { fdrop(fp, td); return (EINVAL); } buflen = min(MAXBSIZE, uap->nbytes); buf = malloc(buflen, M_TEMP, M_WAITOK); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); off = fp->f_offset; again: aiov.iov_base = buf; aiov.iov_len = buflen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_resid = buflen; auio.uio_offset = off; #ifdef MAC error = mac_check_vnode_readdir(td->td_ucred, vp); if (error) goto out; #endif /* * First we read into the malloc'ed buffer, then * we massage it into user space, one record at a time. */ error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookiebuf); if (error) { goto out; } inp = buf; outp = uap->buf; resid = uap->nbytes; if ((len = buflen - auio.uio_resid) == 0) goto eof; for (cookie = cookiebuf; len > 0; len -= reclen) { bdp = (struct dirent *)inp; reclen = bdp->d_reclen; if (reclen & 3) panic("svr4_sys_getdents64: bad reclen"); off = *cookie++; /* each entry points to the next */ if ((off >> 32) != 0) { uprintf("svr4_sys_getdents64: dir offset too large for emulated program"); error = EINVAL; goto out; } if (bdp->d_fileno == 0) { inp += reclen; /* it is a hole; squish it out */ continue; } svr4_reclen = SVR4_RECLEN(&idb, bdp->d_namlen); if (reclen > len || resid < svr4_reclen) { /* entry too big for buffer, so just stop */ outp++; break; } /* * Massage in place to make a SVR4-shaped dirent (otherwise * we have to worry about touching user memory outside of * the copyout() call). 
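 * (Editorial note: staging the converted record in the local "idb" and
 * pushing it out with one copyout() means a bad user buffer shows up as
 * a clean copyout() error rather than a stray kernel access.)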
*/ idb.d_ino = (svr4_ino_t)bdp->d_fileno; idb.d_off = (svr4_off_t)off; idb.d_reclen = (u_short)svr4_reclen; strcpy(idb.d_name, bdp->d_name); if ((error = copyout((caddr_t)&idb, outp, svr4_reclen))) goto out; /* advance past this real entry */ inp += reclen; /* advance output past SVR4-shaped entry */ outp += svr4_reclen; resid -= svr4_reclen; } /* if we squished out the whole block, try again */ if (outp == uap->buf) goto again; fp->f_offset = off; /* update the vnode offset */ eof: *retval = uap->nbytes - resid; out: VOP_UNLOCK(vp, 0, td); fdrop(fp, td); if (cookiebuf) free(cookiebuf, M_TEMP); free(buf, M_TEMP); return error; } int svr4_sys_mmap(td, uap) struct thread *td; struct svr4_sys_mmap_args *uap; { struct mmap_args mm; int *retval; retval = td->td_retval; #define _MAP_NEW 0x80000000 /* * Verify the arguments. */ if (uap->prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) return EINVAL; /* XXX still needed? */ if (uap->len == 0) return EINVAL; mm.prot = uap->prot; mm.len = uap->len; mm.flags = uap->flags & ~_MAP_NEW; mm.fd = uap->fd; mm.addr = uap->addr; mm.pos = uap->pos; return mmap(td, &mm); } int svr4_sys_mmap64(td, uap) struct thread *td; struct svr4_sys_mmap64_args *uap; { struct mmap_args mm; void *rp; #define _MAP_NEW 0x80000000 /* * Verify the arguments. */ if (uap->prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) return EINVAL; /* XXX still needed? */ if (uap->len == 0) return EINVAL; mm.prot = uap->prot; mm.len = uap->len; mm.flags = uap->flags & ~_MAP_NEW; mm.fd = uap->fd; mm.addr = uap->addr; mm.pos = uap->pos; rp = (void *) round_page((vm_offset_t)(td->td_proc->p_vmspace->vm_daddr + maxdsiz)); if ((mm.flags & MAP_FIXED) == 0 && mm.addr != 0 && (void *)mm.addr < rp) mm.addr = rp; return mmap(td, &mm); } int svr4_sys_fchroot(td, uap) struct thread *td; struct svr4_sys_fchroot_args *uap; { struct filedesc *fdp = td->td_proc->p_fd; struct vnode *vp, *vpold; struct file *fp; int error; if ((error = suser(td)) != 0) return error; if ((error = getvnode(fdp, uap->fd, &fp)) != 0) return error; vp = fp->f_vnode; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); if (error) { fdrop(fp, td); return error; } VREF(vp); FILEDESC_LOCK_FAST(fdp); vpold = fdp->fd_rdir; fdp->fd_rdir = vp; FILEDESC_UNLOCK_FAST(fdp); if (vpold != NULL) vrele(vpold); fdrop(fp, td); return 0; } static int svr4_mknod(td, retval, path, mode, dev) struct thread *td; register_t *retval; char *path; svr4_mode_t mode; svr4_dev_t dev; { caddr_t sg = stackgap_init(); CHECKALTEXIST(td, &sg, path); if (S_ISFIFO(mode)) { struct mkfifo_args ap; ap.path = path; ap.mode = mode; return mkfifo(td, &ap); } else { struct mknod_args ap; ap.path = path; ap.mode = mode; ap.dev = dev; return mknod(td, &ap); } } int svr4_sys_mknod(td, uap) register struct thread *td; struct svr4_sys_mknod_args *uap; { int *retval = td->td_retval; return svr4_mknod(td, retval, uap->path, uap->mode, (svr4_dev_t)svr4_to_bsd_odev_t(uap->dev)); } int svr4_sys_xmknod(td, uap) struct thread *td; struct svr4_sys_xmknod_args *uap; { int *retval = td->td_retval; return svr4_mknod(td, retval, uap->path, uap->mode, (svr4_dev_t)svr4_to_bsd_dev_t(uap->dev)); } int svr4_sys_vhangup(td, uap) struct thread *td; struct svr4_sys_vhangup_args *uap; { return 0; } int svr4_sys_sysconfig(td, uap) struct thread *td; struct svr4_sys_sysconfig_args *uap; { int *retval; retval = &(td->td_retval[0]); switch (uap->name) { case SVR4_CONFIG_UNUSED: *retval = 0; break; case 
SVR4_CONFIG_NGROUPS: *retval = NGROUPS_MAX; break; case SVR4_CONFIG_CHILD_MAX: *retval = maxproc; break; case SVR4_CONFIG_OPEN_FILES: *retval = maxfiles; break; case SVR4_CONFIG_POSIX_VER: *retval = 198808; break; case SVR4_CONFIG_PAGESIZE: *retval = PAGE_SIZE; break; case SVR4_CONFIG_CLK_TCK: *retval = 60; /* should this be `hz', ie. 100? */ break; case SVR4_CONFIG_XOPEN_VER: *retval = 2; /* XXX: What should that be? */ break; case SVR4_CONFIG_PROF_TCK: *retval = 60; /* XXX: What should that be? */ break; case SVR4_CONFIG_NPROC_CONF: *retval = 1; /* Only one processor for now */ break; case SVR4_CONFIG_NPROC_ONLN: *retval = 1; /* And it better be online */ break; case SVR4_CONFIG_AIO_LISTIO_MAX: case SVR4_CONFIG_AIO_MAX: case SVR4_CONFIG_AIO_PRIO_DELTA_MAX: *retval = 0; /* No aio support */ break; case SVR4_CONFIG_DELAYTIMER_MAX: *retval = 0; /* No delaytimer support */ break; case SVR4_CONFIG_MQ_OPEN_MAX: *retval = msginfo.msgmni; break; case SVR4_CONFIG_MQ_PRIO_MAX: *retval = 0; /* XXX: Don't know */ break; case SVR4_CONFIG_RTSIG_MAX: *retval = 0; break; case SVR4_CONFIG_SEM_NSEMS_MAX: *retval = seminfo.semmni; break; case SVR4_CONFIG_SEM_VALUE_MAX: *retval = seminfo.semvmx; break; case SVR4_CONFIG_SIGQUEUE_MAX: *retval = 0; /* XXX: Don't know */ break; case SVR4_CONFIG_SIGRT_MIN: case SVR4_CONFIG_SIGRT_MAX: *retval = 0; /* No real time signals */ break; case SVR4_CONFIG_TIMER_MAX: *retval = 3; /* XXX: real, virtual, profiling */ break; #if defined(NOTYET) case SVR4_CONFIG_PHYS_PAGES: #if defined(UVM) *retval = uvmexp.free; /* XXX: free instead of total */ #else *retval = cnt.v_free_count; /* XXX: free instead of total */ #endif break; case SVR4_CONFIG_AVPHYS_PAGES: #if defined(UVM) *retval = uvmexp.active; /* XXX: active instead of avg */ #else *retval = cnt.v_active_count; /* XXX: active instead of avg */ #endif break; #endif /* NOTYET */ default: return EINVAL; } return 0; } /* ARGSUSED */ int svr4_sys_break(td, uap) struct thread *td; struct svr4_sys_break_args *uap; { struct proc *p = td->td_proc; struct vmspace *vm = p->p_vmspace; vm_offset_t new, old, base, ns; int rv; base = round_page((vm_offset_t) vm->vm_daddr); ns = (vm_offset_t)uap->nsize; new = round_page(ns); if (new > base) { PROC_LOCK(p); if ((new - base) > (unsigned)lim_cur(p, RLIMIT_DATA)) { PROC_UNLOCK(p); return ENOMEM; } PROC_UNLOCK(p); if (new >= VM_MAXUSER_ADDRESS) return (ENOMEM); } else if (new < base) { /* * This is simply an invalid value. If someone wants to * do fancy address space manipulations, mmap and munmap * can do most of what the user would want. 
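 * (Editorial note: a new break below the data-segment base would move
 * the break before the start of the heap, which brk() has never
 * allowed; legitimate shrinking within [base, old break) is handled by
 * the vm_map_remove() branch below.)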
*/ return EINVAL; } old = base + ctob(vm->vm_dsize); if (new > old) { vm_size_t diff; diff = new - old; PROC_LOCK(p); if (vm->vm_map.size + diff > lim_cur(p, RLIMIT_VMEM)) { PROC_UNLOCK(p); return(ENOMEM); } PROC_UNLOCK(p); rv = vm_map_find(&vm->vm_map, NULL, 0, &old, diff, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (rv != KERN_SUCCESS) { return (ENOMEM); } vm->vm_dsize += btoc(diff); } else if (new < old) { rv = vm_map_remove(&vm->vm_map, new, old); if (rv != KERN_SUCCESS) { return (ENOMEM); } vm->vm_dsize -= btoc(old - new); } return (0); } static __inline clock_t timeval_to_clock_t(tv) struct timeval *tv; { return tv->tv_sec * hz + tv->tv_usec / (1000000 / hz); } int svr4_sys_times(td, uap) struct thread *td; struct svr4_sys_times_args *uap; { int error, *retval = td->td_retval; struct tms tms; struct timeval t; struct rusage *ru; struct rusage r; struct getrusage_args ga; caddr_t sg = stackgap_init(); ru = stackgap_alloc(&sg, sizeof(struct rusage)); ga.who = RUSAGE_SELF; ga.rusage = ru; error = getrusage(td, &ga); if (error) return error; if ((error = copyin(ru, &r, sizeof r)) != 0) return error; tms.tms_utime = timeval_to_clock_t(&r.ru_utime); tms.tms_stime = timeval_to_clock_t(&r.ru_stime); ga.who = RUSAGE_CHILDREN; error = getrusage(td, &ga); if (error) return error; if ((error = copyin(ru, &r, sizeof r)) != 0) return error; tms.tms_cutime = timeval_to_clock_t(&r.ru_utime); tms.tms_cstime = timeval_to_clock_t(&r.ru_stime); microtime(&t); *retval = timeval_to_clock_t(&t); return copyout(&tms, uap->tp, sizeof(tms)); } int svr4_sys_ulimit(td, uap) struct thread *td; struct svr4_sys_ulimit_args *uap; { int *retval = td->td_retval; int error; switch (uap->cmd) { case SVR4_GFILLIM: PROC_LOCK(td->td_proc); *retval = lim_cur(td->td_proc, RLIMIT_FSIZE) / 512; PROC_UNLOCK(td->td_proc); if (*retval == -1) *retval = 0x7fffffff; return 0; case SVR4_SFILLIM: { struct rlimit krl; krl.rlim_cur = uap->newlimit * 512; PROC_LOCK(td->td_proc); krl.rlim_max = lim_max(td->td_proc, RLIMIT_FSIZE); PROC_UNLOCK(td->td_proc); error = kern_setrlimit(td, RLIMIT_FSIZE, &krl); if (error) return error; PROC_LOCK(td->td_proc); *retval = lim_cur(td->td_proc, RLIMIT_FSIZE); PROC_UNLOCK(td->td_proc); if (*retval == -1) *retval = 0x7fffffff; return 0; } case SVR4_GMEMLIM: { struct vmspace *vm = td->td_proc->p_vmspace; register_t r; PROC_LOCK(td->td_proc); r = lim_cur(td->td_proc, RLIMIT_DATA); PROC_UNLOCK(td->td_proc); if (r == -1) r = 0x7fffffff; mtx_lock(&Giant); /* XXX */ r += (long) vm->vm_daddr; mtx_unlock(&Giant); if (r < 0) r = 0x7fffffff; *retval = r; return 0; } case SVR4_GDESLIM: PROC_LOCK(td->td_proc); *retval = lim_cur(td->td_proc, RLIMIT_NOFILE); PROC_UNLOCK(td->td_proc); if (*retval == -1) *retval = 0x7fffffff; return 0; default: return EINVAL; } } static struct proc * svr4_pfind(pid) pid_t pid; { struct proc *p; /* look in the live processes */ if ((p = pfind(pid)) == NULL) /* look in the zombies */ p = zpfind(pid); return p; } int svr4_sys_pgrpsys(td, uap) struct thread *td; struct svr4_sys_pgrpsys_args *uap; { int *retval = td->td_retval; struct proc *p = td->td_proc; switch (uap->cmd) { case 1: /* setpgrp() */ /* * SVR4 setpgrp() (which takes no arguments) has the * semantics that the session ID is also created anew, so * in almost every sense, setpgrp() is identical to * setsid() for SVR4. (Under BSD, the difference is that * a setpgid(0,0) will not create a new session.) 
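 * (Editorial note: the return value of setsid() below is deliberately
 * ignored; whether or not it succeeds, control falls through to case 0
 * and returns the current process group id, matching the SVR4 setpgrp()
 * return convention.)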
*/ setsid(td, NULL); /*FALLTHROUGH*/ case 0: /* getpgrp() */ PROC_LOCK(p); *retval = p->p_pgrp->pg_id; PROC_UNLOCK(p); return 0; case 2: /* getsid(pid) */ if (uap->pid == 0) PROC_LOCK(p); else if ((p = svr4_pfind(uap->pid)) == NULL) return ESRCH; /* * This has already been initialized to the pid of * the session leader. */ *retval = (register_t) p->p_session->s_sid; PROC_UNLOCK(p); return 0; case 3: /* setsid() */ return setsid(td, NULL); case 4: /* getpgid(pid) */ if (uap->pid == 0) PROC_LOCK(p); else if ((p = svr4_pfind(uap->pid)) == NULL) return ESRCH; *retval = (int) p->p_pgrp->pg_id; PROC_UNLOCK(p); return 0; case 5: /* setpgid(pid, pgid); */ { struct setpgid_args sa; sa.pid = uap->pid; sa.pgid = uap->pgid; return setpgid(td, &sa); } default: return EINVAL; } } #define syscallarg(x) union { x datum; register_t pad; } struct svr4_hrtcntl_args { int cmd; int fun; int clk; svr4_hrt_interval_t * iv; svr4_hrt_time_t * ti; }; static int svr4_hrtcntl(td, uap, retval) struct thread *td; struct svr4_hrtcntl_args *uap; register_t *retval; { switch (uap->fun) { case SVR4_HRT_CNTL_RES: DPRINTF(("htrcntl(RES)\n")); *retval = SVR4_HRT_USEC; return 0; case SVR4_HRT_CNTL_TOFD: DPRINTF(("htrcntl(TOFD)\n")); { struct timeval tv; svr4_hrt_time_t t; if (uap->clk != SVR4_HRT_CLK_STD) { DPRINTF(("clk == %d\n", uap->clk)); return EINVAL; } if (uap->ti == NULL) { DPRINTF(("ti NULL\n")); return EINVAL; } microtime(&tv); t.h_sec = tv.tv_sec; t.h_rem = tv.tv_usec; t.h_res = SVR4_HRT_USEC; return copyout(&t, uap->ti, sizeof(t)); } case SVR4_HRT_CNTL_START: DPRINTF(("htrcntl(START)\n")); return ENOSYS; case SVR4_HRT_CNTL_GET: DPRINTF(("htrcntl(GET)\n")); return ENOSYS; default: DPRINTF(("Bad htrcntl command %d\n", uap->fun)); return ENOSYS; } } int svr4_sys_hrtsys(td, uap) struct thread *td; struct svr4_sys_hrtsys_args *uap; { int *retval = td->td_retval; switch (uap->cmd) { case SVR4_HRT_CNTL: return svr4_hrtcntl(td, (struct svr4_hrtcntl_args *) uap, retval); case SVR4_HRT_ALRM: DPRINTF(("hrtalarm\n")); return ENOSYS; case SVR4_HRT_SLP: DPRINTF(("hrtsleep\n")); return ENOSYS; case SVR4_HRT_CAN: DPRINTF(("hrtcancel\n")); return ENOSYS; default: DPRINTF(("Bad hrtsys command %d\n", uap->cmd)); return EINVAL; } } static int svr4_setinfo(p, st, s) struct proc *p; int st; svr4_siginfo_t *s; { svr4_siginfo_t i; int sig; memset(&i, 0, sizeof(i)); i.si_signo = SVR4_SIGCHLD; i.si_errno = 0; /* XXX? 
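 * (Editorial note: SVR4 appears to define no meaningful errno for a
 * SIGCHLD siginfo, so zero looks like the safe choice; si_code and
 * si_status are derived from the BSD wait status below.)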
*/ if (p) { i.si_pid = p->p_pid; mtx_lock_spin(&sched_lock); if (p->p_state == PRS_ZOMBIE) { i.si_stime = p->p_ru->ru_stime.tv_sec; i.si_utime = p->p_ru->ru_utime.tv_sec; } else { i.si_stime = p->p_stats->p_ru.ru_stime.tv_sec; i.si_utime = p->p_stats->p_ru.ru_utime.tv_sec; } mtx_unlock_spin(&sched_lock); } if (WIFEXITED(st)) { i.si_status = WEXITSTATUS(st); i.si_code = SVR4_CLD_EXITED; } else if (WIFSTOPPED(st)) { sig = WSTOPSIG(st); if (sig >= 0 && sig < NSIG) i.si_status = SVR4_BSD2SVR4_SIG(sig); if (i.si_status == SVR4_SIGCONT) i.si_code = SVR4_CLD_CONTINUED; else i.si_code = SVR4_CLD_STOPPED; } else { sig = WTERMSIG(st); if (sig >= 0 && sig < NSIG) i.si_status = SVR4_BSD2SVR4_SIG(sig); if (WCOREDUMP(st)) i.si_code = SVR4_CLD_DUMPED; else i.si_code = SVR4_CLD_KILLED; } DPRINTF(("siginfo [pid %ld signo %d code %d errno %d status %d]\n", i.si_pid, i.si_signo, i.si_code, i.si_errno, i.si_status)); return copyout(&i, s, sizeof(i)); } int svr4_sys_waitsys(td, uap) struct thread *td; struct svr4_sys_waitsys_args *uap; { int nfound; int error, *retval = td->td_retval; struct proc *q, *t; switch (uap->grp) { case SVR4_P_PID: break; case SVR4_P_PGID: PROC_LOCK(td->td_proc); uap->id = -td->td_proc->p_pgid; PROC_UNLOCK(td->td_proc); break; case SVR4_P_ALL: uap->id = WAIT_ANY; break; default: return EINVAL; } DPRINTF(("waitsys(%d, %d, %p, %x)\n", uap->grp, uap->id, uap->info, uap->options)); loop: nfound = 0; sx_slock(&proctree_lock); LIST_FOREACH(q, &td->td_proc->p_children, p_sibling) { PROC_LOCK(q); if (uap->id != WAIT_ANY && q->p_pid != uap->id && q->p_pgid != -uap->id) { PROC_UNLOCK(q); DPRINTF(("pid %d pgid %d != %d\n", q->p_pid, q->p_pgid, uap->id)); continue; } nfound++; if ((q->p_state == PRS_ZOMBIE) && ((uap->options & (SVR4_WEXITED|SVR4_WTRAPPED)))) { PROC_UNLOCK(q); sx_sunlock(&proctree_lock); *retval = 0; DPRINTF(("found %d\n", q->p_pid)); error = svr4_setinfo(q, q->p_xstat, uap->info); if (error != 0) return error; if ((uap->options & SVR4_WNOWAIT)) { DPRINTF(("Don't wait\n")); return 0; } /* * If we got the child via ptrace(2) or procfs, and * the parent is different (meaning the process was * attached, rather than run as a child), then we need * to give it back to the old parent, and send the * parent a SIGCHLD. The rest of the cleanup will be * done when the old parent waits on the child. */ sx_xlock(&proctree_lock); PROC_LOCK(q); if (q->p_flag & P_TRACED) { if (q->p_oppid != q->p_pptr->p_pid) { PROC_UNLOCK(q); t = pfind(q->p_oppid); if (t == NULL) { t = initproc; PROC_LOCK(initproc); } PROC_LOCK(q); proc_reparent(q, t); q->p_oppid = 0; q->p_flag &= ~(P_TRACED | P_WAITED); PROC_UNLOCK(q); psignal(t, SIGCHLD); wakeup(t); PROC_UNLOCK(t); sx_xunlock(&proctree_lock); return 0; } } PROC_UNLOCK(q); sx_xunlock(&proctree_lock); q->p_xstat = 0; ruadd(&td->td_proc->p_stats->p_cru, q->p_ru); FREE(q->p_ru, M_ZOMBIE); q->p_ru = 0; /* * Decrement the count of procs running with this uid. */ (void)chgproccnt(q->p_ucred->cr_ruidinfo, -1, 0); /* * Release reference to text vnode. */ if (q->p_textvp) vrele(q->p_textvp); /* * Free up credentials. */ crfree(q->p_ucred); q->p_ucred = NULL; /* * Remove unused arguments */ pargs_drop(q->p_args); PROC_UNLOCK(q); /* * Finally finished with old proc entry. * Unlink it from its process group and free it. 
 */
*/ sx_xlock(&proctree_lock); leavepgrp(q); sx_xlock(&allproc_lock); LIST_REMOVE(q, p_list); /* off zombproc */ sx_xunlock(&allproc_lock); LIST_REMOVE(q, p_sibling); sx_xunlock(&proctree_lock); PROC_LOCK(q); sigacts_free(q->p_sigacts); q->p_sigacts = NULL; PROC_UNLOCK(q); /* * Give machine-dependent layer a chance * to free anything that cpu_exit couldn't * release while still running in process context. */ vm_waitproc(q); #if defined(__NetBSD__) pool_put(&proc_pool, q); #endif #ifdef __FreeBSD__ mtx_destroy(&q->p_mtx); #ifdef MAC mac_destroy_proc(q); #endif uma_zfree(proc_zone, q); #endif nprocs--; return 0; } /* XXXKSE this needs clarification */ if (P_SHOULDSTOP(q) && ((q->p_flag & P_WAITED) == 0) && (q->p_flag & P_TRACED || (uap->options & (SVR4_WSTOPPED|SVR4_WCONTINUED)))) { DPRINTF(("jobcontrol %d\n", q->p_pid)); if (((uap->options & SVR4_WNOWAIT)) == 0) q->p_flag |= P_WAITED; PROC_UNLOCK(q); *retval = 0; return svr4_setinfo(q, W_STOPCODE(q->p_xstat), uap->info); } PROC_UNLOCK(q); } if (nfound == 0) return ECHILD; if (uap->options & SVR4_WNOHANG) { *retval = 0; if ((error = svr4_setinfo(NULL, 0, uap->info)) != 0) return error; return 0; } if ((error = tsleep(td->td_proc, PWAIT | PCATCH, "svr4_wait", 0)) != 0) return error; goto loop; } static void bsd_statfs_to_svr4_statvfs(bfs, sfs) const struct statfs *bfs; struct svr4_statvfs *sfs; { sfs->f_bsize = bfs->f_iosize; /* XXX */ sfs->f_frsize = bfs->f_bsize; sfs->f_blocks = bfs->f_blocks; sfs->f_bfree = bfs->f_bfree; sfs->f_bavail = bfs->f_bavail; sfs->f_files = bfs->f_files; sfs->f_ffree = bfs->f_ffree; sfs->f_favail = bfs->f_ffree; sfs->f_fsid = bfs->f_fsid.val[0]; memcpy(sfs->f_basetype, bfs->f_fstypename, sizeof(sfs->f_basetype)); sfs->f_flag = 0; if (bfs->f_flags & MNT_RDONLY) sfs->f_flag |= SVR4_ST_RDONLY; if (bfs->f_flags & MNT_NOSUID) sfs->f_flag |= SVR4_ST_NOSUID; sfs->f_namemax = MAXNAMLEN; memcpy(sfs->f_fstr, bfs->f_fstypename, sizeof(sfs->f_fstr)); /* XXX */ memset(sfs->f_filler, 0, sizeof(sfs->f_filler)); } static void bsd_statfs_to_svr4_statvfs64(bfs, sfs) const struct statfs *bfs; struct svr4_statvfs64 *sfs; { sfs->f_bsize = bfs->f_iosize; /* XXX */ sfs->f_frsize = bfs->f_bsize; sfs->f_blocks = bfs->f_blocks; sfs->f_bfree = bfs->f_bfree; sfs->f_bavail = bfs->f_bavail; sfs->f_files = bfs->f_files; sfs->f_ffree = bfs->f_ffree; sfs->f_favail = bfs->f_ffree; sfs->f_fsid = bfs->f_fsid.val[0]; memcpy(sfs->f_basetype, bfs->f_fstypename, sizeof(sfs->f_basetype)); sfs->f_flag = 0; if (bfs->f_flags & MNT_RDONLY) sfs->f_flag |= SVR4_ST_RDONLY; if (bfs->f_flags & MNT_NOSUID) sfs->f_flag |= SVR4_ST_NOSUID; sfs->f_namemax = MAXNAMLEN; memcpy(sfs->f_fstr, bfs->f_fstypename, sizeof(sfs->f_fstr)); /* XXX */ memset(sfs->f_filler, 0, sizeof(sfs->f_filler)); } int svr4_sys_statvfs(td, uap) struct thread *td; struct svr4_sys_statvfs_args *uap; { struct statfs_args fs_args; caddr_t sg = stackgap_init(); struct statfs *fs = stackgap_alloc(&sg, sizeof(struct statfs)); struct statfs bfs; struct svr4_statvfs sfs; int error; CHECKALTEXIST(td, &sg, uap->path); fs_args.path = uap->path; fs_args.buf = fs; if ((error = statfs(td, &fs_args)) != 0) return error; if ((error = copyin(fs, &bfs, sizeof(bfs))) != 0) return error; bsd_statfs_to_svr4_statvfs(&bfs, &sfs); return copyout(&sfs, uap->fs, sizeof(sfs)); } int svr4_sys_fstatvfs(td, uap) struct thread *td; struct svr4_sys_fstatvfs_args *uap; { struct fstatfs_args fs_args; caddr_t sg = stackgap_init(); struct statfs *fs = stackgap_alloc(&sg, sizeof(struct statfs)); struct statfs bfs; struct svr4_statvfs 
sfs; int error; fs_args.fd = uap->fd; fs_args.buf = fs; if ((error = fstatfs(td, &fs_args)) != 0) return error; if ((error = copyin(fs, &bfs, sizeof(bfs))) != 0) return error; bsd_statfs_to_svr4_statvfs(&bfs, &sfs); return copyout(&sfs, uap->fs, sizeof(sfs)); } int svr4_sys_statvfs64(td, uap) struct thread *td; struct svr4_sys_statvfs64_args *uap; { struct statfs_args fs_args; caddr_t sg = stackgap_init(); struct statfs *fs = stackgap_alloc(&sg, sizeof(struct statfs)); struct statfs bfs; struct svr4_statvfs64 sfs; int error; CHECKALTEXIST(td, &sg, uap->path); fs_args.path = uap->path; fs_args.buf = fs; if ((error = statfs(td, &fs_args)) != 0) return error; if ((error = copyin(fs, &bfs, sizeof(bfs))) != 0) return error; bsd_statfs_to_svr4_statvfs64(&bfs, &sfs); return copyout(&sfs, uap->fs, sizeof(sfs)); } int svr4_sys_fstatvfs64(td, uap) struct thread *td; struct svr4_sys_fstatvfs64_args *uap; { struct fstatfs_args fs_args; caddr_t sg = stackgap_init(); struct statfs *fs = stackgap_alloc(&sg, sizeof(struct statfs)); struct statfs bfs; struct svr4_statvfs64 sfs; int error; fs_args.fd = uap->fd; fs_args.buf = fs; if ((error = fstatfs(td, &fs_args)) != 0) return error; if ((error = copyin(fs, &bfs, sizeof(bfs))) != 0) return error; bsd_statfs_to_svr4_statvfs64(&bfs, &sfs); return copyout(&sfs, uap->fs, sizeof(sfs)); } int svr4_sys_alarm(td, uap) struct thread *td; struct svr4_sys_alarm_args *uap; { int error; struct itimerval *itp, *oitp; struct setitimer_args sa; caddr_t sg = stackgap_init(); itp = stackgap_alloc(&sg, sizeof(*itp)); oitp = stackgap_alloc(&sg, sizeof(*oitp)); timevalclear(&itp->it_interval); itp->it_value.tv_sec = uap->sec; itp->it_value.tv_usec = 0; sa.which = ITIMER_REAL; sa.itv = itp; sa.oitv = oitp; error = setitimer(td, &sa); if (error) return error; if (oitp->it_value.tv_usec) oitp->it_value.tv_sec++; td->td_retval[0] = oitp->it_value.tv_sec; return 0; } int svr4_sys_gettimeofday(td, uap) struct thread *td; struct svr4_sys_gettimeofday_args *uap; { if (uap->tp) { struct timeval atv; microtime(&atv); return copyout(&atv, uap->tp, sizeof (atv)); } return 0; } int svr4_sys_facl(td, uap) struct thread *td; struct svr4_sys_facl_args *uap; { int *retval; retval = td->td_retval; *retval = 0; switch (uap->cmd) { case SVR4_SYS_SETACL: /* We don't support acls on any filesystem */ return ENOSYS; case SVR4_SYS_GETACL: return copyout(retval, &uap->num, sizeof(uap->num)); case SVR4_SYS_GETACLCNT: return 0; default: return EINVAL; } } int svr4_sys_acl(td, uap) struct thread *td; struct svr4_sys_acl_args *uap; { /* XXX: for now the same */ return svr4_sys_facl(td, (struct svr4_sys_facl_args *)uap); } int svr4_sys_auditsys(td, uap) struct thread *td; struct svr4_sys_auditsys_args *uap; { /* * XXX: Big brother is *not* watching. 
*/ return 0; } int svr4_sys_memcntl(td, uap) struct thread *td; struct svr4_sys_memcntl_args *uap; { switch (uap->cmd) { case SVR4_MC_SYNC: { struct msync_args msa; msa.addr = uap->addr; msa.len = uap->len; msa.flags = (int)uap->arg; return msync(td, &msa); } case SVR4_MC_ADVISE: { struct madvise_args maa; maa.addr = uap->addr; maa.len = uap->len; maa.behav = (int)uap->arg; return madvise(td, &maa); } case SVR4_MC_LOCK: case SVR4_MC_UNLOCK: case SVR4_MC_LOCKAS: case SVR4_MC_UNLOCKAS: return EOPNOTSUPP; default: return ENOSYS; } } int svr4_sys_nice(td, uap) struct thread *td; struct svr4_sys_nice_args *uap; { struct setpriority_args ap; int error; ap.which = PRIO_PROCESS; ap.who = 0; ap.prio = uap->prio; if ((error = setpriority(td, &ap)) != 0) return error; /* the cast is stupid, but the structures are the same */ if ((error = getpriority(td, (struct getpriority_args *)&ap)) != 0) return error; return 0; } int svr4_sys_resolvepath(td, uap) struct thread *td; struct svr4_sys_resolvepath_args *uap; { struct nameidata nd; int error, *retval = td->td_retval; unsigned int ncopy; NDINIT(&nd, LOOKUP, NOFOLLOW | SAVENAME, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return error; ncopy = min(uap->bufsiz, strlen(nd.ni_cnd.cn_pnbuf) + 1); if ((error = copyout(nd.ni_cnd.cn_pnbuf, uap->buf, ncopy)) != 0) goto bad; *retval = ncopy; bad: NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_vp); return error; } Index: stable/5/sys/i386/ibcs2/ibcs2_misc.c =================================================================== --- stable/5/sys/i386/ibcs2/ibcs2_misc.c (revision 145381) +++ stable/5/sys/i386/ibcs2/ibcs2_misc.c (revision 145382) @@ -1,1205 +1,1206 @@ /*- * Copyright (c) 1995 Steven Wallace * Copyright (c) 1994, 1995 Scott Bartram * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Header: sun_misc.c,v 1.16 93/04/07 02:46:27 torek Exp * * @(#)sun_misc.c 8.1 (Berkeley) 6/18/93 */ #include __FBSDID("$FreeBSD$"); /* * IBCS2 compatibility module. * * IBCS2 system calls that are implemented differently in BSD are * handled here. */ #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include int ibcs2_ulimit(td, uap) struct thread *td; struct ibcs2_ulimit_args *uap; { struct rlimit rl; struct proc *p; int error; #define IBCS2_GETFSIZE 1 #define IBCS2_SETFSIZE 2 #define IBCS2_GETPSIZE 3 #define IBCS2_GETDTABLESIZE 4 p = td->td_proc; switch (uap->cmd) { case IBCS2_GETFSIZE: PROC_LOCK(p); td->td_retval[0] = lim_cur(p, RLIMIT_FSIZE); PROC_UNLOCK(p); if (td->td_retval[0] == -1) td->td_retval[0] = 0x7fffffff; return 0; case IBCS2_SETFSIZE: PROC_LOCK(p); rl.rlim_max = lim_max(p, RLIMIT_FSIZE); PROC_UNLOCK(p); rl.rlim_cur = uap->newlimit; error = kern_setrlimit(td, RLIMIT_FSIZE, &rl); if (!error) { PROC_LOCK(p); td->td_retval[0] = lim_cur(p, RLIMIT_FSIZE); PROC_UNLOCK(p); } else { DPRINTF(("failed ")); } return error; case IBCS2_GETPSIZE: PROC_LOCK(p); td->td_retval[0] = lim_cur(p, RLIMIT_RSS); /* XXX */ PROC_UNLOCK(p); return 0; case IBCS2_GETDTABLESIZE: uap->cmd = IBCS2_SC_OPEN_MAX; return ibcs2_sysconf(td, (struct ibcs2_sysconf_args *)uap); default: return ENOSYS; } } #define IBCS2_WSTOPPED 0177 #define IBCS2_STOPCODE(sig) ((sig) << 8 | IBCS2_WSTOPPED) int ibcs2_wait(td, uap) struct thread *td; struct ibcs2_wait_args *uap; { int error, options, status; int *statusp; pid_t pid; struct trapframe *tf = td->td_frame; if ((tf->tf_eflags & (PSL_Z|PSL_PF|PSL_N|PSL_V)) == (PSL_Z|PSL_PF|PSL_N|PSL_V)) { /* waitpid */ pid = uap->a1; statusp = (int *)uap->a2; options = uap->a3; } else { /* wait */ pid = WAIT_ANY; statusp = (int *)uap->a1; options = 0; } error = kern_wait(td, pid, &status, options, NULL); if (error) return error; if (statusp) { /* * Convert status/signal result. 
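 * Stopped children use the iBCS2 stop encoding IBCS2_STOPCODE(), i.e.
 * (translated signal << 8) | 0177; terminated children report the
 * translated signal in the low bits; plain exit statuses are
 * bit-compatible and pass through unchanged.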
*/ if (WIFSTOPPED(status)) { if (WSTOPSIG(status) <= 0 || WSTOPSIG(status) > IBCS2_SIGTBLSZ) return (EINVAL); status = IBCS2_STOPCODE(bsd_to_ibcs2_sig[_SIG_IDX(WSTOPSIG(status))]); } else if (WIFSIGNALED(status)) { if (WTERMSIG(status) <= 0 || WTERMSIG(status) > IBCS2_SIGTBLSZ) return (EINVAL); status = bsd_to_ibcs2_sig[_SIG_IDX(WTERMSIG(status))]; } /* else exit status -- identical */ /* record result/status */ td->td_retval[1] = status; return copyout(&status, statusp, sizeof(status)); } return 0; } int ibcs2_execv(td, uap) struct thread *td; struct ibcs2_execv_args *uap; { struct execve_args ea; caddr_t sg = stackgap_init(); CHECKALTEXIST(td, &sg, uap->path); ea.fname = uap->path; ea.argv = uap->argp; ea.envv = NULL; return execve(td, &ea); } int ibcs2_execve(td, uap) struct thread *td; struct ibcs2_execve_args *uap; { caddr_t sg = stackgap_init(); CHECKALTEXIST(td, &sg, uap->path); return execve(td, (struct execve_args *)uap); } int ibcs2_umount(td, uap) struct thread *td; struct ibcs2_umount_args *uap; { struct unmount_args um; um.path = uap->name; um.flags = 0; return unmount(td, &um); } int ibcs2_mount(td, uap) struct thread *td; struct ibcs2_mount_args *uap; { #ifdef notyet int oflags = uap->flags, nflags, error; char fsname[MFSNAMELEN]; if (oflags & (IBCS2_MS_NOSUB | IBCS2_MS_SYS5)) return (EINVAL); if ((oflags & IBCS2_MS_NEWTYPE) == 0) return (EINVAL); nflags = 0; if (oflags & IBCS2_MS_RDONLY) nflags |= MNT_RDONLY; if (oflags & IBCS2_MS_NOSUID) nflags |= MNT_NOSUID; if (oflags & IBCS2_MS_REMOUNT) nflags |= MNT_UPDATE; uap->flags = nflags; if (error = copyinstr((caddr_t)uap->type, fsname, sizeof fsname, (u_int *)0)) return (error); if (strcmp(fsname, "4.2") == 0) { uap->type = (caddr_t)STACK_ALLOC(); if (error = copyout("ufs", uap->type, sizeof("ufs"))) return (error); } else if (strcmp(fsname, "nfs") == 0) { struct ibcs2_nfs_args sna; struct sockaddr_in sain; struct nfs_args na; struct sockaddr sa; if (error = copyin(uap->data, &sna, sizeof sna)) return (error); if (error = copyin(sna.addr, &sain, sizeof sain)) return (error); bcopy(&sain, &sa, sizeof sa); sa.sa_len = sizeof(sain); uap->data = (caddr_t)STACK_ALLOC(); na.addr = (struct sockaddr *)((int)uap->data + sizeof na); na.sotype = SOCK_DGRAM; na.proto = IPPROTO_UDP; na.fh = (nfsv2fh_t *)sna.fh; na.flags = sna.flags; na.wsize = sna.wsize; na.rsize = sna.rsize; na.timeo = sna.timeo; na.retrans = sna.retrans; na.hostname = sna.hostname; if (error = copyout(&sa, na.addr, sizeof sa)) return (error); if (error = copyout(&na, uap->data, sizeof na)) return (error); } return (mount(td, uap)); #else return EINVAL; #endif } /* * Read iBCS2-style directory entries. We suck them into kernel space so * that they can be massaged before being copied out to user code. Like * SunOS, we squish out `empty' entries. * * This is quite ugly, but what do you expect from compatibility code? 
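 *
 * [Editorial note: a small hypothetical sketch of the record layout this
 * routine emits follows.  Field widths are inferred from the 10-byte
 * header copyout() in the function below; "ibcs2_pack_sketch" is an
 * illustrative helper, not kernel API.]
 */

#include <stdint.h>	/* editorial sketch only */
#include <string.h>	/* editorial sketch only */

/*
 * Lay out one iBCS2-shaped record: a packed 10-byte header (32-bit
 * inode, 32-bit offset, 16-bit record length) followed by the
 * NUL-terminated name.  The kernel computes d_reclen as the BSD record
 * length plus sizeof(u_short) (IBCS2_RECLEN in the function below) and
 * advances the output pointer by that amount, so records may be spaced
 * wider than the bytes actually written.  Native i386 byte order is
 * assumed; buf must hold at least 10 + namlen + 1 bytes.
 */
static size_t
ibcs2_pack_sketch(unsigned char *buf, uint32_t ino, uint32_t off,
    uint16_t reclen, const char *name, size_t namlen)
{
	memcpy(buf + 0, &ino, sizeof(ino));		/* d_ino */
	memcpy(buf + 4, &off, sizeof(off));		/* d_off */
	memcpy(buf + 8, &reclen, sizeof(reclen));	/* d_reclen */
	memcpy(buf + 10, name, namlen + 1);		/* name + NUL */
	return ((size_t)10 + namlen + 1);
}

/*
 * [End of editorial sketch; the original comment resumes below.]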
*/ int ibcs2_getdents(td, uap) struct thread *td; register struct ibcs2_getdents_args *uap; { register struct vnode *vp; register caddr_t inp, buf; /* BSD-format */ register int len, reclen; /* BSD-format */ register caddr_t outp; /* iBCS2-format */ register int resid; /* iBCS2-format */ struct file *fp; struct uio auio; struct iovec aiov; struct ibcs2_dirent idb; off_t off; /* true file offset */ int buflen, error, eofflag; u_long *cookies = NULL, *cookiep; int ncookies; #define BSD_DIRENT(cp) ((struct dirent *)(cp)) #define IBCS2_RECLEN(reclen) (reclen + sizeof(u_short)) if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; if (vp->v_type != VDIR) { /* XXX vnode readdir op should do this */ fdrop(fp, td); return (EINVAL); } off = fp->f_offset; #define DIRBLKSIZ 512 /* XXX we used to use ufs's DIRBLKSIZ */ buflen = max(DIRBLKSIZ, uap->nbytes); buflen = min(buflen, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); again: aiov.iov_base = buf; aiov.iov_len = buflen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_resid = buflen; auio.uio_offset = off; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } #ifdef MAC error = mac_check_vnode_readdir(td->td_ucred, vp); if (error) goto out; #endif /* * First we read into the malloc'ed buffer, then * we massage it into user space, one record at a time. */ if ((error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookies)) != 0) goto out; inp = buf; outp = uap->buf; resid = uap->nbytes; if ((len = buflen - auio.uio_resid) <= 0) goto eof; cookiep = cookies; if (cookies) { /* * When using cookies, the vfs has the option of reading from * a different offset than that supplied (UFS truncates the * offset to a block boundary to make sure that it never reads * partway through a directory entry, even if the directory * has been compacted). */ while (len > 0 && ncookies > 0 && *cookiep <= off) { len -= BSD_DIRENT(inp)->d_reclen; inp += BSD_DIRENT(inp)->d_reclen; cookiep++; ncookies--; } } for (; len > 0; len -= reclen) { if (cookiep && ncookies == 0) break; reclen = BSD_DIRENT(inp)->d_reclen; if (reclen & 3) { printf("ibcs2_getdents: reclen=%d\n", reclen); error = EFAULT; goto out; } if (BSD_DIRENT(inp)->d_fileno == 0) { inp += reclen; /* it is a hole; squish it out */ if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; continue; } if (reclen > len || resid < IBCS2_RECLEN(reclen)) { /* entry too big for buffer, so just stop */ outp++; break; } /* * Massage in place to make an iBCS2-shaped dirent (otherwise * we have to worry about touching user memory outside of * the copyout() call). 
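 * Note the two-step copyout below: the fixed 10-byte header goes out
 * first, then the name (with its terminating NUL) lands at outp + 10.
 * (Editorial note.)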
*/ idb.d_ino = (ibcs2_ino_t)BSD_DIRENT(inp)->d_fileno; idb.d_off = (ibcs2_off_t)off; idb.d_reclen = (u_short)IBCS2_RECLEN(reclen); if ((error = copyout((caddr_t)&idb, outp, 10)) != 0 || (error = copyout(BSD_DIRENT(inp)->d_name, outp + 10, BSD_DIRENT(inp)->d_namlen + 1)) != 0) goto out; /* advance past this real entry */ if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; inp += reclen; /* advance output past iBCS2-shaped entry */ outp += IBCS2_RECLEN(reclen); resid -= IBCS2_RECLEN(reclen); } /* if we squished out the whole block, try again */ if (outp == uap->buf) goto again; fp->f_offset = off; /* update the vnode offset */ eof: td->td_retval[0] = uap->nbytes - resid; out: VOP_UNLOCK(vp, 0, td); fdrop(fp, td); if (cookies) free(cookies, M_TEMP); free(buf, M_TEMP); return (error); } int ibcs2_read(td, uap) struct thread *td; struct ibcs2_read_args *uap; { register struct vnode *vp; register caddr_t inp, buf; /* BSD-format */ register int len, reclen; /* BSD-format */ register caddr_t outp; /* iBCS2-format */ register int resid; /* iBCS2-format */ struct file *fp; struct uio auio; struct iovec aiov; struct ibcs2_direct { ibcs2_ino_t ino; char name[14]; } idb; off_t off; /* true file offset */ int buflen, error, eofflag, size; u_long *cookies = NULL, *cookiep; int ncookies; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) { if (error == EINVAL) return read(td, (struct read_args *)uap); else return error; } if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; if (vp->v_type != VDIR) { fdrop(fp, td); return read(td, (struct read_args *)uap); } off = fp->f_offset; if (vp->v_type != VDIR) return read(td, (struct read_args *)uap); DPRINTF(("ibcs2_read: read directory\n")); buflen = max(DIRBLKSIZ, uap->nbytes); buflen = min(buflen, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); again: aiov.iov_base = buf; aiov.iov_len = buflen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_resid = buflen; auio.uio_offset = off; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } #ifdef MAC error = mac_check_vnode_readdir(td->td_ucred, vp); if (error) goto out; #endif /* * First we read into the malloc'ed buffer, then * we massage it into user space, one record at a time. */ if ((error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookies)) != 0) { DPRINTF(("VOP_READDIR failed: %d\n", error)); goto out; } inp = buf; outp = uap->buf; resid = uap->nbytes; if ((len = buflen - auio.uio_resid) <= 0) goto eof; cookiep = cookies; if (cookies) { /* * When using cookies, the vfs has the option of reading from * a different offset than that supplied (UFS truncates the * offset to a block boundary to make sure that it never reads * partway through a directory entry, even if the directory * has been compacted). 
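 * The loop below therefore skips any records the vfs returned from
 * before the offset this file descriptor was actually positioned at.
 * (Editorial note.)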
*/ while (len > 0 && ncookies > 0 && *cookiep <= off) { len -= BSD_DIRENT(inp)->d_reclen; inp += BSD_DIRENT(inp)->d_reclen; cookiep++; ncookies--; } } for (; len > 0 && resid > 0; len -= reclen) { if (cookiep && ncookies == 0) break; reclen = BSD_DIRENT(inp)->d_reclen; if (reclen & 3) { printf("ibcs2_read: reclen=%d\n", reclen); error = EFAULT; goto out; } if (BSD_DIRENT(inp)->d_fileno == 0) { inp += reclen; /* it is a hole; squish it out */ if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; continue; } if (reclen > len || resid < sizeof(struct ibcs2_direct)) { /* entry too big for buffer, so just stop */ outp++; break; } /* * Massage in place to make an iBCS2-shaped dirent (otherwise * we have to worry about touching user memory outside of * the copyout() call). * * TODO: if length(filename) > 14, then break filename into * multiple entries and set inode = 0xffff except last */ idb.ino = (BSD_DIRENT(inp)->d_fileno > 0xfffe) ? 0xfffe : BSD_DIRENT(inp)->d_fileno; (void)copystr(BSD_DIRENT(inp)->d_name, idb.name, 14, &size); bzero(idb.name + size, 14 - size); if ((error = copyout(&idb, outp, sizeof(struct ibcs2_direct))) != 0) goto out; /* advance past this real entry */ if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; inp += reclen; /* advance output past iBCS2-shaped entry */ outp += sizeof(struct ibcs2_direct); resid -= sizeof(struct ibcs2_direct); } /* if we squished out the whole block, try again */ if (outp == uap->buf) goto again; fp->f_offset = off; /* update the vnode offset */ eof: td->td_retval[0] = uap->nbytes - resid; out: VOP_UNLOCK(vp, 0, td); fdrop(fp, td); if (cookies) free(cookies, M_TEMP); free(buf, M_TEMP); return (error); } int ibcs2_mknod(td, uap) struct thread *td; struct ibcs2_mknod_args *uap; { caddr_t sg = stackgap_init(); CHECKALTCREAT(td, &sg, uap->path); if (S_ISFIFO(uap->mode)) { struct mkfifo_args ap; ap.path = uap->path; ap.mode = uap->mode; return mkfifo(td, &ap); } else { struct mknod_args ap; ap.path = uap->path; ap.mode = uap->mode; ap.dev = uap->dev; return mknod(td, &ap); } } int ibcs2_getgroups(td, uap) struct thread *td; struct ibcs2_getgroups_args *uap; { int error, i; ibcs2_gid_t *iset = NULL; struct getgroups_args sa; gid_t *gp; caddr_t sg = stackgap_init(); if (uap->gidsetsize < 0) return (EINVAL); if (uap->gidsetsize > NGROUPS_MAX) uap->gidsetsize = NGROUPS_MAX; sa.gidsetsize = uap->gidsetsize; if (uap->gidsetsize) { sa.gidset = stackgap_alloc(&sg, NGROUPS_MAX * sizeof(gid_t *)); iset = stackgap_alloc(&sg, uap->gidsetsize * sizeof(ibcs2_gid_t)); } if ((error = getgroups(td, &sa)) != 0) return error; if (uap->gidsetsize == 0) return 0; for (i = 0, gp = sa.gidset; i < td->td_retval[0]; i++) iset[i] = (ibcs2_gid_t)*gp++; if (td->td_retval[0] && (error = copyout((caddr_t)iset, (caddr_t)uap->gidset, sizeof(ibcs2_gid_t) * td->td_retval[0]))) return error; return 0; } int ibcs2_setgroups(td, uap) struct thread *td; struct ibcs2_setgroups_args *uap; { int error, i; ibcs2_gid_t *iset; struct setgroups_args sa; gid_t *gp; caddr_t sg = stackgap_init(); if (uap->gidsetsize < 0 || uap->gidsetsize > NGROUPS_MAX) return (EINVAL); sa.gidsetsize = uap->gidsetsize; sa.gidset = stackgap_alloc(&sg, sa.gidsetsize * sizeof(gid_t *)); iset = stackgap_alloc(&sg, sa.gidsetsize * sizeof(ibcs2_gid_t *)); if (sa.gidsetsize) { if ((error = copyin((caddr_t)uap->gidset, (caddr_t)iset, sizeof(ibcs2_gid_t *) * uap->gidsetsize)) != 0) return error; } for (i = 0, gp = sa.gidset; i < sa.gidsetsize; i++) *gp++ = (gid_t)iset[i]; return setgroups(td, 
&sa); } int ibcs2_setuid(td, uap) struct thread *td; struct ibcs2_setuid_args *uap; { struct setuid_args sa; sa.uid = (uid_t)uap->uid; return setuid(td, &sa); } int ibcs2_setgid(td, uap) struct thread *td; struct ibcs2_setgid_args *uap; { struct setgid_args sa; sa.gid = (gid_t)uap->gid; return setgid(td, &sa); } int ibcs2_time(td, uap) struct thread *td; struct ibcs2_time_args *uap; { struct timeval tv; microtime(&tv); td->td_retval[0] = tv.tv_sec; if (uap->tp) return copyout((caddr_t)&tv.tv_sec, (caddr_t)uap->tp, sizeof(ibcs2_time_t)); else return 0; } int ibcs2_pathconf(td, uap) struct thread *td; struct ibcs2_pathconf_args *uap; { uap->name++; /* iBCS2 _PC_* defines are offset by one */ return pathconf(td, (struct pathconf_args *)uap); } int ibcs2_fpathconf(td, uap) struct thread *td; struct ibcs2_fpathconf_args *uap; { uap->name++; /* iBCS2 _PC_* defines are offset by one */ return fpathconf(td, (struct fpathconf_args *)uap); } int ibcs2_sysconf(td, uap) struct thread *td; struct ibcs2_sysconf_args *uap; { int mib[2], value, len, error; struct proc *p; p = td->td_proc; switch(uap->name) { case IBCS2_SC_ARG_MAX: mib[1] = KERN_ARGMAX; break; case IBCS2_SC_CHILD_MAX: PROC_LOCK(p); td->td_retval[0] = lim_cur(td->td_proc, RLIMIT_NPROC); PROC_UNLOCK(p); return 0; case IBCS2_SC_CLK_TCK: td->td_retval[0] = hz; return 0; case IBCS2_SC_NGROUPS_MAX: mib[1] = KERN_NGROUPS; break; case IBCS2_SC_OPEN_MAX: PROC_LOCK(p); td->td_retval[0] = lim_cur(td->td_proc, RLIMIT_NOFILE); PROC_UNLOCK(p); return 0; case IBCS2_SC_JOB_CONTROL: mib[1] = KERN_JOB_CONTROL; break; case IBCS2_SC_SAVED_IDS: mib[1] = KERN_SAVED_IDS; break; case IBCS2_SC_VERSION: mib[1] = KERN_POSIX1; break; case IBCS2_SC_PASS_MAX: td->td_retval[0] = 128; /* XXX - should we create PASS_MAX ? */ return 0; case IBCS2_SC_XOPEN_VERSION: td->td_retval[0] = 2; /* XXX: What should that be? 
*/ return 0; default: return EINVAL; } mib[0] = CTL_KERN; len = sizeof(value); error = kernel_sysctl(td, mib, 2, &value, &len, NULL, 0, NULL); if (error) return error; td->td_retval[0] = value; return 0; } int ibcs2_alarm(td, uap) struct thread *td; struct ibcs2_alarm_args *uap; { int error; struct itimerval *itp, *oitp; struct setitimer_args sa; caddr_t sg = stackgap_init(); itp = stackgap_alloc(&sg, sizeof(*itp)); oitp = stackgap_alloc(&sg, sizeof(*oitp)); timevalclear(&itp->it_interval); itp->it_value.tv_sec = uap->sec; itp->it_value.tv_usec = 0; sa.which = ITIMER_REAL; sa.itv = itp; sa.oitv = oitp; error = setitimer(td, &sa); if (error) return error; if (oitp->it_value.tv_usec) oitp->it_value.tv_sec++; td->td_retval[0] = oitp->it_value.tv_sec; return 0; } int ibcs2_times(td, uap) struct thread *td; struct ibcs2_times_args *uap; { int error; struct getrusage_args ga; struct tms tms; struct timeval t; caddr_t sg = stackgap_init(); struct rusage *ru = stackgap_alloc(&sg, sizeof(*ru)); #define CONVTCK(r) (r.tv_sec * hz + r.tv_usec / (1000000 / hz)) ga.who = RUSAGE_SELF; ga.rusage = ru; error = getrusage(td, &ga); if (error) return error; tms.tms_utime = CONVTCK(ru->ru_utime); tms.tms_stime = CONVTCK(ru->ru_stime); ga.who = RUSAGE_CHILDREN; error = getrusage(td, &ga); if (error) return error; tms.tms_cutime = CONVTCK(ru->ru_utime); tms.tms_cstime = CONVTCK(ru->ru_stime); microtime(&t); td->td_retval[0] = CONVTCK(t); return copyout((caddr_t)&tms, (caddr_t)uap->tp, sizeof(struct tms)); } int ibcs2_stime(td, uap) struct thread *td; struct ibcs2_stime_args *uap; { struct timeval tv; long secs; int error; error = copyin(uap->timep, &secs, sizeof(long)); if (error) return (error); tv.tv_sec = secs; tv.tv_usec = 0; error = kern_settimeofday(td, &tv, NULL); if (error) error = EPERM; return (error); } int ibcs2_utime(td, uap) struct thread *td; struct ibcs2_utime_args *uap; { int error; struct utimes_args sa; struct timeval *tp; caddr_t sg = stackgap_init(); CHECKALTEXIST(td, &sg, uap->path); sa.path = uap->path; if (uap->buf) { struct ibcs2_utimbuf ubuf; if ((error = copyin((caddr_t)uap->buf, (caddr_t)&ubuf, sizeof(ubuf))) != 0) return error; sa.tptr = stackgap_alloc(&sg, 2 * sizeof(struct timeval *)); tp = (struct timeval *)sa.tptr; tp->tv_sec = ubuf.actime; tp->tv_usec = 0; tp++; tp->tv_sec = ubuf.modtime; tp->tv_usec = 0; } else sa.tptr = NULL; return utimes(td, &sa); } int ibcs2_nice(td, uap) struct thread *td; struct ibcs2_nice_args *uap; { int error; struct setpriority_args sa; sa.which = PRIO_PROCESS; sa.who = 0; sa.prio = td->td_proc->p_nice + uap->incr; if ((error = setpriority(td, &sa)) != 0) return EPERM; td->td_retval[0] = td->td_proc->p_nice; return 0; } /* * iBCS2 getpgrp, setpgrp, setsid, and setpgid */ int ibcs2_pgrpsys(td, uap) struct thread *td; struct ibcs2_pgrpsys_args *uap; { struct proc *p = td->td_proc; switch (uap->type) { case 0: /* getpgrp */ PROC_LOCK(p); td->td_retval[0] = p->p_pgrp->pg_id; PROC_UNLOCK(p); return 0; case 1: /* setpgrp */ { struct setpgid_args sa; sa.pid = 0; sa.pgid = 0; setpgid(td, &sa); PROC_LOCK(p); td->td_retval[0] = p->p_pgrp->pg_id; PROC_UNLOCK(p); return 0; } case 2: /* setpgid */ { struct setpgid_args sa; sa.pid = uap->pid; sa.pgid = uap->pgid; return setpgid(td, &sa); } case 3: /* setsid */ return setsid(td, NULL); default: return EINVAL; } } /* * XXX - need to check for nested calls */ int ibcs2_plock(td, uap) struct thread *td; struct ibcs2_plock_args *uap; { int error; #define IBCS2_UNLOCK 0 #define IBCS2_PROCLOCK 1 #define IBCS2_TEXTLOCK 2 
#define IBCS2_DATALOCK 4 if ((error = suser(td)) != 0) return EPERM; switch(uap->cmd) { case IBCS2_UNLOCK: case IBCS2_PROCLOCK: case IBCS2_TEXTLOCK: case IBCS2_DATALOCK: return 0; /* XXX - TODO */ } return EINVAL; } int ibcs2_uadmin(td, uap) struct thread *td; struct ibcs2_uadmin_args *uap; { #define SCO_A_REBOOT 1 #define SCO_A_SHUTDOWN 2 #define SCO_A_REMOUNT 4 #define SCO_A_CLOCK 8 #define SCO_A_SETCONFIG 128 #define SCO_A_GETDEV 130 #define SCO_AD_HALT 0 #define SCO_AD_BOOT 1 #define SCO_AD_IBOOT 2 #define SCO_AD_PWRDOWN 3 #define SCO_AD_PWRNAP 4 #define SCO_AD_PANICBOOT 1 #define SCO_AD_GETBMAJ 0 #define SCO_AD_GETCMAJ 1 if (suser(td)) return EPERM; switch(uap->cmd) { case SCO_A_REBOOT: case SCO_A_SHUTDOWN: switch(uap->func) { struct reboot_args r; case SCO_AD_HALT: case SCO_AD_PWRDOWN: case SCO_AD_PWRNAP: r.opt = RB_HALT; reboot(td, &r); case SCO_AD_BOOT: case SCO_AD_IBOOT: r.opt = RB_AUTOBOOT; reboot(td, &r); } return EINVAL; case SCO_A_REMOUNT: case SCO_A_CLOCK: case SCO_A_SETCONFIG: return 0; case SCO_A_GETDEV: return EINVAL; /* XXX - TODO */ } return EINVAL; } int ibcs2_sysfs(td, uap) struct thread *td; struct ibcs2_sysfs_args *uap; { #define IBCS2_GETFSIND 1 #define IBCS2_GETFSTYP 2 #define IBCS2_GETNFSTYP 3 switch(uap->cmd) { case IBCS2_GETFSIND: case IBCS2_GETFSTYP: case IBCS2_GETNFSTYP: break; } return EINVAL; /* XXX - TODO */ } int ibcs2_unlink(td, uap) struct thread *td; struct ibcs2_unlink_args *uap; { caddr_t sg = stackgap_init(); CHECKALTEXIST(td, &sg, uap->path); return unlink(td, (struct unlink_args *)uap); } int ibcs2_chdir(td, uap) struct thread *td; struct ibcs2_chdir_args *uap; { caddr_t sg = stackgap_init(); CHECKALTEXIST(td, &sg, uap->path); return chdir(td, (struct chdir_args *)uap); } int ibcs2_chmod(td, uap) struct thread *td; struct ibcs2_chmod_args *uap; { caddr_t sg = stackgap_init(); CHECKALTEXIST(td, &sg, uap->path); return chmod(td, (struct chmod_args *)uap); } int ibcs2_chown(td, uap) struct thread *td; struct ibcs2_chown_args *uap; { caddr_t sg = stackgap_init(); CHECKALTEXIST(td, &sg, uap->path); return chown(td, (struct chown_args *)uap); } int ibcs2_rmdir(td, uap) struct thread *td; struct ibcs2_rmdir_args *uap; { caddr_t sg = stackgap_init(); CHECKALTEXIST(td, &sg, uap->path); return rmdir(td, (struct rmdir_args *)uap); } int ibcs2_mkdir(td, uap) struct thread *td; struct ibcs2_mkdir_args *uap; { caddr_t sg = stackgap_init(); CHECKALTCREAT(td, &sg, uap->path); return mkdir(td, (struct mkdir_args *)uap); } int ibcs2_symlink(td, uap) struct thread *td; struct ibcs2_symlink_args *uap; { caddr_t sg = stackgap_init(); CHECKALTEXIST(td, &sg, uap->path); CHECKALTCREAT(td, &sg, uap->link); return symlink(td, (struct symlink_args *)uap); } int ibcs2_rename(td, uap) struct thread *td; struct ibcs2_rename_args *uap; { caddr_t sg = stackgap_init(); CHECKALTEXIST(td, &sg, uap->from); CHECKALTCREAT(td, &sg, uap->to); return rename(td, (struct rename_args *)uap); } int ibcs2_readlink(td, uap) struct thread *td; struct ibcs2_readlink_args *uap; { caddr_t sg = stackgap_init(); CHECKALTEXIST(td, &sg, uap->path); return readlink(td, (struct readlink_args *) uap); } Index: stable/5/sys/kern/kern_exit.c =================================================================== --- stable/5/sys/kern/kern_exit.c (revision 145381) +++ stable/5/sys/kern/kern_exit.c (revision 145382) @@ -1,761 +1,762 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. 
* All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ktrace.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include /* for acct_process() function prototype */ #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include /* Required to be non-static for SysVR4 emulator */ MALLOC_DEFINE(M_ZOMBIE, "zombie", "zombie proc status"); /* * exit -- * Death of process. * * MPSAFE */ void sys_exit(struct thread *td, struct sys_exit_args *uap) { exit1(td, W_EXITCODE(uap->rval, 0)); /* NOTREACHED */ } /* * Exit: deallocate address space and other resources, change proc state * to zombie, and unlink proc from allproc and parent's lists. Save exit * status and rusage for wait(). Check for child processes and orphan them. */ void exit1(struct thread *td, int rv) { struct bintime new_switchtime; struct proc *p, *nq, *q; struct tty *tp; struct vnode *ttyvp; struct vmspace *vm; struct vnode *vtmp; #ifdef KTRACE struct vnode *tracevp; struct ucred *tracecred; #endif struct plimit *plim; int refcnt; /* * Drop Giant if caller has it. Eventually we should warn about * being called with Giant held. */ while (mtx_owned(&Giant)) mtx_unlock(&Giant); p = td->td_proc; if (p == initproc) { printf("init died (signal %d, exit %d)\n", WTERMSIG(rv), WEXITSTATUS(rv)); panic("Going nowhere without my init!"); } /* * MUST abort all other threads before proceeding past here. */ PROC_LOCK(p); if (p->p_flag & P_HADTHREADS) { retry: /* * First check if some other thread got here before us.. 
* if so, act appropriately (exit or suspend);
		 */
		thread_suspend_check(0);
		/*
		 * Kill off the other threads. This requires
		 * some co-operation from other parts of the kernel
		 * so it may not be instant.
		 * With this state set:
		 * Any thread entering the kernel from userspace will
		 * thread_exit() in trap().  Any thread attempting to
		 * sleep will return immediately with EINTR or EWOULDBLOCK,
		 * which will hopefully force them to back out to userland,
		 * freeing resources as they go, and anything attempting
		 * to return to userland will thread_exit() from userret().
		 * thread_exit() will unsuspend us when the last other
		 * thread exits.
		 * If another thread is already singling when we resume,
		 * the call to thread_single() will fail; in that case we
		 * just re-check all suspension requests, and the thread
		 * should either be suspended there or exit.
		 */
		if (thread_single(SINGLE_EXIT))
			goto retry;
		/*
		 * All other activity in this process is now stopped.
		 * Threading support has been turned off.
		 */
	}

	p->p_flag |= P_WEXIT;
	PROC_UNLOCK(p);

	/* Are we a task leader? */
	if (p == p->p_leader) {
		mtx_lock(&ppeers_lock);
		q = p->p_peers;
		while (q != NULL) {
			PROC_LOCK(q);
			psignal(q, SIGKILL);
			PROC_UNLOCK(q);
			q = q->p_peers;
		}
		while (p->p_peers != NULL)
			msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
		mtx_unlock(&ppeers_lock);
	}

	PROC_LOCK(p);
	_STOPEVENT(p, S_EXIT, rv);
	wakeup(&p->p_stype);	/* Wakeup anyone in procfs' PIOCWAIT */
	PROC_UNLOCK(p);

	/*
	 * Check if any loadable modules need anything done at process exit,
	 * e.g. SYSV IPC stuff.
	 * XXX what if one of these generates an error?
	 */
	EVENTHANDLER_INVOKE(process_exit, p);

	MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage),
		M_ZOMBIE, M_WAITOK);
	/*
	 * If parent is waiting for us to exit or exec,
	 * P_PPWAIT is set; we will wakeup the parent below.
	 */
	PROC_LOCK(p);
	stopprofclock(p);
	p->p_flag &= ~(P_TRACED | P_PPWAIT);
	SIGEMPTYSET(p->p_siglist);
	SIGEMPTYSET(td->td_siglist);

	/*
	 * Stop the real interval timer.  If the handler is currently
	 * executing, prevent it from rearming itself and let it finish.
	 */
	if (timevalisset(&p->p_realtimer.it_value) &&
	    callout_stop(&p->p_itcallout) == 0) {
		timevalclear(&p->p_realtimer.it_interval);
		msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0);
		KASSERT(!timevalisset(&p->p_realtimer.it_value),
		    ("realtime timer is still armed"));
	}
	PROC_UNLOCK(p);

	/*
	 * Reset any sigio structures pointing to us as a result of
	 * F_SETOWN with our pid.
	 */
	mtx_lock(&Giant);	/* XXX: not sure if needed */
	funsetownlst(&p->p_sigiolst);

	/*
	 * Close open files and release open-file table.
	 * This may block!
	 */
	fdfree(td);
	mtx_unlock(&Giant);

	/*
	 * If this thread tickled GEOM, we need to wait for the giggling to
	 * stop before we return to userland.
	 */
	if (td->td_pflags & TDP_GEOM)
		g_waitidle();

	/*
	 * Remove ourselves from our leader's peer list and wake our leader.
	 */
	mtx_lock(&ppeers_lock);
	if (p->p_leader->p_peers) {
		q = p->p_leader;
		while (q->p_peers != p)
			q = q->p_peers;
		q->p_peers = p->p_peers;
		wakeup(p->p_leader);
	}
	mtx_unlock(&ppeers_lock);

	/* The next two chunks should probably be moved to vmspace_exit. */
	vm = p->p_vmspace;
	/*
	 * Release user portion of address space.
	 * This releases references to vnodes,
	 * which could cause I/O if the file has been unlinked.
	 * Need to do this early enough that we can still sleep.
	 * Can't free the entire vmspace as the kernel stack
	 * may be mapped within that space also.
	 *
	 * Processes sharing the same vmspace may exit in one order, and
	 * get cleaned up by vmspace_exit() in a different order.
The * last exiting process to reach this point releases as much of * the environment as it can, and the last process cleaned up * by vmspace_exit() (which decrements exitingcnt) cleans up the * remainder. */ atomic_add_int(&vm->vm_exitingcnt, 1); do refcnt = vm->vm_refcnt; while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt - 1)); if (refcnt == 1) { shmexit(vm); pmap_remove_pages(vmspace_pmap(vm), vm_map_min(&vm->vm_map), vm_map_max(&vm->vm_map)); (void) vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map), vm_map_max(&vm->vm_map)); } mtx_lock(&Giant); sx_xlock(&proctree_lock); if (SESS_LEADER(p)) { struct session *sp; sp = p->p_session; if (sp->s_ttyvp) { /* * Controlling process. * Signal foreground pgrp, * drain controlling terminal * and revoke access to controlling terminal. */ if (sp->s_ttyp && (sp->s_ttyp->t_session == sp)) { tp = sp->s_ttyp; if (sp->s_ttyp->t_pgrp) { PGRP_LOCK(sp->s_ttyp->t_pgrp); pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1); PGRP_UNLOCK(sp->s_ttyp->t_pgrp); } /* XXX tp should be locked. */ sx_xunlock(&proctree_lock); (void) ttywait(tp); sx_xlock(&proctree_lock); /* * The tty could have been revoked * if we blocked. */ if (sp->s_ttyvp) { ttyvp = sp->s_ttyvp; SESS_LOCK(p->p_session); sp->s_ttyvp = NULL; SESS_UNLOCK(p->p_session); sx_xunlock(&proctree_lock); VOP_REVOKE(ttyvp, REVOKEALL); vrele(ttyvp); sx_xlock(&proctree_lock); } } if (sp->s_ttyvp) { ttyvp = sp->s_ttyvp; SESS_LOCK(p->p_session); sp->s_ttyvp = NULL; SESS_UNLOCK(p->p_session); vrele(ttyvp); } /* * s_ttyp is not zero'd; we use this to indicate * that the session once had a controlling terminal. * (for logging and informational purposes) */ } SESS_LOCK(p->p_session); sp->s_leader = NULL; SESS_UNLOCK(p->p_session); } fixjobc(p, p->p_pgrp, 0); sx_xunlock(&proctree_lock); (void)acct_process(td); mtx_unlock(&Giant); #ifdef KTRACE /* * release trace file */ PROC_LOCK(p); mtx_lock(&ktrace_mtx); p->p_traceflag = 0; /* don't trace the vrele() */ tracevp = p->p_tracevp; p->p_tracevp = NULL; tracecred = p->p_tracecred; p->p_tracecred = NULL; mtx_unlock(&ktrace_mtx); PROC_UNLOCK(p); if (tracevp != NULL) { mtx_lock(&Giant); vrele(tracevp); mtx_unlock(&Giant); } if (tracecred != NULL) crfree(tracecred); #endif /* * Release reference to text vnode */ if ((vtmp = p->p_textvp) != NULL) { p->p_textvp = NULL; mtx_lock(&Giant); vrele(vtmp); mtx_unlock(&Giant); } /* * Release our limits structure. */ PROC_LOCK(p); plim = p->p_limit; p->p_limit = NULL; PROC_UNLOCK(p); lim_free(plim); /* * Remove proc from allproc queue and pidhash chain. * Place onto zombproc. Unlink from parent's child list. */ sx_xlock(&allproc_lock); LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&zombproc, p, p_list); LIST_REMOVE(p, p_hash); sx_xunlock(&allproc_lock); sx_xlock(&proctree_lock); q = LIST_FIRST(&p->p_children); if (q != NULL) /* only need this if any child is S_ZOMB */ wakeup(initproc); for (; q != NULL; q = nq) { nq = LIST_NEXT(q, p_sibling); PROC_LOCK(q); proc_reparent(q, initproc); q->p_sigparent = SIGCHLD; /* * Traced processes are killed * since their existence means someone is screwing up. */ if (q->p_flag & P_TRACED) { q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE); psignal(q, SIGKILL); } PROC_UNLOCK(q); } /* * Save exit status and final rusage info, adding in child rusage * info and self times. 
	 */
	mtx_lock(&Giant);
	PROC_LOCK(p);
	p->p_xstat = rv;
	p->p_xthread = td;
	*p->p_ru = p->p_stats->p_ru;
	mtx_lock_spin(&sched_lock);
	calcru(p, &p->p_ru->ru_utime, &p->p_ru->ru_stime, NULL);
	mtx_unlock_spin(&sched_lock);
	ruadd(p->p_ru, &p->p_stats->p_cru);
	mtx_unlock(&Giant);

	/*
	 * Notify interested parties of our demise.
	 */
	KNOTE_LOCKED(&p->p_klist, NOTE_EXIT);
	/*
	 * Just delete all entries in the p_klist. At this point we won't
	 * report any more events, and there are nasty race conditions that
	 * can beat us if we don't.
	 */
	knlist_clear(&p->p_klist, 1);

	/*
	 * Notify parent that we're gone.  If parent has the PS_NOCLDWAIT
	 * flag set, or if the handler is set to SIG_IGN, notify process
	 * 1 instead (and hope it will handle this situation).
	 */
	PROC_LOCK(p->p_pptr);
	mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
	if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
		struct proc *pp;

		mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
		pp = p->p_pptr;
		PROC_UNLOCK(pp);
		proc_reparent(p, initproc);
		p->p_sigparent = SIGCHLD;
		PROC_LOCK(p->p_pptr);
		/*
		 * If this was the last child of our parent, notify
		 * the parent so that, if it was wait(2)ing, it will
		 * continue.
		 */
		if (LIST_EMPTY(&pp->p_children))
			wakeup(pp);
	} else
		mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);

	if (p->p_pptr == initproc)
		psignal(p->p_pptr, SIGCHLD);
	else if (p->p_sigparent != 0)
		psignal(p->p_pptr, p->p_sigparent);
	PROC_UNLOCK(p->p_pptr);

	/*
	 * If this is a kthread, then wakeup anyone waiting for it to exit.
	 */
	if (p->p_flag & P_KTHREAD)
		wakeup(p);
	PROC_UNLOCK(p);

	/*
	 * Finally, call machine-dependent code to release the remaining
	 * resources including address space.
	 * The address space is released by "vmspace_exitfree(p)" in
	 * vm_waitproc().
	 */
	cpu_exit(td);

	PROC_LOCK(p);
	PROC_LOCK(p->p_pptr);
	sx_xunlock(&proctree_lock);
	while (mtx_owned(&Giant))
		mtx_unlock(&Giant);

	/*
	 * We have to wait until after acquiring all locks before
	 * changing p_state.  We need to avoid any possible context
	 * switches while marked as a zombie, including blocking on
	 * a mutex.
	 */
	mtx_lock_spin(&sched_lock);
	p->p_state = PRS_ZOMBIE;
	critical_enter();
	mtx_unlock_spin(&sched_lock);

	wakeup(p->p_pptr);
	PROC_UNLOCK(p->p_pptr);

	mtx_lock_spin(&sched_lock);
	critical_exit();

	/* Do the same timestamp bookkeeping that mi_switch() would do. */
	binuptime(&new_switchtime);
	bintime_add(&p->p_runtime, &new_switchtime);
	bintime_sub(&p->p_runtime, PCPU_PTR(switchtime));
	PCPU_SET(switchtime, new_switchtime);
	PCPU_SET(switchticks, ticks);
	cnt.v_swtch++;
	sched_exit(p->p_pptr, td);

	/*
	 * Hopefully no one will try to deliver a signal to the process this
	 * late in the game.
	 */
	knlist_destroy(&p->p_klist);

	/*
	 * Make sure the scheduler takes this thread out of its tables etc.
	 * This will also release this thread's reference to the ucred.
	 * Other thread parts to release include pcb bits and such.
	 */
	thread_exit();
}

#ifdef COMPAT_43
/*
 * MPSAFE.  The dirty work is handled by kern_wait().
 */
int
owait(struct thread *td, struct owait_args *uap __unused)
{
	int error, status;

	error = kern_wait(td, WAIT_ANY, &status, 0, NULL);
	if (error == 0)
		td->td_retval[1] = status;
	return (error);
}
#endif /* COMPAT_43 */

/*
 * MPSAFE.  The dirty work is handled by kern_wait().
*/ int wait4(struct thread *td, struct wait_args *uap) { struct rusage ru; int error, status; error = kern_wait(td, uap->pid, &status, uap->options, &ru); if (uap->status != NULL && error == 0) error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0) error = copyout(&ru, uap->rusage, sizeof(struct rusage)); return (error); } int kern_wait(struct thread *td, pid_t pid, int *status, int options, struct rusage *rusage) { int nfound; struct proc *p, *q, *t; int error; q = td->td_proc; if (pid == 0) { PROC_LOCK(q); pid = -q->p_pgid; PROC_UNLOCK(q); } if (options &~ (WUNTRACED|WNOHANG|WCONTINUED|WLINUXCLONE)) return (EINVAL); loop: nfound = 0; sx_xlock(&proctree_lock); LIST_FOREACH(p, &q->p_children, p_sibling) { PROC_LOCK(p); if (pid != WAIT_ANY && p->p_pid != pid && p->p_pgid != -pid) { PROC_UNLOCK(p); continue; } /* * This special case handles a kthread spawned by linux_clone * (see linux_misc.c). The linux_wait4 and linux_waitpid * functions need to be able to distinguish between waiting * on a process and waiting on a thread. It is a thread if * p_sigparent is not SIGCHLD, and the WLINUXCLONE option * signifies we want to wait for threads and not processes. */ if ((p->p_sigparent != SIGCHLD) ^ ((options & WLINUXCLONE) != 0)) { PROC_UNLOCK(p); continue; } nfound++; if (p->p_state == PRS_ZOMBIE) { td->td_retval[0] = p->p_pid; if (status) *status = p->p_xstat; /* convert to int */ if (rusage) *rusage = *p->p_ru; /* * If we got the child via a ptrace 'attach', * we need to give it back to the old parent. */ PROC_UNLOCK(p); if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) { PROC_LOCK(p); p->p_oppid = 0; proc_reparent(p, t); PROC_UNLOCK(p); psignal(t, SIGCHLD); wakeup(t); PROC_UNLOCK(t); sx_xunlock(&proctree_lock); return (0); } /* * Remove other references to this process to ensure * we have an exclusive reference. */ sx_xlock(&allproc_lock); LIST_REMOVE(p, p_list); /* off zombproc */ sx_xunlock(&allproc_lock); LIST_REMOVE(p, p_sibling); leavepgrp(p); sx_xunlock(&proctree_lock); /* * As a side effect of this lock, we know that * all other writes to this proc are visible now, so * no more locking is needed for p. */ mtx_lock(&Giant); PROC_LOCK(p); p->p_xstat = 0; /* XXX: why? */ PROC_UNLOCK(p); PROC_LOCK(q); ruadd(&q->p_stats->p_cru, p->p_ru); PROC_UNLOCK(q); FREE(p->p_ru, M_ZOMBIE); p->p_ru = NULL; mtx_unlock(&Giant); /* * Decrement the count of procs running with this uid. */ (void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0); /* * Free credentials, arguments, and sigacts */ crfree(p->p_ucred); p->p_ucred = NULL; pargs_drop(p->p_args); p->p_args = NULL; sigacts_free(p->p_sigacts); p->p_sigacts = NULL; /* * do any thread-system specific cleanups */ thread_wait(p); /* * Give vm and machine-dependent layer a chance * to free anything that cpu_exit couldn't * release while still running in process context. 
*/ vm_waitproc(p); #ifdef MAC mac_destroy_proc(p); #endif KASSERT(FIRST_THREAD_IN_PROC(p), ("kern_wait: no residual thread!")); uma_zfree(proc_zone, p); sx_xlock(&allproc_lock); nprocs--; sx_xunlock(&allproc_lock); return (0); } mtx_lock_spin(&sched_lock); if (P_SHOULDSTOP(p) && (p->p_suspcount == p->p_numthreads) && ((p->p_flag & P_WAITED) == 0) && (p->p_flag & P_TRACED || options & WUNTRACED)) { mtx_unlock_spin(&sched_lock); p->p_flag |= P_WAITED; sx_xunlock(&proctree_lock); td->td_retval[0] = p->p_pid; if (status) *status = W_STOPCODE(p->p_xstat); PROC_UNLOCK(p); return (0); } mtx_unlock_spin(&sched_lock); if (options & WCONTINUED && (p->p_flag & P_CONTINUED)) { sx_xunlock(&proctree_lock); td->td_retval[0] = p->p_pid; p->p_flag &= ~P_CONTINUED; PROC_UNLOCK(p); if (status) *status = SIGCONT; return (0); } PROC_UNLOCK(p); } if (nfound == 0) { sx_xunlock(&proctree_lock); return (ECHILD); } if (options & WNOHANG) { sx_xunlock(&proctree_lock); td->td_retval[0] = 0; return (0); } PROC_LOCK(q); sx_xunlock(&proctree_lock); error = msleep(q, &q->p_mtx, PWAIT | PCATCH, "wait", 0); PROC_UNLOCK(q); if (error) return (error); goto loop; } /* * Make process 'parent' the new parent of process 'child'. * Must be called with an exclusive hold of proctree lock. */ void proc_reparent(struct proc *child, struct proc *parent) { sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(child, MA_OWNED); if (child->p_pptr == parent) return; LIST_REMOVE(child, p_sibling); LIST_INSERT_HEAD(&parent->p_children, child, p_sibling); child->p_pptr = parent; } Index: stable/5/sys/kern/kern_resource.c =================================================================== --- stable/5/sys/kern/kern_resource.c (revision 145381) +++ stable/5/sys/kern/kern_resource.c (revision 145382) @@ -1,1150 +1,1151 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include static int donice(struct thread *td, struct proc *chgp, int n); static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures"); static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures"); #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) static struct mtx uihashtbl_mtx; static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; static u_long uihash; /* size of hash table - 1 */ static struct uidinfo *uilookup(uid_t uid); /* * Resource controls and accounting. */ #ifndef _SYS_SYSPROTO_H_ struct getpriority_args { int which; int who; }; #endif /* * MPSAFE */ int getpriority(td, uap) struct thread *td; register struct getpriority_args *uap; { struct proc *p; int error, low; error = 0; low = PRIO_MAX + 1; switch (uap->which) { case PRIO_PROCESS: if (uap->who == 0) low = td->td_proc->p_nice; else { p = pfind(uap->who); if (p == NULL) break; if (p_cansee(td, p) == 0) { low = p->p_nice; } PROC_UNLOCK(p); } break; case PRIO_PGRP: { register struct pgrp *pg; sx_slock(&proctree_lock); if (uap->who == 0) { pg = td->td_proc->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(uap->who); if (pg == NULL) { sx_sunlock(&proctree_lock); break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (!p_cansee(td, p)) { if (p->p_nice < low) low = p->p_nice; } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; } case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); LIST_FOREACH(p, &allproc, p_list) { PROC_LOCK(p); if (!p_cansee(td, p) && p->p_ucred->cr_uid == uap->who) { if (p->p_nice < low) low = p->p_nice; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (low == PRIO_MAX + 1 && error == 0) error = ESRCH; td->td_retval[0] = low; return (error); } #ifndef _SYS_SYSPROTO_H_ struct setpriority_args { int which; int who; int prio; }; #endif /* * MPSAFE */ int setpriority(td, uap) struct thread *td; register struct setpriority_args *uap; { struct proc *curp; register struct proc *p; int found = 0, error = 0; curp = td->td_proc; switch (uap->which) { case PRIO_PROCESS: if (uap->who == 0) { PROC_LOCK(curp); error = donice(td, curp, uap->prio); PROC_UNLOCK(curp); } else { p = pfind(uap->who); if (p == 0) break; if (p_cansee(td, p) == 0) error = donice(td, p, uap->prio); PROC_UNLOCK(p); } found++; break; case PRIO_PGRP: { register struct pgrp *pg; sx_slock(&proctree_lock); if (uap->who == 0) { pg = curp->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(uap->who); if (pg == NULL) { sx_sunlock(&proctree_lock); break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (!p_cansee(td, p)) { error = donice(td, p, uap->prio); found++; } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; } case PRIO_USER: if (uap->who 
== 0) uap->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_ucred->cr_uid == uap->who && !p_cansee(td, p)) { error = donice(td, p, uap->prio); found++; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (found == 0 && error == 0) error = ESRCH; return (error); } /* * Set "nice" for a (whole) process. */ static int donice(struct thread *td, struct proc *p, int n) { int error; PROC_LOCK_ASSERT(p, MA_OWNED); if ((error = p_cansched(td, p))) return (error); if (n > PRIO_MAX) n = PRIO_MAX; if (n < PRIO_MIN) n = PRIO_MIN; if (n < p->p_nice && suser(td) != 0) return (EACCES); mtx_lock_spin(&sched_lock); sched_nice(p, n); mtx_unlock_spin(&sched_lock); return (0); } /* * Set realtime priority * * MPSAFE */ #ifndef _SYS_SYSPROTO_H_ struct rtprio_args { int function; pid_t pid; struct rtprio *rtp; }; #endif int rtprio(td, uap) struct thread *td; /* curthread */ register struct rtprio_args *uap; { struct proc *curp; struct proc *p; struct ksegrp *kg; struct rtprio rtp; int cierror, error; /* Perform copyin before acquiring locks if needed. */ if (uap->function == RTP_SET) cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); else cierror = 0; curp = td->td_proc; if (uap->pid == 0) { p = curp; PROC_LOCK(p); } else { p = pfind(uap->pid); if (p == NULL) return (ESRCH); } switch (uap->function) { case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; mtx_lock_spin(&sched_lock); /* * Return OUR priority if no pid specified, * or if one is, report the highest priority * in the process. There isn't much more you can do as * there is only room to return a single priority. * XXXKSE Maybe need a new interface to report * priorities of multiple system scope threads. * Note: specifying our own pid is not the same * as leaving it zero. */ if (uap->pid == 0) { pri_to_rtp(td->td_ksegrp, &rtp); } else { struct rtprio rtp2; rtp.type = RTP_PRIO_IDLE; rtp.prio = RTP_PRIO_MAX; FOREACH_KSEGRP_IN_PROC(p, kg) { pri_to_rtp(kg, &rtp2); if ((rtp2.type < rtp.type) || ((rtp2.type == rtp.type) && (rtp2.prio < rtp.prio))) { rtp.type = rtp2.type; rtp.prio = rtp2.prio; } } } mtx_unlock_spin(&sched_lock); PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: if ((error = p_cansched(td, p)) || (error = cierror)) break; /* disallow setting rtprio in most cases if not superuser */ if (suser(td) != 0) { /* can't set someone else's */ if (uap->pid) { error = EPERM; break; } /* can't set realtime priority */ /* * Realtime priority has to be restricted for reasons which should be * obvious. However, for idle priority, there is a potential for * system deadlock if an idleprio process gains a lock on a resource * that other processes need (and the idleprio process can't run * due to a CPU-bound normal process). Fix me! XXX */ #if 0 if (RTP_PRIO_IS_REALTIME(rtp.type)) #endif if (rtp.type != RTP_PRIO_NORMAL) { error = EPERM; break; } } mtx_lock_spin(&sched_lock); /* * If we are setting our own priority, set just our * KSEGRP but if we are doing another process, * do all the groups on that process. If we * specify our own pid we do the latter. 
*/ if (uap->pid == 0) { error = rtp_to_pri(&rtp, td->td_ksegrp); } else { FOREACH_KSEGRP_IN_PROC(p, kg) { if ((error = rtp_to_pri(&rtp, kg)) != 0) { break; } } } mtx_unlock_spin(&sched_lock); break; default: error = EINVAL; break; } PROC_UNLOCK(p); return (error); } int rtp_to_pri(struct rtprio *rtp, struct ksegrp *kg) { mtx_assert(&sched_lock, MA_OWNED); if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); switch (RTP_PRIO_BASE(rtp->type)) { case RTP_PRIO_REALTIME: kg->kg_user_pri = PRI_MIN_REALTIME + rtp->prio; break; case RTP_PRIO_NORMAL: kg->kg_user_pri = PRI_MIN_TIMESHARE + rtp->prio; break; case RTP_PRIO_IDLE: kg->kg_user_pri = PRI_MIN_IDLE + rtp->prio; break; default: return (EINVAL); } sched_class(kg, rtp->type); if (curthread->td_ksegrp == kg) { curthread->td_base_pri = kg->kg_user_pri; sched_prio(curthread, kg->kg_user_pri); /* XXX dubious */ } return (0); } void pri_to_rtp(struct ksegrp *kg, struct rtprio *rtp) { mtx_assert(&sched_lock, MA_OWNED); switch (PRI_BASE(kg->kg_pri_class)) { case PRI_REALTIME: rtp->prio = kg->kg_user_pri - PRI_MIN_REALTIME; break; case PRI_TIMESHARE: rtp->prio = kg->kg_user_pri - PRI_MIN_TIMESHARE; break; case PRI_IDLE: rtp->prio = kg->kg_user_pri - PRI_MIN_IDLE; break; default: break; } rtp->type = kg->kg_pri_class; } #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct osetrlimit_args { u_int which; struct orlimit *rlp; }; #endif /* * MPSAFE */ int osetrlimit(td, uap) struct thread *td; register struct osetrlimit_args *uap; { struct orlimit olim; struct rlimit lim; int error; if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit)))) return (error); lim.rlim_cur = olim.rlim_cur; lim.rlim_max = olim.rlim_max; error = kern_setrlimit(td, uap->which, &lim); return (error); } #ifndef _SYS_SYSPROTO_H_ struct ogetrlimit_args { u_int which; struct orlimit *rlp; }; #endif /* * MPSAFE */ int ogetrlimit(td, uap) struct thread *td; register struct ogetrlimit_args *uap; { struct orlimit olim; struct rlimit rl; struct proc *p; int error; if (uap->which >= RLIM_NLIMITS) return (EINVAL); p = td->td_proc; PROC_LOCK(p); lim_rlimit(p, uap->which, &rl); PROC_UNLOCK(p); /* * XXX would be more correct to convert only RLIM_INFINITY to the * old RLIM_INFINITY and fail with EOVERFLOW for other larger * values. Most 64->32 and 32->16 conversions, including not * unimportant ones of uids are even more broken than what we * do here (they blindly truncate). We don't do this correctly * here since we have little experience with EOVERFLOW yet. * Elsewhere, getuid() can't fail... */ olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur; olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max; error = copyout(&olim, uap->rlp, sizeof(olim)); return (error); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct __setrlimit_args { u_int which; struct rlimit *rlp; }; #endif /* * MPSAFE */ int setrlimit(td, uap) struct thread *td; register struct __setrlimit_args *uap; { struct rlimit alim; int error; if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit)))) return (error); error = kern_setrlimit(td, uap->which, &alim); return (error); } int kern_setrlimit(td, which, limp) struct thread *td; u_int which; struct rlimit *limp; { struct plimit *newlim, *oldlim; struct proc *p; register struct rlimit *alimp; rlim_t oldssiz; int error; if (which >= RLIM_NLIMITS) return (EINVAL); /* * Preserve historical bugs by treating negative limits as unsigned. 
*/ if (limp->rlim_cur < 0) limp->rlim_cur = RLIM_INFINITY; if (limp->rlim_max < 0) limp->rlim_max = RLIM_INFINITY; oldssiz = 0; p = td->td_proc; newlim = lim_alloc(); PROC_LOCK(p); oldlim = p->p_limit; alimp = &oldlim->pl_rlimit[which]; if (limp->rlim_cur > alimp->rlim_max || limp->rlim_max > alimp->rlim_max) if ((error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL))) { PROC_UNLOCK(p); lim_free(newlim); return (error); } if (limp->rlim_cur > limp->rlim_max) limp->rlim_cur = limp->rlim_max; lim_copy(newlim, oldlim); alimp = &newlim->pl_rlimit[which]; switch (which) { case RLIMIT_CPU: mtx_lock_spin(&sched_lock); p->p_cpulimit = limp->rlim_cur; mtx_unlock_spin(&sched_lock); break; case RLIMIT_DATA: if (limp->rlim_cur > maxdsiz) limp->rlim_cur = maxdsiz; if (limp->rlim_max > maxdsiz) limp->rlim_max = maxdsiz; break; case RLIMIT_STACK: if (limp->rlim_cur > maxssiz) limp->rlim_cur = maxssiz; if (limp->rlim_max > maxssiz) limp->rlim_max = maxssiz; oldssiz = alimp->rlim_cur; break; case RLIMIT_NOFILE: if (limp->rlim_cur > maxfilesperproc) limp->rlim_cur = maxfilesperproc; if (limp->rlim_max > maxfilesperproc) limp->rlim_max = maxfilesperproc; break; case RLIMIT_NPROC: if (limp->rlim_cur > maxprocperuid) limp->rlim_cur = maxprocperuid; if (limp->rlim_max > maxprocperuid) limp->rlim_max = maxprocperuid; if (limp->rlim_cur < 1) limp->rlim_cur = 1; if (limp->rlim_max < 1) limp->rlim_max = 1; break; } *alimp = *limp; p->p_limit = newlim; PROC_UNLOCK(p); lim_free(oldlim); if (which == RLIMIT_STACK) { /* * Stack is allocated to the max at exec time with only * "rlim_cur" bytes accessible. If stack limit is going * up make more accessible, if going down make inaccessible. */ if (limp->rlim_cur != oldssiz) { vm_offset_t addr; vm_size_t size; vm_prot_t prot; mtx_lock(&Giant); if (limp->rlim_cur > oldssiz) { prot = p->p_sysent->sv_stackprot; size = limp->rlim_cur - oldssiz; addr = p->p_sysent->sv_usrstack - limp->rlim_cur; } else { prot = VM_PROT_NONE; size = oldssiz - limp->rlim_cur; addr = p->p_sysent->sv_usrstack - oldssiz; } addr = trunc_page(addr); size = round_page(size); (void) vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot, FALSE); mtx_unlock(&Giant); } } return (0); } #ifndef _SYS_SYSPROTO_H_ struct __getrlimit_args { u_int which; struct rlimit *rlp; }; #endif /* * MPSAFE */ /* ARGSUSED */ int getrlimit(td, uap) struct thread *td; register struct __getrlimit_args *uap; { struct rlimit rlim; struct proc *p; int error; if (uap->which >= RLIM_NLIMITS) return (EINVAL); p = td->td_proc; PROC_LOCK(p); lim_rlimit(p, uap->which, &rlim); PROC_UNLOCK(p); error = copyout(&rlim, uap->rlp, sizeof(struct rlimit)); return(error); } /* * Transform the running time and tick information in proc p into user, * system, and interrupt time usage. */ void calcru(p, up, sp, ip) struct proc *p; struct timeval *up; struct timeval *sp; struct timeval *ip; { struct bintime bt, rt; struct timeval tv; struct thread *td; /* {user, system, interrupt, total} {ticks, usec}; previous tu: */ u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu; int problemcase; mtx_assert(&sched_lock, MA_OWNED); /* XXX: why spl-protect ? worst case is an off-by-one report */ ut = p->p_uticks; st = p->p_sticks; it = p->p_iticks; tt = ut + st + it; if (tt == 0) { st = 1; tt = 1; } rt = p->p_runtime; problemcase = 0; FOREACH_THREAD_IN_PROC(p, td) { /* * Adjust for the current time slice. This is actually fairly * important since the error here is on the order of a time * quantum, which is much greater than the sampling error. 
*/ if (td == curthread) { binuptime(&bt); bintime_sub(&bt, PCPU_PTR(switchtime)); bintime_add(&rt, &bt); } else if (TD_IS_RUNNING(td)) { /* * XXX: this case should add the difference between * the current time and the switch time as above, * but the switch time is inaccessible, so we can't * do the adjustment and will end up with a wrong * runtime. A previous call with a different * curthread may have obtained a (right or wrong) * runtime that is in advance of ours. Just set a * flag to avoid warning about this known problem. */ problemcase = 1; } } bintime2timeval(&rt, &tv); tu = (u_int64_t)tv.tv_sec * 1000000 + tv.tv_usec; ptu = p->p_uu + p->p_su + p->p_iu; if (tu < ptu) { if (!problemcase) printf( "calcru: runtime went backwards from %ju usec to %ju usec for pid %d (%s)\n", (uintmax_t)ptu, (uintmax_t)tu, p->p_pid, p->p_comm); tu = ptu; } if ((int64_t)tu < 0) { printf("calcru: negative runtime of %jd usec for pid %d (%s)\n", (intmax_t)tu, p->p_pid, p->p_comm); tu = ptu; } /* Subdivide tu. */ uu = (tu * ut) / tt; su = (tu * st) / tt; iu = tu - uu - su; /* Enforce monotonicity. */ if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) { if (uu < p->p_uu) uu = p->p_uu; else if (uu + p->p_su + p->p_iu > tu) uu = tu - p->p_su - p->p_iu; if (st == 0) su = p->p_su; else { su = ((tu - uu) * st) / (st + it); if (su < p->p_su) su = p->p_su; else if (uu + su + p->p_iu > tu) su = tu - uu - p->p_iu; } KASSERT(uu + su + p->p_iu <= tu, ("calcru: monotonisation botch 1")); iu = tu - uu - su; KASSERT(iu >= p->p_iu, ("calcru: monotonisation botch 2")); } p->p_uu = uu; p->p_su = su; p->p_iu = iu; up->tv_sec = uu / 1000000; up->tv_usec = uu % 1000000; sp->tv_sec = su / 1000000; sp->tv_usec = su % 1000000; if (ip != NULL) { ip->tv_sec = iu / 1000000; ip->tv_usec = iu % 1000000; } } #ifndef _SYS_SYSPROTO_H_ struct getrusage_args { int who; struct rusage *rusage; }; #endif /* * MPSAFE */ /* ARGSUSED */ int getrusage(td, uap) register struct thread *td; register struct getrusage_args *uap; { struct rusage ru; struct proc *p; p = td->td_proc; switch (uap->who) { case RUSAGE_SELF: mtx_lock(&Giant); mtx_lock_spin(&sched_lock); calcru(p, &p->p_stats->p_ru.ru_utime, &p->p_stats->p_ru.ru_stime, NULL); mtx_unlock_spin(&sched_lock); ru = p->p_stats->p_ru; mtx_unlock(&Giant); break; case RUSAGE_CHILDREN: mtx_lock(&Giant); ru = p->p_stats->p_cru; mtx_unlock(&Giant); break; default: return (EINVAL); break; } return (copyout(&ru, uap->rusage, sizeof(struct rusage))); } void ruadd(ru, ru2) register struct rusage *ru, *ru2; { register long *ip, *ip2; register int i; timevaladd(&ru->ru_utime, &ru2->ru_utime); timevaladd(&ru->ru_stime, &ru2->ru_stime); if (ru->ru_maxrss < ru2->ru_maxrss) ru->ru_maxrss = ru2->ru_maxrss; ip = &ru->ru_first; ip2 = &ru2->ru_first; for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) *ip++ += *ip2++; } /* * Allocate a new resource limits structure and initialize its * reference count and mutex pointer. */ struct plimit * lim_alloc() { struct plimit *limp; limp = (struct plimit *)malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK); limp->pl_refcnt = 1; limp->pl_mtx = mtx_pool_alloc(mtxpool_sleep); return (limp); } struct plimit * lim_hold(limp) struct plimit *limp; { LIM_LOCK(limp); limp->pl_refcnt++; LIM_UNLOCK(limp); return (limp); } void lim_free(limp) struct plimit *limp; { LIM_LOCK(limp); KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow")); if (--limp->pl_refcnt == 0) { LIM_UNLOCK(limp); free((void *)limp, M_PLIMIT); return; } LIM_UNLOCK(limp); } /* * Make a copy of the plimit structure. 
 * We share these structures copy-on-write after fork.
 */
void
lim_copy(dst, src)
	struct plimit *dst, *src;
{

	KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
}

/*
 * Return the hard limit for a particular system resource.  The
 * which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_max(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_max);
}

/*
 * Return the current (soft) limit for a particular system resource.
 * The which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_cur(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_cur);
}

/*
 * Return a copy of the entire rlimit structure for the system limit
 * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 */
void
lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(which >= 0 && which < RLIM_NLIMITS,
	    ("request for invalid resource limit"));
	*rlp = p->p_limit->pl_rlimit[which];
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit()
{

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	mtx_init(&uihashtbl_mtx, "uidinfo hash", NULL, MTX_DEF);
}

/*
 * Look up a uidinfo struct for the parameter uid.
 * uihashtbl_mtx must be locked.
 */
static struct uidinfo *
uilookup(uid)
	uid_t uid;
{
	struct uihashhead *uipp;
	struct uidinfo *uip;

	mtx_assert(&uihashtbl_mtx, MA_OWNED);
	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	return (uip);
}

/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Increase refcount on uidinfo struct returned.
 * uifree() should be called on a struct uidinfo when released.
 */
struct uidinfo *
uifind(uid)
	uid_t uid;
{
	struct uidinfo *old_uip, *uip;

	mtx_lock(&uihashtbl_mtx);
	uip = uilookup(uid);
	if (uip == NULL) {
		mtx_unlock(&uihashtbl_mtx);
		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
		mtx_lock(&uihashtbl_mtx);
		/*
		 * There's a chance someone created our uidinfo while we
		 * were in malloc and not holding the lock, so we have to
		 * make sure we don't insert a duplicate uidinfo.
		 */
		if ((old_uip = uilookup(uid)) != NULL) {
			/* Someone else beat us to it. */
			free(uip, M_UIDINFO);
			uip = old_uip;
		} else {
			uip->ui_mtxp = mtx_pool_alloc(mtxpool_sleep);
			uip->ui_uid = uid;
			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		}
	}
	uihold(uip);
	mtx_unlock(&uihashtbl_mtx);
	return (uip);
}

/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(uip)
	struct uidinfo *uip;
{

	UIDINFO_LOCK(uip);
	uip->ui_ref++;
	UIDINFO_UNLOCK(uip);
}

/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If refcount lowering results in need to free, bump the count
 *   back up, lose the lock and acquire the locks in the proper
 *   order to try again.
 * (A userland sketch of this pattern is appended at the end of
 * this diff.)
 */
void
uifree(uip)
	struct uidinfo *uip;
{

	/* Prepare for optimal case. */
	UIDINFO_LOCK(uip);

	if (--uip->ui_ref != 0) {
		UIDINFO_UNLOCK(uip);
		return;
	}

	/* Prepare for suboptimal case.
*/ uip->ui_ref++; UIDINFO_UNLOCK(uip); mtx_lock(&uihashtbl_mtx); UIDINFO_LOCK(uip); /* * We must subtract one from the count again because we backed out * our initial subtraction before dropping the lock. * Since another thread may have added a reference after we dropped the * initial lock we have to test for zero again. */ if (--uip->ui_ref == 0) { LIST_REMOVE(uip, ui_hash); mtx_unlock(&uihashtbl_mtx); if (uip->ui_sbsize != 0) printf("freeing uidinfo: uid = %d, sbsize = %jd\n", uip->ui_uid, (intmax_t)uip->ui_sbsize); if (uip->ui_proccnt != 0) printf("freeing uidinfo: uid = %d, proccnt = %ld\n", uip->ui_uid, uip->ui_proccnt); UIDINFO_UNLOCK(uip); FREE(uip, M_UIDINFO); return; } mtx_unlock(&uihashtbl_mtx); UIDINFO_UNLOCK(uip); } /* * Change the count associated with number of processes * a given user is using. When 'max' is 0, don't enforce a limit */ int chgproccnt(uip, diff, max) struct uidinfo *uip; int diff; int max; { UIDINFO_LOCK(uip); /* Don't allow them to exceed max, but allow subtraction. */ if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) { UIDINFO_UNLOCK(uip); return (0); } uip->ui_proccnt += diff; if (uip->ui_proccnt < 0) printf("negative proccnt for uid = %d\n", uip->ui_uid); UIDINFO_UNLOCK(uip); return (1); } /* * Change the total socket buffer size a user has used. */ int chgsbsize(uip, hiwat, to, max) struct uidinfo *uip; u_int *hiwat; u_int to; rlim_t max; { rlim_t new; UIDINFO_LOCK(uip); new = uip->ui_sbsize + to - *hiwat; /* Don't allow them to exceed max, but allow subtraction */ if (to > *hiwat && new > max) { UIDINFO_UNLOCK(uip); return (0); } uip->ui_sbsize = new; UIDINFO_UNLOCK(uip); *hiwat = to; if (new < 0) printf("negative sbsize for uid = %d\n", uip->ui_uid); return (1); } Index: stable/5/sys/sys/resource.h =================================================================== --- stable/5/sys/sys/resource.h (revision 145381) +++ stable/5/sys/sys/resource.h (revision 145382) @@ -1,169 +1,168 @@ /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)resource.h 8.4 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _SYS_RESOURCE_H_ #define _SYS_RESOURCE_H_ #include #include #include /* * Process priority specifications to get/setpriority. */ #define PRIO_MIN -20 #define PRIO_MAX 20 #define PRIO_PROCESS 0 #define PRIO_PGRP 1 #define PRIO_USER 2 /* * Resource utilization information. */ #define RUSAGE_SELF 0 #define RUSAGE_CHILDREN -1 struct rusage { struct timeval ru_utime; /* user time used */ struct timeval ru_stime; /* system time used */ long ru_maxrss; /* max resident set size */ #define ru_first ru_ixrss long ru_ixrss; /* integral shared memory size */ long ru_idrss; /* integral unshared data " */ long ru_isrss; /* integral unshared stack " */ long ru_minflt; /* page reclaims */ long ru_majflt; /* page faults */ long ru_nswap; /* swaps */ long ru_inblock; /* block input operations */ long ru_oublock; /* block output operations */ long ru_msgsnd; /* messages sent */ long ru_msgrcv; /* messages received */ long ru_nsignals; /* signals received */ long ru_nvcsw; /* voluntary context switches */ long ru_nivcsw; /* involuntary " */ #define ru_last ru_nivcsw }; /* * Resource limits */ #define RLIMIT_CPU 0 /* cpu time in milliseconds */ #define RLIMIT_FSIZE 1 /* maximum file size */ #define RLIMIT_DATA 2 /* data size */ #define RLIMIT_STACK 3 /* stack size */ #define RLIMIT_CORE 4 /* core file size */ #define RLIMIT_RSS 5 /* resident set size */ #define RLIMIT_MEMLOCK 6 /* locked-in-memory address space */ #define RLIMIT_NPROC 7 /* number of processes */ #define RLIMIT_NOFILE 8 /* number of open files */ #define RLIMIT_SBSIZE 9 /* maximum size of all socket buffers */ #define RLIMIT_VMEM 10 /* virtual process size (inclusive of mmap) */ #define RLIMIT_AS RLIMIT_VMEM /* standard name for RLIMIT_VMEM */ #define RLIM_NLIMITS 11 /* number of resource limits */ #define RLIM_INFINITY ((rlim_t)(((u_quad_t)1 << 63) - 1)) /* XXX Missing: RLIM_SAVED_MAX, RLIM_SAVED_CUR */ /* * Resource limit string identifiers */ #ifdef _RLIMIT_IDENT static char *rlimit_ident[] = { "cpu", "fsize", "data", "stack", "core", "rss", "memlock", "nproc", "nofile", "sbsize", "vmem", }; #endif #ifndef _RLIM_T_DECLARED typedef __rlim_t rlim_t; #define _RLIM_T_DECLARED #endif struct rlimit { rlim_t rlim_cur; /* current (soft) limit */ rlim_t rlim_max; /* maximum value for rlim_cur */ }; #if __BSD_VISIBLE struct orlimit { __int32_t rlim_cur; /* current (soft) limit */ __int32_t rlim_max; /* maximum value for rlim_cur */ }; struct loadavg { __fixpt_t ldavg[3]; long fscale; }; #define CP_USER 0 #define CP_NICE 1 #define CP_SYS 2 #define CP_INTR 3 #define CP_IDLE 4 #define CPUSTATES 5 #endif /* __BSD_VISIBLE */ #ifdef _KERNEL + extern struct loadavg averunnable; extern long cp_time[CPUSTATES]; - -int kern_setrlimit(struct thread *, u_int, struct rlimit *); #else __BEGIN_DECLS /* XXX 2nd arg to [gs]etpriority() should be an id_t */ int getpriority(int, int); int getrlimit(int, struct rlimit *); int getrusage(int, struct rusage *); int setpriority(int, int, int); int setrlimit(int, const struct rlimit *); __END_DECLS #endif /* _KERNEL */ #endif /* !_SYS_RESOURCE_H_ */ Index: stable/5/sys/sys/syscallsubr.h =================================================================== --- stable/5/sys/sys/syscallsubr.h (revision 145381) +++ stable/5/sys/sys/syscallsubr.h (revision 145382) @@ -1,122 +1,127 @@ /*- * Copyright (c) 2002 Ian Dowse. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_SYSCALLSUBR_H_ #define _SYS_SYSCALLSUBR_H_ #include #include #include struct itimerval; struct mbuf; struct msghdr; +struct rlimit; +struct rusage; struct sockaddr; struct kevent; int kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen); int kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags); int kern_adjtime(struct thread *td, struct timeval *delta, struct timeval *olddelta); int kern_bind(struct thread *td, int fd, struct sockaddr *sa); int kern_chdir(struct thread *td, char *path, enum uio_seg pathseg); int kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode); int kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, int gid); int kern_connect(struct thread *td, int fd, struct sockaddr *sa); int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg); int kern_futimes(struct thread *td, int fd, struct timeval *tptr, enum uio_seg tptrseg); int kern_getitimer(struct thread *, u_int, struct itimerval *); int kern_getsockopt(struct thread *td, int s, int level, int name, void *optval, enum uio_seg valseg, socklen_t *valsize); int kern_kevent(struct thread *td, int fd, struct kevent *changelist, int nchanges, enum uio_seg changeseg, struct kevent *eventlist, int nevents, enum uio_seg eventseg, const struct timespec *timeout); int kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, int gid); int kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg); int kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg); int kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode); int kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode); int kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, int dev); int kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt); int kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, int mode); int kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data); int kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, enum uio_seg bufseg, int count); int kern_readv(struct thread *td, 
int fd, struct uio *auio); int kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg); int kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg); int kern_sched_rr_get_interval(struct thread *td, pid_t pid, struct timespec *ts); int kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, fd_set *fd_ex, struct timeval *tvp); int kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags, struct mbuf *control); int kern_setitimer(struct thread *, u_int, struct itimerval *, struct itimerval *); +int kern_setrlimit(struct thread *, u_int, struct rlimit *); int kern_setsockopt(struct thread *td, int s, int level, int name, void *optval, enum uio_seg valseg, socklen_t valsize); int kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp); int kern_shmat(struct thread *td, int shmid, const void *shmaddr, int shmflg); int kern_shmctl(struct thread *td, int shmid, int cmd, void *buf, size_t *bufsz); int kern_sigaction(struct thread *td, int sig, struct sigaction *act, struct sigaction *oact, int flags); int kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss); int kern_sigprocmask(struct thread *td, int how, sigset_t *set, sigset_t *oset, int old); int kern_sigsuspend(struct thread *td, sigset_t mask); int kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg); int kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length); int kern_unlink(struct thread *td, char *path, enum uio_seg pathseg); int kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg); +int kern_wait(struct thread *td, pid_t pid, int *status, int options, + struct rusage *rup); int kern_writev(struct thread *td, int fd, struct uio *auio); /* flags for kern_sigaction */ #define KSA_OSIGSET 0x0001 /* uses osigact_t */ #define KSA_FREEBSD4 0x0002 /* uses ucontext4 */ #endif /* !_SYS_SYSCALLSUBR_H_ */ Index: stable/5/sys/sys/wait.h =================================================================== --- stable/5/sys/sys/wait.h (revision 145381) +++ stable/5/sys/sys/wait.h (revision 145382) @@ -1,113 +1,109 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)wait.h 8.2 (Berkeley) 7/10/94 * $FreeBSD$ */ #ifndef _SYS_WAIT_H_ #define _SYS_WAIT_H_ #include /* * This file holds definitions relevant to the wait4 system call and the * alternate interfaces that use it (wait, wait3, waitpid). */ /* * Macros to test the exit status returned by wait and extract the relevant * values. */ #if __BSD_VISIBLE #define _W_INT(w) (*(int *)&(w)) /* Convert union wait to int. */ #define WCOREFLAG 0200 #else #define _W_INT(i) (i) #endif #define _WSTATUS(x) (_W_INT(x) & 0177) #define _WSTOPPED 0177 /* _WSTATUS if process is stopped */ #define WIFSTOPPED(x) (_WSTATUS(x) == _WSTOPPED) #define WSTOPSIG(x) (_W_INT(x) >> 8) #define WIFSIGNALED(x) (_WSTATUS(x) != _WSTOPPED && _WSTATUS(x) != 0) #define WTERMSIG(x) (_WSTATUS(x)) #define WIFEXITED(x) (_WSTATUS(x) == 0) #define WEXITSTATUS(x) (_W_INT(x) >> 8) #define WIFCONTINUED(x) (x == 0x13) /* 0x13 == SIGCONT */ #if __BSD_VISIBLE #define WCOREDUMP(x) (_W_INT(x) & WCOREFLAG) #define W_EXITCODE(ret, sig) ((ret) << 8 | (sig)) #define W_STOPCODE(sig) ((sig) << 8 | _WSTOPPED) #endif /* * Option bits for the third argument of wait4. WNOHANG causes the * wait to not hang if there are no stopped or terminated processes, rather * returning an error indication in this case (pid==0). WUNTRACED * indicates that the caller should receive status about untraced children * which stop due to signals. If children are stopped and a wait without * this option is done, it is as though they were still running... nothing * about them is returned. */ #define WNOHANG 1 /* Don't hang in wait. */ #define WUNTRACED 2 /* Tell about stopped, untraced children. */ #define WCONTINUED 4 /* Report a job control continued process. */ #if __BSD_VISIBLE #define WLINUXCLONE 0x80000000 /* Wait for kthread spawned from linux_clone. */ #endif /* * Tokens for special values of the "pid" parameter to wait4. */ #if __BSD_VISIBLE #define WAIT_ANY (-1) /* any process */ #define WAIT_MYPGRP 0 /* any process in my process group */ #endif /* __BSD_VISIBLE */ -#ifdef _KERNEL -struct rusage; -int kern_wait(struct thread *td, pid_t pid, int *status, int options, - struct rusage *rup); -#else /* !_KERNEL */ +#ifndef _KERNEL #include __BEGIN_DECLS pid_t wait(int *); pid_t waitpid(pid_t, int *, int); #if __BSD_VISIBLE struct rusage; pid_t wait3(int *, int, struct rusage *); pid_t wait4(pid_t, int *, int, struct rusage *); #endif __END_DECLS -#endif /* _KERNEL */ +#endif /* !_KERNEL */ #endif /* !_SYS_WAIT_H_ */
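
---

Appendix: userland sketches of the mechanisms touched by this change. These are illustrative analogues written against the public APIs, not kernel code; any helper names introduced below (tv2ticks, alarm_like, refcnt_drop, wait_for_child, split_runtime, obj_free, and so on) are hypothetical.

ibcs2_alarm() emulates alarm(2) on top of setitimer(ITIMER_REAL), returning the seconds left on the previous timer and rounding any leftover microseconds up to a full second. A minimal userland sketch of the same logic; it assumes the BSD timerclear() macro from <sys/time.h>:

#include <stdio.h>
#include <sys/time.h>

/* Userland sketch of the ibcs2_alarm() logic: arm a one-shot real-time
 * timer and return the whole seconds left on the previous one, rounding
 * leftover microseconds up exactly as the emulator does. */
static unsigned int
alarm_like(unsigned int sec)
{
    struct itimerval itv, oitv;

    timerclear(&itv.it_interval);
    itv.it_value.tv_sec = sec;
    itv.it_value.tv_usec = 0;
    if (setitimer(ITIMER_REAL, &itv, &oitv) != 0)
        return (0);
    if (oitv.it_value.tv_usec != 0)
        oitv.it_value.tv_sec++;
    return ((unsigned int)oitv.it_value.tv_sec);
}

int
main(void)
{
    alarm_like(10);
    printf("remaining on rearm: %u\n", alarm_like(5));
    return (0);
}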
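ibcs2_times() converts each struct timeval in the rusage into clock ticks with CONVTCK, i.e. r.tv_sec * hz + r.tv_usec / (1000000 / hz). The same arithmetic can be exercised from userland; this sketch substitutes sysconf(_SC_CLK_TCK) for the kernel's hz variable:

#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>

/* Same arithmetic as the kernel's CONVTCK() macro, with the userland
 * clock-tick rate standing in for the kernel 'hz'. */
static long
tv2ticks(struct timeval tv, long hz)
{
    return (tv.tv_sec * hz + tv.tv_usec / (1000000 / hz));
}

int
main(void)
{
    struct rusage ru;
    long hz = sysconf(_SC_CLK_TCK);

    if (hz <= 0 || getrusage(RUSAGE_SELF, &ru) != 0)
        return (1);
    printf("utime=%ld ticks stime=%ld ticks\n",
        tv2ticks(ru.ru_utime, hz), tv2ticks(ru.ru_stime, hz));
    return (0);
}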
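exit1() drops vm_refcnt with an atomic_cmpset_int() loop rather than a plain atomic decrement so that it learns the pre-decrement value: only the thread that releases the last reference tears the map down. A C11 analogue of that pattern:

#include <stdatomic.h>
#include <stdio.h>

/* Compare-and-swap loop that decrements a count and reports the value
 * it held beforehand, mirroring the vm_refcnt drop in exit1(). */
static int
refcnt_drop(atomic_int *cnt)
{
    int old;

    do {
        old = atomic_load(cnt);
    } while (!atomic_compare_exchange_weak(cnt, &old, old - 1));
    return (old);
}

int
main(void)
{
    atomic_int refs = 2;

    refcnt_drop(&refs);
    if (refcnt_drop(&refs) == 1)
        printf("last reference dropped, safe to tear down\n");
    return (0);
}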
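kern_wait() is structured as scan-children / sleep / rescan: if no child is reportable and WNOHANG is not set, it msleep()s on the parent proc and restarts from the loop: label when a child's exit1() calls wakeup(). A pthread condition-variable sketch of that shape:

#include <pthread.h>
#include <stdio.h>

/* Scan for a reportable "child" under the lock; if none and blocking is
 * allowed, sleep on the condition and rescan from the top when woken. */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t changed = PTHREAD_COND_INITIALIZER;
static int zombies;

static int
wait_for_child(int nohang)
{
    pthread_mutex_lock(&lock);
    for (;;) {
        if (zombies > 0) {              /* found one: reap it */
            zombies--;
            pthread_mutex_unlock(&lock);
            return (1);
        }
        if (nohang) {                   /* WNOHANG: report nothing */
            pthread_mutex_unlock(&lock);
            return (0);
        }
        pthread_cond_wait(&changed, &lock);  /* the msleep(q, ...) step */
    }
}

static void *
child(void *arg)
{
    pthread_mutex_lock(&lock);
    zombies++;                          /* exit1() marks us a zombie... */
    pthread_cond_signal(&changed);      /* ...and wakes the parent */
    pthread_mutex_unlock(&lock);
    return (NULL);
}

int
main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, child, NULL);
    printf("reaped: %d\n", wait_for_child(0));
    pthread_join(t, NULL);
    return (0);
}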
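The <sys/wait.h> macros above define the status layout that kern_wait() and the emulators manipulate directly: the low 7 bits hold the termination signal (0 for a normal exit, 0177 for stopped) and the next byte the exit code or stop signal, per W_EXITCODE() and W_STOPCODE(). A small decoding demo:

#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/* Fork a child that exits with code 42 and decode the raw status word
 * using the macros from <sys/wait.h>. */
int
main(void)
{
    int status;
    pid_t pid;

    if ((pid = fork()) == 0)
        _exit(42);
    if (waitpid(pid, &status, 0) == -1)
        return (1);
    if (WIFEXITED(status))
        printf("exit code %d (raw status 0x%x)\n",
            WEXITSTATUS(status), status);
    else if (WIFSIGNALED(status))
        printf("killed by signal %d\n", WTERMSIG(status));
    return (0);
}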
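wait4(2), whose kernel side simply wraps kern_wait() above, returns both the status and the rusage that exit1() accumulated into p_ru. A minimal consumer (wait4() is a BSD extension; other systems may need feature-test macros):

#include <stdio.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <unistd.h>

/* Reap one child with wait4(2) and report the resource usage that the
 * kernel folded into the parent's child-rusage before freeing the proc. */
int
main(void)
{
    struct rusage ru;
    int status;
    pid_t pid;

    if ((pid = fork()) == 0) {
        for (volatile long i = 0; i < 10000000; i++)
            ;                           /* burn a little user time */
        _exit(0);
    }
    if (wait4(pid, &status, 0, &ru) == -1)
        return (1);
    printf("child %ld: utime %ld.%06lds, %ld voluntary ctx switches\n",
        (long)pid, (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec,
        ru.ru_nvcsw);
    return (0);
}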
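The getpriority()/setpriority() switches above accept three scopes: one process, a process group, or every process of a user, with who == 0 meaning the caller in each case. A userland sketch; note that getpriority(2) can legitimately return -1, so a robust caller would clear and check errno, which this sketch omits:

#include <stdio.h>
#include <sys/resource.h>
#include <unistd.h>

/* Query the caller's nice value under each of the three PRIO_* scopes,
 * then renice the calling process itself. */
int
main(void)
{
    printf("self: %d\n", getpriority(PRIO_PROCESS, 0));
    printf("pgrp: %d\n", getpriority(PRIO_PGRP, 0));
    printf("user: %d\n", getpriority(PRIO_USER, getuid()));
    if (setpriority(PRIO_PROCESS, 0, 10) != 0)
        perror("setpriority");
    return (0);
}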
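calcru() apportions the total measured runtime tu across user, system, and interrupt time in proportion to the statclock tick counts ut, st, and it, charging at least one tick to system time so the divisor is never zero and giving the division remainder to interrupt time. The core arithmetic, isolated (the kernel additionally enforces monotonicity against previously returned values, which this sketch omits):

#include <stdint.h>
#include <stdio.h>

/* Split a total runtime 'tu' (microseconds) across user/system/interrupt
 * time in proportion to tick counts, as calcru() does. */
static void
split_runtime(uint64_t tu, uint64_t ut, uint64_t st, uint64_t it,
    uint64_t *uu, uint64_t *su, uint64_t *iu)
{
    uint64_t tt = ut + st + it;

    if (tt == 0) {          /* no ticks recorded: charge one to system */
        st = 1;
        tt = 1;
    }
    *uu = (tu * ut) / tt;
    *su = (tu * st) / tt;
    *iu = tu - *uu - *su;   /* remainder goes to interrupt time */
}

int
main(void)
{
    uint64_t uu, su, iu;

    split_runtime(1000000, 70, 25, 5, &uu, &su, &iu);
    printf("user %ju us, sys %ju us, intr %ju us\n",
        (uintmax_t)uu, (uintmax_t)su, (uintmax_t)iu);
    return (0);
}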
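kern_setrlimit() above lets any process lower its limits, clamps rlim_cur to rlim_max, and requires privilege only to raise a value above the current hard limit. The userland view of those rules, exercising RLIMIT_NOFILE only:

#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/resource.h>

/* Raising the soft limit up to the hard limit never needs privilege;
 * raising either limit beyond rlim_max would fail for a normal user. */
int
main(void)
{
    struct rlimit rl;

    if (getrlimit(RLIMIT_NOFILE, &rl) != 0)
        return (1);
    printf("nofile: cur %ju max %ju\n",
        (uintmax_t)rl.rlim_cur, (uintmax_t)rl.rlim_max);
    rl.rlim_cur = rl.rlim_max;      /* soft up to hard: always legal */
    if (setrlimit(RLIMIT_NOFILE, &rl) != 0)
        perror("setrlimit");
    return (0);
}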
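Finally, the uifree() pattern referenced above, in portable form: touch only the object lock in the common case; on a 1->0 transition, back the count out, acquire the global table lock first to respect the lock order, and re-check, because another thread may have taken a new reference in the window. A pthread sketch:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

struct obj {
    pthread_mutex_t lock;
    int ref;
};

static void
obj_free(struct obj *o)
{
    pthread_mutex_lock(&o->lock);
    if (--o->ref != 0) {                /* optimal case: not the last ref */
        pthread_mutex_unlock(&o->lock);
        return;
    }
    o->ref++;                           /* back out, retry in lock order */
    pthread_mutex_unlock(&o->lock);

    pthread_mutex_lock(&table_lock);    /* table lock first, then object */
    pthread_mutex_lock(&o->lock);
    if (--o->ref == 0) {
        /* remove from the table here, then destroy */
        pthread_mutex_unlock(&o->lock);
        pthread_mutex_unlock(&table_lock);
        pthread_mutex_destroy(&o->lock);
        free(o);
        return;
    }
    pthread_mutex_unlock(&o->lock);     /* someone re-referenced it */
    pthread_mutex_unlock(&table_lock);
}

int
main(void)
{
    struct obj *o = malloc(sizeof(*o));

    pthread_mutex_init(&o->lock, NULL);
    o->ref = 1;
    obj_free(o);
    printf("freed\n");
    return (0);
}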